librsync  2.3.0
hashtable.h
Go to the documentation of this file.
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * hashtable.h -- a generic open addressing hashtable.
4  *
5  * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20 #ifndef _HASHTABLE_H_
21 # define _HASHTABLE_H_
22 
23 # include <assert.h>
24 # include <stdlib.h>
25 
26 /** \file hashtable.h
27  * A generic open addressing hashtable.
28  *
29  * This is a minimal hashtable containing pointers to arbitrary entries with
30  * configurable hashtable size and support for custom hash() and cmp() methods.
31  * The cmp() method can either be a simple comparison between two keys, or can
32  * be against a special match object containing additional mutable state. This
33  * allows for things like deferred and cached evaluation of costly comparison
34  * data. The hash() function doesn't need to avoid clustering behaviour.
35  *
36  * It uses open addressing with quadratic probing for collisions. The
37  * MurmurHash3 finalization function is optionally used on the hash() output to
38  * avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is
39  * no support for removing entries, only adding them. Multiple entries with the
40  * same key can be added, and you can use a fancy cmp() function to find
41  * particular entries by more than just their key. There is an iterator for
42  * iterating through all entries in the hashtable. There are optional
43  * NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled
44  * by defining HASHTABLE_NSTATS.
45  *
46  * The types and methods of the hashtable and its contents are specified by
47  * using \#define parameters set to their basenames (the prefixes for the *_t
48  * type and *_func() methods) before doing \#include "hashtable.h". This
49  * produces static inline type-safe methods that are either application
50  * optimized for speed or wrappers around void* implementation methods for
51  * compactness.
52  *
53  * \param ENTRY - the entry type basename.
54  *
55  * \param KEY - optional key type basename (default: ENTRY).
56  *
57  * \param MATCH - optional match type basename (default: KEY).
58  *
59  * \param NAME - optional hashtable type basename (default: ENTRY_hashtable).
60  *
61  * Example: \code
62  * typedef ... mykey_t;
63  * int mykey_hash(const mykey_t *e);
64  * int mykey_cmp(mykey_t *e, const mykey_t *o);
65  *
66  * typedef struct myentry {
67  * mykey_t key; // Inherit from mykey_t.
68  * ...extra entry value data...
69  * } myentry_t;
70  * void myentry_init(myentry_t *e, ...);
71  *
72  * #define ENTRY myentry
73  * #define KEY mykey
74  * #include "hashtable.h"
75  *
76  * hashtable_t *t;
77  * myentry_t entries[300];
78  * mykey_t k;
79  * myentry_t *e;
80  *
81  * t = myentry_hashtable_new(300);
82  * myentry_init(&entries[5], ...);
83  * myentry_hashtable_add(t, &entries[5]);
84  * k = ...;
85  * e = myentry_hashtable_find(t, &k);
86  *
87  * int i;
88  * for (e = myentry_hashtable_iter(t, &i); e != NULL;
89  * e = myentry_hashtable_next(t, &i))
90  * ...
91  *
92  * myentry_hashtable_free(t);
93  * \endcode
94  *
95  * The mykey_hash() and mykey_cmp() functions will typically take pointers to
96  * mykey/myentry instances the same as the pointers stored in the hashtable.
97  * However it is also possible for them to take "match objects" that are a
98  * "subclass" of the entry type that contain additional state for complicated
99  * comparison operations.
100  *
101  * Example: \code
102  * typedef struct mymatch {
103  * mykey_t key; // Inherit from mykey_t;
104  * ...extra match criteria and state data...
105  * } mymatch_t;
106  * int mymatch_cmp(mymatch_t *m, const myentry_t *e);
107  *
108  * #define ENTRY myentry
109  * #define KEY mykey
110  * #define MATCH mymatch
111  * #include "hashtable.h"
112  *
113  * ...
114  * mymatch_t m;
115  *
116  * t = myentry_hashtable_new(300);
117  * ...
118  * m = ...;
119  * e = myentry_hashtable_find(t, &m);
120  * \endcode
121  *
122  * The mymatch_cmp() function is only called for finding hashtable entries and
123  * can mutate the mymatch_t object for doing things like deferred and cached
124  * evaluation of expensive match data. It can also access the whole myentry_t
125  * object to match against more than just the key. */
126 
/** The hashtable type.
 *
 * The struct ends in a flexible array member (ktable), so an instance must be
 * allocated with room for the ktable[] slots following the fixed fields. */
typedef struct hashtable {
    int size;                   /**< Size of allocated hashtable. */
    int count;                  /**< Number of entries in hashtable. */
# if !defined(HASHTABLE_NSTATS)
    /* Counters accumulated by NAME_find(); left out entirely when
       HASHTABLE_NSTATS is defined. */
    long find_count;            /**< The count of finds tried. */
    long match_count;           /**< The count of matches found. */
    long hashcmp_count;         /**< The count of hash compares done. */
    long entrycmp_count;        /**< The count of entry compares done. */
# endif
    void **etable;              /**< Table of pointers to entries. */
    unsigned int ktable[];      /**< Table of hash keys. */
} hashtable_t;

/* Untyped (void*) implementations that the type-safe static inline wrappers
   generated further down forward to. */
hashtable_t *_hashtable_new(int size);
void _hashtable_free(hashtable_t *t);
145 
/** MurmurHash3 finalization mix function.
 *
 * Scrambles the bits of a hash value with alternating xor-shift and multiply
 * steps. Zero maps to zero.
 *
 * \param h - The hash value to mix.
 *
 * \return The mixed hash value. */
static inline unsigned mix32(unsigned int h)
{
    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}
156 
157 #endif /* _HASHTABLE_H_ */
158 
159 /* If ENTRY is defined, define type-dependent static inline methods. */
160 #ifdef ENTRY
161 
/* Two-level token pasting: _JOIN() lets its arguments be macro-expanded
   before _JOIN2() glues the results together with ##. */
# define _JOIN2(x, y) x##y
# define _JOIN(x, y) _JOIN2(x, y)

/* Defaults for the optional template parameters: KEY falls back to ENTRY,
   MATCH falls back to KEY, and NAME falls back to <ENTRY>_hashtable. */
# if !defined(KEY)
#  define KEY ENTRY
# endif

# if !defined(MATCH)
#  define MATCH KEY
# endif

# if !defined(NAME)
#  define NAME _JOIN(ENTRY, _hashtable)
# endif

/* Type and method names derived from the parameter basenames. */
# define ENTRY_t _JOIN(ENTRY, _t)       /**< The entry type. */
# define KEY_t _JOIN(KEY, _t)           /**< The key type. */
# define MATCH_t _JOIN(MATCH, _t)       /**< The match type. */
# define KEY_hash _JOIN(KEY, _hash)     /**< The key hash(k) method. */
# define MATCH_cmp _JOIN(MATCH, _cmp)   /**< The match cmp(m, e) method. */

/* Names of the generated hashtable methods, all prefixed with NAME. */
# define NAME_new _JOIN(NAME, _new)
# define NAME_free _JOIN(NAME, _free)
# define NAME_stats_init _JOIN(NAME, _stats_init)
# define NAME_add _JOIN(NAME, _add)
# define NAME_find _JOIN(NAME, _find)
# define NAME_iter _JOIN(NAME, _iter)
# define NAME_next _JOIN(NAME, _next)
190 
/* Hash of key k: KEY_hash() passed through mix32(), or the raw KEY_hash()
   output when HASHTABLE_NMIX32 is defined.

   k is cast to KEY_t * before the call so entry and match pointers can be
   passed as well. The argument is parenthesized so that the cast applies to
   the whole expression (e.g. `p + n`) and not just its first operand. */
# ifdef HASHTABLE_NMIX32
#  define _KEY_HASH(k) KEY_hash((KEY_t *)(k))
# else
#  define _KEY_HASH(k) mix32(KEY_hash((KEY_t *)(k)))
# endif
197 
/* Loop macro for probing table t for key k.

   Declares, in the enclosing scope, the variables `mask` and `s` plus the
   caller-named hk, i and h, so it can be used at most once per scope:
     - hk is the hash of k, with 0 remapped to (unsigned)-1 because a zero
       ktable[] slot marks an empty bucket;
     - i is the current bucket index and h the hash stored in that bucket.
   The loop starts at (hk & mask) and advances by 1, 2, 3, ... buckets,
   terminating at the first empty bucket, which i indexes after the loop.

   Indices are reduced with `& (size - 1)`, which presumably relies on
   t->size being a power of two - TODO confirm against _hashtable_new().

   The table argument is parenthesized so any pointer expression (for example
   `&table`) can be passed. */
# define _for_probe(t, k, hk, i, h) \
    const unsigned mask = (t)->size - 1;\
    unsigned hk = _KEY_HASH(k), i, s, h;\
    if (hk == 0) hk = (unsigned)-1;\
    for (i = hk & mask, s = 0; (h = (t)->ktable[i]); i = (i + ++s) & mask)
206 
/* Conditional macro for incrementing stats counters.

   Increments the counter lvalue c, or does nothing when HASHTABLE_NSTATS is
   defined. The argument is parenthesized so that an expression such as `*p`
   increments the counter and not the pointer, and the disabled form expands
   to a void expression so the macro is a valid expression either way. */
# ifndef HASHTABLE_NSTATS
#  define _stats_inc(c) ((c)++)
# else
#  define _stats_inc(c) ((void)0)
# endif
213 
214 /** Allocate and initialize a hashtable instance.
215  *
216  * The provided size is used as an indication of the number of entries you wish
217  * to add, but the allocated size will probably be larger depending on the
218  * implementation to enable optimisations or avoid degraded performance. It may
219  * be possible to fill the table beyond the requested size, but performance can
220  * start to degrade badly if it is over filled.
221  *
222  * \param size - The desired minimum size of the hash table.
223  *
224  * \return The initialized hashtable instance or NULL if it failed. */
225 static inline hashtable_t *NAME_new(int size)
226 {
227  return _hashtable_new(size);
228 }
229 
230 /** Destroy and free a hashtable instance.
231  *
232  * Type-safe wrapper that forwards to _hashtable_free(). This will free the
233  * hashtable, but will not free the entries in the hashtable. If you want to
234  * free the entries too, use a hashtable iterator to free the entries first.
235  *
236  * \param *t - The hashtable to destroy and free. */
237 static inline void NAME_free(hashtable_t *t)
238 {
239  _hashtable_free(t);
240 }
241 
242 /** Initialize hashtable stats counters.
243  *
244  * This will reset all the stats counters for the hashtable,
245  *
246  * \param *t - The hashtable to initializ stats for. */
247 static inline void NAME_stats_init(hashtable_t *t)
248 {
249 # ifndef HASHTABLE_NSTATS
250  t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0;
251 # endif
252 }
253 
254 /** Add an entry to a hashtable.
255  *
256  * This doesn't use MATCH_cmp() or do any checks for existing copies or
257  * instances, so it will add duplicates. If you want to avoid adding
258  * duplicates, use NAME_find() to check for existing entries first.
259  *
260  * \param *t - The hashtable to add to.
261  *
262  * \param *e - The entry object to add.
263  *
264  * \return The added entry, or NULL if the table is full. */
265 static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e)
266 {
267  assert(e != NULL);
268  if (t->count + 1 == t->size)
269  return NULL;
270  _for_probe(t, e, he, i, h);
271  t->count++;
272  t->ktable[i] = he;
273  return t->etable[i] = e;
274 }
275 
276 /** Find an entry in a hashtable.
277  *
278  * Uses MATCH_cmp() to find the first matching entry in the table in the same
279  * hash() bucket.
280  *
281  * \param *t - The hashtable to search.
282  *
283  * \param *m - The key or match object to search for.
284  *
285  * \return The first found entry, or NULL if nothing was found. */
286 static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m)
287 {
288  assert(m != NULL);
289  ENTRY_t *e;
290 
291  _stats_inc(t->find_count);
292  _for_probe(t, m, hm, i, he) {
293  _stats_inc(t->hashcmp_count);
294  if (hm == he) {
295  _stats_inc(t->entrycmp_count);
296  if (!MATCH_cmp(m, e = t->etable[i])) {
297  _stats_inc(t->match_count);
298  return e;
299  }
300  }
301  }
302  return NULL;
303 }
304 
305 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i);
306 
307 /** Initialize a iteration and return the first entry.
308  *
309  * This works together with NAME_next() for iterating through all entries in a
310  * hashtable.
311  *
312  * Example: \code
313  * for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i))
314  * ...
315  * \endcode
316  *
317  * \param *t - the hashtable to iterate over.
318  *
319  * \param *i - the int iterator index to initialize.
320  *
321  * \return The first entry or NULL if the hashtable is empty. */
322 static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i)
323 {
324  assert(t != NULL);
325  assert(i != NULL);
326  *i = 0;
327  return NAME_next(t, i);
328 }
329 
330 /** Get the next entry from a hashtable iterator or NULL when finished.
331  *
332  * This works together with NAME_iter() for iterating through all entries in a
333  * hashtable.
334  *
335  * \param *t - the hashtable to iterate over.
336  *
337  * \param *i - the int iterator index to use.
338  *
339  * \return The next entry or NULL if the iterator is finished. */
340 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i)
341 {
342  assert(t != NULL);
343  assert(i != NULL);
344  ENTRY_t *e = NULL;
345 
346  while ((*i < t->size) && !(e = t->etable[(*i)++])) ;
347  return e;
348 }
349 
/* Remove the template parameters and every helper macro defined for this
   inclusion, so the header can be included again with different parameters
   and no helper macro leaks into the including file. */
# undef ENTRY
# undef KEY
# undef MATCH
# undef NAME
# undef ENTRY_t
# undef KEY_t
# undef MATCH_t
# undef KEY_hash
# undef MATCH_cmp
# undef NAME_new
# undef NAME_free
# undef NAME_stats_init
# undef NAME_add
# undef NAME_find
# undef NAME_iter
# undef NAME_next
# undef _KEY_HASH
# undef _for_probe
# undef _stats_inc
# undef _JOIN
# undef _JOIN2
367 #endif /* ENTRY */
mix32
static unsigned mix32(unsigned int h)
MurmurHash3 finalization mix function.
Definition: hashtable.h:147
NAME_free
static void NAME_free(hashtable_t *t)
Destroy and free a hashtable instance.
Definition: hashtable.h:237
hashtable::ktable
unsigned ktable[]
Table of hash keys.
Definition: hashtable.h:139
MATCH_t
#define MATCH_t
The match type.
Definition: hashtable.h:179
NAME_add
static ENTRY_t * NAME_add(hashtable_t *t, ENTRY_t *e)
Add an entry to a hashtable.
Definition: hashtable.h:265
MATCH_cmp
#define MATCH_cmp
The match cmp(m, e) method.
Definition: hashtable.h:181
hashtable::etable
void ** etable
Table of pointers to entries.
Definition: hashtable.h:138
hashtable::find_count
long find_count
The count of finds tried.
Definition: hashtable.h:133
NAME_new
static hashtable_t * NAME_new(int size)
Allocate and initialize a hashtable instance.
Definition: hashtable.h:225
NAME_iter
static ENTRY_t * NAME_iter(hashtable_t *t, int *i)
Initialize a iteration and return the first entry.
Definition: hashtable.h:322
hashtable::count
int count
Number of entries in hashtable.
Definition: hashtable.h:130
hashtable::hashcmp_count
long hashcmp_count
The count of hash compares done.
Definition: hashtable.h:135
NAME_stats_init
static void NAME_stats_init(hashtable_t *t)
Initialize hashtable stats counters.
Definition: hashtable.h:247
hashtable_t
struct hashtable hashtable_t
The hashtable type.
NAME_find
static ENTRY_t * NAME_find(hashtable_t *t, MATCH_t *m)
Find an entry in a hashtable.
Definition: hashtable.h:286
hashtable::match_count
long match_count
The count of matches found.
Definition: hashtable.h:134
ENTRY_t
#define ENTRY_t
The entry type.
Definition: hashtable.h:177
hashtable::size
int size
Size of allocated hashtable.
Definition: hashtable.h:129
hashtable
The hashtable type.
Definition: hashtable.h:128
NAME_next
static ENTRY_t * NAME_next(hashtable_t *t, int *i)
Get the next entry from a hashtable iterator or NULL when finished.
Definition: hashtable.h:340
hashtable::entrycmp_count
long entrycmp_count
The count of entry compares done.
Definition: hashtable.h:136