Changeset 0ca7286 in mainline for uspace/lib/c/generic/adt/hash_table.c
- Timestamp:
- 2012-07-21T11:19:27Z (13 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
- Children:
- 2732c94
- Parents:
- 1c1da4b
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/generic/adt/hash_table.c
r1c1da4b r0ca7286 34 34 35 35 /* 36 * This is an implementation of generic chained hash table. 36 * This is an implementation of a generic resizable chained hash table. 37 * 38 * The table grows to 2*n+1 buckets each time, starting at n == 89, 39 * per Thomas Wang's recommendation: 40 * http://www.concentric.net/~Ttwang/tech/hashsize.htm 41 * 42 * This policy produces prime table sizes for the first five resizes 43 * and generally produces table sizes which are either prime or 44 * have fairly large (prime/odd) divisors. Having a prime table size 45 * mitigates the use of suboptimal hash functions and distributes 46 * items over the whole table. 37 47 */ 38 48 … … 44 54 #include <str.h> 45 55 56 /* Optimal initial bucket count. See comment above. */ 57 #define HT_MIN_BUCKETS 89 58 /* The table is resized when the average load per bucket exceeds this number. */ 59 #define HT_MAX_LOAD 2 60 61 62 static size_t round_up_size(size_t size); 63 static bool alloc_table(size_t bucket_cnt, list_t **pbuckets); 64 static void item_inserted(hash_table_t *h); 65 static void item_removed(hash_table_t *h); 66 static inline void remove_item(hash_table_t *h, link_t *item); 67 static size_t remove_duplicates(hash_table_t *h, unsigned long key[]); 68 static size_t remove_matching(hash_table_t *h, unsigned long key[], size_t key_cnt); 69 70 /* Dummy do nothing callback to invoke in place of remove_callback == NULL. */ 71 static void nop_remove_callback(link_t *item) 72 { 73 /* no-op */ 74 } 75 76 46 77 /** Create chained hash table. 47 78 * 48 79 * @param h Hash table structure. Will be initialized by this call. 49 * @param m Number of hash table buckets. 80 * @param init_size Initial desired number of hash table buckets. Pass zero 81 * if you want the default initial size. 50 82 * @param max_keys Maximal number of keys needed to identify an item. 51 * @param op Hash table operations structure. 83 * @param op Hash table operations structure. remove_callback() 84 * is optional and can be NULL if no action is to be taken 85 * upon removal. equal() is optional if and only if 86 * hash_table_insert_unique() will never be invoked. 87 * All other operations are mandatory. 52 88 * 53 89 * @return True on success 54 90 * 55 91 */ 56 bool hash_table_create(hash_table_t *h, hash_count_t m, hash_count_t max_keys,57 hash_table_op erations_t *op)92 bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_keys, 93 hash_table_ops_t *op) 58 94 { 59 95 assert(h); 60 assert(op && op->hash && op-> compare);96 assert(op && op->hash && op->key_hash && op->match); 61 97 assert(max_keys > 0); 62 98 63 h->entry = malloc(m * sizeof(list_t)); 64 if (!h->entry) 99 h->bucket_cnt = round_up_size(init_size); 100 101 if (!alloc_table(h->bucket_cnt, &h->bucket)) 65 102 return false; 66 103 67 memset((void *) h->entry, 0, m * sizeof(list_t));68 69 hash_count_t i;70 for (i = 0; i < m; i++)71 list_initialize(&h->entry[i]);72 73 h->entries = m;74 104 h->max_keys = max_keys; 105 h->items = 0; 75 106 h->op = op; 76 107 108 if (h->op->remove_callback == 0) 109 h->op->remove_callback = nop_remove_callback; 110 77 111 return true; 78 112 } … … 84 118 void hash_table_clear(hash_table_t *h) 85 119 { 86 for (hash_count_t chain = 0; chain < h->entries; ++chain) { 87 link_t *cur; 88 link_t *next; 89 90 for (cur = h->entry[chain].head.next; 91 cur != &h->entry[chain].head; 92 cur = next) { 93 next = cur->next; 120 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) { 121 list_foreach_safe(h->bucket[idx], cur, next) { 94 122 list_remove(cur); 95 123 h->op->remove_callback(cur); 96 124 } 97 125 } 126 127 h->items = 0; 128 129 /* Shrink the table to its minimum size if possible. */ 130 if (HT_MIN_BUCKETS < h->bucket_cnt) { 131 list_t *new_buckets; 132 if (alloc_table(HT_MIN_BUCKETS, &new_buckets)) { 133 free(h->bucket); 134 h->bucket = new_buckets; 135 h->bucket_cnt = HT_MIN_BUCKETS; 136 } 137 } 98 138 } 99 139 … … 106 146 { 107 147 assert(h); 108 assert(h->entry); 109 110 free(h->entry); 148 assert(h->bucket); 149 150 free(h->bucket); 151 152 h->bucket = 0; 153 h->bucket_cnt = 0; 111 154 } 112 155 … … 117 160 * @param item Item to be inserted into the hash table. 118 161 */ 119 void hash_table_insert(hash_table_t *h, unsigned long key[],link_t *item)162 void hash_table_insert(hash_table_t *h, link_t *item) 120 163 { 121 164 assert(item); 122 assert(h && h->op && h->op->hash && h->op->compare); 123 124 hash_index_t chain = h->op->hash(key); 125 assert(chain < h->entries); 126 127 list_append(item, &h->entry[chain]); 165 assert(h && h->bucket); 166 assert(h->op && h->op->hash); 167 168 size_t idx = h->op->hash(item) % h->bucket_cnt; 169 170 assert(idx < h->bucket_cnt); 171 172 list_append(item, &h->bucket[idx]); 173 item_inserted(h); 174 } 175 176 177 /** Insert item into a hash table if not already present. 178 * 179 * @param h Hash table. 180 * @param key Array of all keys necessary to compute hash index. 181 * @param item Item to be inserted into the hash table. 182 * 183 * @return False if such an item had already been inserted. 184 * @return True if the inserted item was the only item with such a lookup key. 185 */ 186 bool hash_table_insert_unique(hash_table_t *h, link_t *item) 187 { 188 assert(item); 189 assert(h && h->bucket && h->bucket_cnt); 190 assert(h->op && h->op->hash && h->op->equal); 191 192 size_t item_hash = h->op->hash(item); 193 size_t idx = item_hash % h->bucket_cnt; 194 195 assert(idx < h->bucket_cnt); 196 197 /* Check for duplicates. */ 198 list_foreach(h->bucket[idx], cur) { 199 /* 200 * We could filter out items using their hashes first, but 201 * calling equal() might very well be just as fast. 202 */ 203 if (h->op->equal(cur, item)) 204 return false; 205 } 206 207 list_append(item, &h->bucket[idx]); 208 item_inserted(h); 209 210 return true; 128 211 } 129 212 … … 138 221 link_t *hash_table_find(hash_table_t *h, unsigned long key[]) 139 222 { 140 assert(h && h->op && h->op->hash && h->op->compare); 141 142 hash_index_t chain = h->op->hash(key); 143 assert(chain < h->entries); 144 145 list_foreach(h->entry[chain], cur) { 146 if (h->op->compare(key, h->max_keys, cur)) { 147 /* 148 * The entry is there. 223 assert(h && h->bucket); 224 assert(h->op && h->op->key_hash && h->op->match); 225 226 size_t key_hash = h->op->key_hash(key); 227 size_t idx = key_hash % h->bucket_cnt; 228 229 assert(idx < h->bucket_cnt); 230 231 list_foreach(h->bucket[idx], cur) { 232 /* 233 * Is this is the item we are looking for? We could have first 234 * checked if the hashes match but op->match() may very well be 235 * just as fast as op->hash(). 236 */ 237 if (h->op->match(key, h->max_keys, cur)) { 238 return cur; 239 } 240 } 241 242 return NULL; 243 } 244 245 246 /** Apply function to all items in hash table. 247 * 248 * @param h Hash table. 249 * @param f Function to be applied. Return false if no more items 250 * should be visited. The functor must not delete the successor 251 * of the item passed in the first argument. 252 * @param arg Argument to be passed to the function. 253 * 254 */ 255 void hash_table_apply(hash_table_t *h, bool (*f)(link_t *, void *), void *arg) 256 { 257 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) { 258 list_foreach_safe(h->bucket[idx], cur, next) { 259 /* 260 * The next pointer had already been saved. f() may safely 261 * delete cur (but not next!). 149 262 */ 150 return cur; 151 } 152 } 153 154 return NULL; 263 if (!f(cur, arg)) 264 return; 265 } 266 } 155 267 } 156 268 … … 163 275 * the hash table. 164 276 * @param keys Number of keys in the 'key' array. 165 * 166 */ 167 void hash_table_remove(hash_table_t *h, unsigned long key[], hash_count_t keys) 168 { 169 assert(h && h->op && h->op->hash && h->op->compare && 277 * 278 * @return Returns the number of removed items. 279 */ 280 size_t hash_table_remove(hash_table_t *h, unsigned long key[], size_t key_cnt) 281 { 282 assert(h && h->bucket); 283 assert(h && h->op && h->op->hash && 170 284 h->op->remove_callback); 171 assert(keys <= h->max_keys); 172 173 if (keys == h->max_keys) { 285 assert(key_cnt <= h->max_keys); 286 287 /* All keys are known, remove from a specific bucket. */ 288 if (key_cnt == h->max_keys) { 289 return remove_duplicates(h, key); 290 } else { 174 291 /* 175 * All keys are known, hash_table_find() can be used to find the 176 * entry. 177 */ 178 179 link_t *cur = hash_table_find(h, key); 180 if (cur) { 181 list_remove(cur); 182 h->op->remove_callback(cur); 183 } 184 185 return; 186 } 187 292 * Fewer keys were passed. 293 * Any partially matching entries are to be removed. 294 */ 295 return remove_matching(h, key, key_cnt); 296 } 297 } 298 299 /** Removes an item already present in the table. The item must be in the table.*/ 300 void hash_table_remove_item(hash_table_t *h, link_t *item) 301 { 302 assert(item); 303 assert(h && h->bucket); 304 305 remove_item(h, item); 306 } 307 308 /** Unlink the item from a bucket, update statistics and resize if needed. */ 309 static inline void remove_item(hash_table_t *h, link_t *item) 310 { 311 assert(item); 312 313 list_remove(item); 314 item_removed(h); 315 h->op->remove_callback(item); 316 } 317 318 /** Removes all items matching key in the bucket key hashes to. */ 319 static size_t remove_duplicates(hash_table_t *h, unsigned long key[]) 320 { 321 assert(h && h->bucket); 322 assert(h->op && h->op->key_hash && h->op->match); 323 324 size_t key_hash = h->op->key_hash(key); 325 size_t idx = key_hash % h->bucket_cnt; 326 327 assert(idx < h->bucket_cnt); 328 329 size_t removed = 0; 330 331 list_foreach_safe(h->bucket[idx], cur, next) { 332 if (h->op->match(key, h->max_keys, cur)) { 333 ++removed; 334 remove_item(h, cur); 335 } 336 } 337 338 return removed; 339 } 340 341 /** Removes all items in any bucket in the table that match the partial key. */ 342 static size_t remove_matching(hash_table_t *h, unsigned long key[], 343 size_t key_cnt) 344 { 345 assert(h && h->bucket); 346 assert(key_cnt < h->max_keys); 347 348 size_t removed = 0; 188 349 /* 189 350 * Fewer keys were passed. 190 351 * Any partially matching entries are to be removed. 191 352 */ 192 hash_index_t chain; 193 for (chain = 0; chain < h->entries; chain++) { 194 for (link_t *cur = h->entry[chain].head.next; 195 cur != &h->entry[chain].head; 196 cur = cur->next) { 197 if (h->op->compare(key, keys, cur)) { 198 link_t *hlp; 199 200 hlp = cur; 201 cur = cur->prev; 202 203 list_remove(hlp); 204 h->op->remove_callback(hlp); 205 206 continue; 353 for (size_t idx = 0; idx < h->bucket_cnt; ++idx) { 354 list_foreach_safe(h->bucket[idx], cur, next) { 355 if (h->op->match(key, key_cnt, cur)) { 356 ++removed; 357 remove_item(h, cur); 207 358 } 208 359 } 209 360 } 210 } 211 212 /** Apply function to all items in hash table. 213 * 214 * @param h Hash table. 215 * @param f Function to be applied. 216 * @param arg Argument to be passed to the function. 217 * 218 */ 219 void hash_table_apply(hash_table_t *h, void (*f)(link_t *, void *), void *arg) 220 { 221 for (hash_index_t bucket = 0; bucket < h->entries; bucket++) { 222 link_t *cur; 223 link_t *next; 224 225 for (cur = h->entry[bucket].head.next; cur != &h->entry[bucket].head; 226 cur = next) { 227 /* 228 * The next pointer must be stored prior to the functor 229 * call to allow using destructor as the functor (the 230 * free function could overwrite the cur->next pointer). 231 */ 232 next = cur->next; 233 f(cur, arg); 234 } 235 } 236 } 361 362 return removed; 363 364 } 365 366 /** Rounds up size to the nearest suitable table size. */ 367 static size_t round_up_size(size_t size) 368 { 369 size_t rounded_size = HT_MIN_BUCKETS; 370 371 while (rounded_size < size) { 372 rounded_size = 2 * rounded_size + 1; 373 } 374 375 return rounded_size; 376 } 377 378 /** Allocates and initializes the desired number of buckets. True if successful.*/ 379 static bool alloc_table(size_t bucket_cnt, list_t **pbuckets) 380 { 381 assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt); 382 383 list_t *buckets = malloc(bucket_cnt * sizeof(list_t)); 384 if (!buckets) 385 return false; 386 387 for (size_t i = 0; i < bucket_cnt; i++) 388 list_initialize(&buckets[i]); 389 390 *pbuckets = buckets; 391 return true; 392 } 393 394 /** Allocates and rehashes items to a new table. Frees the old table. */ 395 static void resize(hash_table_t *h, size_t new_bucket_cnt) 396 { 397 assert(h && h->bucket); 398 399 list_t *new_buckets; 400 401 /* Leave the table as is if we cannot resize. */ 402 if (!alloc_table(new_bucket_cnt, &new_buckets)) 403 return; 404 405 /* Rehash all the items to the new table. */ 406 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) { 407 list_foreach_safe(h->bucket[old_idx], cur, next) { 408 size_t new_idx = h->op->hash(cur) % new_bucket_cnt; 409 list_remove(cur); 410 list_append(cur, &new_buckets[new_idx]); 411 } 412 } 413 414 free(h->bucket); 415 h->bucket = new_buckets; 416 h->bucket_cnt = new_bucket_cnt; 417 } 418 419 /** Shrinks the table if needed. */ 420 static void item_removed(hash_table_t *h) 421 { 422 --h->items; 423 424 if (HT_MIN_BUCKETS < h->items && h->items <= HT_MAX_LOAD * h->bucket_cnt / 4) { 425 /* 426 * Keep the bucket_cnt odd (possibly also prime). 427 * Shrink from 2n + 1 to n. Integer division discards the +1. 428 */ 429 size_t new_bucket_cnt = h->bucket_cnt / 2; 430 resize(h, new_bucket_cnt); 431 } 432 } 433 434 /** Grows the table if needed. */ 435 static void item_inserted(hash_table_t *h) 436 { 437 ++h->items; 438 439 /* Grow the table if the average bucket load exceeds the maximum. */ 440 if (HT_MAX_LOAD * h->bucket_cnt < h->items) { 441 /* Keep the bucket_cnt odd (possibly also prime). */ 442 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1; 443 resize(h, new_bucket_cnt); 444 } 445 } 446 237 447 238 448 /** @}
Note:
See TracChangeset
for help on using the changeset viewer.