Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • kernel/generic/src/adt/hash_table.c

    r82cbf8c6 rfeeac0d  
    11/*
    2  * Copyright (c) 2008 Jakub Jermar
    3  * Copyright (c) 2012 Adam Hraska
    4  *
     2 * Copyright (c) 2006 Jakub Jermar
    53 * All rights reserved.
    64 *
     
    2927 */
    3028
    31 /** @addtogroup libc
     29/** @addtogroup genericadt
    3230 * @{
    3331 */
    34 /** @file
    35  */
    3632
    37 /*
    38  * This is an implementation of a generic resizable chained hash table.
    39  *
    40  * The table grows to 2*n+1 buckets each time, starting at n == 89,
    41  * per Thomas Wang's recommendation:
    42  * http://www.concentric.net/~Ttwang/tech/hashsize.htm
    43  *
    44  * This policy produces prime table sizes for the first five resizes
    45  * and generally produces table sizes which are either prime or
    46  * have fairly large (prime/odd) divisors. Having a prime table size
    47  * mitigates the use of suboptimal hash functions and distributes
    48  * items over the whole table.
     33/**
     34 * @file
     35 * @brief Implementation of generic chained hash table.
     36 *
     37 * This file contains implementation of generic chained hash table.
    4938 */
    5039
    5140#include <adt/hash_table.h>
    5241#include <adt/list.h>
     42#include <typedefs.h>
     43#include <debug.h>
    5344#include <mm/slab.h>
    54 #include <assert.h>
    55 #include <str.h>
    56 
    57 /* Optimal initial bucket count. See comment above. */
    58 #define HT_MIN_BUCKETS  89
    59 /* The table is resized when the average load per bucket exceeds this number. */
    60 #define HT_MAX_LOAD     2
    61 
    62 
    63 static size_t round_up_size(size_t);
    64 static bool alloc_table(size_t, list_t **);
    65 static void clear_items(hash_table_t *);
    66 static void resize(hash_table_t *, size_t);
    67 static void grow_if_needed(hash_table_t *);
    68 static void shrink_if_needed(hash_table_t *);
    69 
    70 /* Dummy do nothing callback to invoke in place of remove_callback == NULL. */
    71 static void nop_remove_callback(ht_link_t *item)
    72 {
    73         /* no-op */
    74 }
    75 
     45#include <memstr.h>
    7646
    7747/** Create chained hash table.
    7848 *
    79  * @param h        Hash table structure. Will be initialized by this call.
    80  * @param init_size Initial desired number of hash table buckets. Pass zero
    81  *                 if you want the default initial size.
    82  * @param max_load The table is resized when the average load per bucket
    83  *                 exceeds this number. Pass zero if you want the default.
    84  * @param op       Hash table operations structure. remove_callback()
    85  *                 is optional and can be NULL if no action is to be taken
    86  *                 upon removal. equal() is optional if and only if
    87  *                 hash_table_insert_unique() will never be invoked.
    88  *                 All other operations are mandatory.
    89  *
    90  * @return True on success
    91  *
     49 * @param h Hash table structure. Will be initialized by this call.
     50 * @param m Number of slots in the hash table.
     51 * @param max_keys Maximal number of keys needed to identify an item.
     52 * @param op Hash table operations structure.
    9253 */
    93 bool hash_table_create(hash_table_t *h, size_t init_size, size_t max_load,
    94     hash_table_ops_t *op)
     54void hash_table_create(hash_table_t *h, size_t m, size_t max_keys, hash_table_operations_t *op)
    9555{
    96         assert(h);
    97         assert(op && op->hash && op->key_hash && op->key_equal);
     56        size_t i;
     57
     58        ASSERT(h);
     59        ASSERT(op);
     60        ASSERT(op->hash);
     61        ASSERT(op->compare);
     62        ASSERT(max_keys > 0);
    9863       
    99         /* Check for compulsory ops. */
    100         if (!op || !op->hash || !op->key_hash || !op->key_equal)
    101                 return false;
     64        h->entry = (list_t *) malloc(m * sizeof(list_t), 0);
     65        if (!h->entry)
     66                panic("Cannot allocate memory for hash table.");
    10267       
    103         h->bucket_cnt = round_up_size(init_size);
     68        memsetb(h->entry, m * sizeof(list_t), 0);
    10469       
    105         if (!alloc_table(h->bucket_cnt, &h->bucket))
    106                 return false;
     70        for (i = 0; i < m; i++)
     71                list_initialize(&h->entry[i]);
    10772       
    108         h->max_load = (max_load == 0) ? HT_MAX_LOAD : max_load;
    109         h->item_cnt = 0;
     73        h->entries = m;
     74        h->max_keys = max_keys;
    11075        h->op = op;
    111         h->full_item_cnt = h->max_load * h->bucket_cnt;
    112         h->apply_ongoing = false;
    113 
    114         if (h->op->remove_callback == NULL) {
    115                 h->op->remove_callback = nop_remove_callback;
    116         }
    117        
    118         return true;
    11976}
    12077
    121 /** Destroy a hash table instance.
     78/** Insert item into hash table.
    12279 *
    123  * @param h Hash table to be destroyed.
    124  *
    125  */
    126 void hash_table_destroy(hash_table_t *h)
    127 {
    128         assert(h && h->bucket);
    129         assert(!h->apply_ongoing);
    130        
    131         clear_items(h);
    132        
    133         free(h->bucket);
    134 
    135         h->bucket = NULL;
    136         h->bucket_cnt = 0;
    137 }
    138 
    139 /** Returns true if there are no items in the table. */
    140 bool hash_table_empty(hash_table_t *h)
    141 {
    142         assert(h && h->bucket);
    143         return h->item_cnt == 0;
    144 }
    145 
    146 /** Returns the number of items in the table. */
    147 size_t hash_table_size(hash_table_t *h)
    148 {
    149         assert(h && h->bucket);
    150         return h->item_cnt;
    151 }
    152 
    153 /** Remove all elements from the hash table
    154  *
    155  * @param h Hash table to be cleared
    156  */
    157 void hash_table_clear(hash_table_t *h)
    158 {
    159         assert(h && h->bucket);
    160         assert(!h->apply_ongoing);
    161        
    162         clear_items(h);
    163        
    164         /* Shrink the table to its minimum size if possible. */
    165         if (HT_MIN_BUCKETS < h->bucket_cnt) {
    166                 resize(h, HT_MIN_BUCKETS);
    167         }
    168 }
    169 
    170 /** Unlinks and removes all items but does not resize. */
    171 static void clear_items(hash_table_t *h)
    172 {
    173         if (h->item_cnt == 0)
    174                 return;
    175        
    176         for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
    177                 list_foreach_safe(h->bucket[idx], cur, next) {
    178                         assert(cur);
    179                         ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    180                        
    181                         list_remove(cur);
    182                         h->op->remove_callback(cur_link);
    183                 }
    184         }
    185        
    186         h->item_cnt = 0;
    187 }
    188 
    189 /** Insert item into a hash table.
    190  *
    191  * @param h    Hash table.
     80 * @param h Hash table.
     81 * @param key Array of all keys necessary to compute hash index.
    19282 * @param item Item to be inserted into the hash table.
    19383 */
    194 void hash_table_insert(hash_table_t *h, ht_link_t *item)
     84void hash_table_insert(hash_table_t *h, sysarg_t key[], link_t *item)
    19585{
    196         assert(item);
    197         assert(h && h->bucket);
    198         assert(!h->apply_ongoing);
     86        size_t chain;
    19987       
    200         size_t idx = h->op->hash(item) % h->bucket_cnt;
     88        ASSERT(item);
     89        ASSERT(h);
     90        ASSERT(h->op);
     91        ASSERT(h->op->hash);
     92        ASSERT(h->op->compare);
    20193       
    202         list_append(&item->link, &h->bucket[idx]);
    203         ++h->item_cnt;
    204         grow_if_needed(h);
    205 }
    206 
    207 
    208 /** Insert item into a hash table if not already present.
    209  *
    210  * @param h    Hash table.
    211  * @param item Item to be inserted into the hash table.
    212  *
    213  * @return False if such an item had already been inserted.
    214  * @return True if the inserted item was the only item with such a lookup key.
    215  */
    216 bool hash_table_insert_unique(hash_table_t *h, ht_link_t *item)
    217 {
    218         assert(item);
    219         assert(h && h->bucket && h->bucket_cnt);
    220         assert(h->op && h->op->hash && h->op->equal);
    221         assert(!h->apply_ongoing);
     94        chain = h->op->hash(key);
     95        ASSERT(chain < h->entries);
    22296       
    223         size_t idx = h->op->hash(item) % h->bucket_cnt;
    224        
    225         /* Check for duplicates. */
    226         list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
    227                 /*
    228                  * We could filter out items using their hashes first, but
    229                  * calling equal() might very well be just as fast.
    230                  */
    231                 if (h->op->equal(cur_link, item))
    232                         return false;
    233         }
    234        
    235         list_append(&item->link, &h->bucket[idx]);
    236         ++h->item_cnt;
    237         grow_if_needed(h);
    238        
    239         return true;
     97        list_append(item, &h->entry[chain]);
    24098}
    24199
    242100/** Search hash table for an item matching keys.
    243101 *
    244  * @param h   Hash table.
     102 * @param h Hash table.
    245103 * @param key Array of all keys needed to compute hash index.
    246104 *
    247105 * @return Matching item on success, NULL if there is no such item.
    248  *
    249106 */
    250 ht_link_t *hash_table_find(const hash_table_t *h, void *key)
     107link_t *hash_table_find(hash_table_t *h, sysarg_t key[])
    251108{
    252         assert(h && h->bucket);
     109        size_t chain;
    253110       
    254         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
    255 
    256         list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
    257                 /*
    258                  * Is this is the item we are looking for? We could have first
    259                  * checked if the hashes match but op->key_equal() may very well be
    260                  * just as fast as op->hash().
    261                  */
    262                 if (h->op->key_equal(key, cur_link)) {
    263                         return cur_link;
     111        ASSERT(h);
     112        ASSERT(h->op);
     113        ASSERT(h->op->hash);
     114        ASSERT(h->op->compare);
     115       
     116        chain = h->op->hash(key);
     117        ASSERT(chain < h->entries);
     118       
     119        link_t *cur = list_first(&h->entry[chain]);
     120        while (cur != NULL) {
     121                if (h->op->compare(key, h->max_keys, cur)) {
     122                        /*
     123                         * The entry is there.
     124                         */
     125                        return cur;
    264126                }
     127                cur = list_next(cur, &h->entry[chain]);
    265128        }
    266129       
     
    268131}
    269132
    270 /** Find the next item equal to item. */
    271 ht_link_t *hash_table_find_next(const hash_table_t *h, ht_link_t *item)
    272 {
    273         assert(item);
    274         assert(h && h->bucket);
    275 
    276         /* Traverse the circular list until we reach the starting item again. */
    277         for (link_t *cur = item->link.next; cur != &item->link; cur = cur->next) {
    278                 assert(cur);
    279                 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    280                 /*
    281                  * Is this is the item we are looking for? We could have first
    282                  * checked if the hashes match but op->equal() may very well be
    283                  * just as fast as op->hash().
    284                  */
    285                 if (h->op->equal(cur_link, item)) {
    286                         return cur_link;
    287                 }
    288         }
    289 
    290         return NULL;
    291 }
    292 
    293133/** Remove all matching items from hash table.
    294134 *
    295  * For each removed item, h->remove_callback() is called.
     135 * For each removed item, h->remove_callback() is called (if not NULL).
    296136 *
    297  * @param h    Hash table.
    298  * @param key  Array of keys that will be compared against items of
    299  *             the hash table.
    300  *
    301  * @return Returns the number of removed items.
     137 * @param h Hash table.
     138 * @param key Array of keys that will be compared against items of the hash table.
     139 * @param keys Number of keys in the key array.
    302140 */
    303 size_t hash_table_remove(hash_table_t *h, void *key)
     141void hash_table_remove(hash_table_t *h, sysarg_t key[], size_t keys)
    304142{
    305         assert(h && h->bucket);
    306         assert(!h->apply_ongoing);
     143        size_t chain;
    307144       
    308         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
    309 
    310         size_t removed = 0;
     145        ASSERT(h);
     146        ASSERT(h->op);
     147        ASSERT(h->op->hash);
     148        ASSERT(h->op->compare);
     149        ASSERT(keys <= h->max_keys);
    311150       
    312         list_foreach_safe(h->bucket[idx], cur, next) {
    313                 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
     151       
     152        if (keys == h->max_keys) {
     153                link_t *cur;
    314154               
    315                 if (h->op->key_equal(key, cur_link)) {
    316                         ++removed;
     155                /*
     156                 * All keys are known, hash_table_find() can be used to find the entry.
     157                 */
     158       
     159                cur = hash_table_find(h, key);
     160                if (cur) {
    317161                        list_remove(cur);
    318                         h->op->remove_callback(cur_link);
     162                        if (h->op->remove_callback)
     163                                h->op->remove_callback(cur);
    319164                }
    320         }
    321 
    322         h->item_cnt -= removed;
    323         shrink_if_needed(h);
    324        
    325         return removed;
    326 }
    327 
    328 /** Removes an item already present in the table. The item must be in the table.*/
    329 void hash_table_remove_item(hash_table_t *h, ht_link_t *item)
    330 {
    331         assert(item);
    332         assert(h && h->bucket);
    333         assert(link_in_use(&item->link));
    334 
    335         list_remove(&item->link);
    336         --h->item_cnt;
    337         h->op->remove_callback(item);
    338         shrink_if_needed(h);
    339 }
    340 
    341 /** Apply function to all items in hash table.
    342  *
    343  * @param h   Hash table.
    344  * @param f   Function to be applied. Return false if no more items
    345  *            should be visited. The functor may only delete the supplied
    346  *            item. It must not delete the successor of the item passed
    347  *            in the first argument.
    348  * @param arg Argument to be passed to the function.
    349  */
    350 void hash_table_apply(hash_table_t *h, bool (*f)(ht_link_t *, void *), void *arg)
    351 {       
    352         assert(f);
    353         assert(h && h->bucket);
    354        
    355         if (h->item_cnt == 0)
    356165                return;
    357        
    358         h->apply_ongoing = true;
    359        
    360         for (size_t idx = 0; idx < h->bucket_cnt; ++idx) {
    361                 list_foreach_safe(h->bucket[idx], cur, next) {
    362                         ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    363                         /*
    364                          * The next pointer had already been saved. f() may safely
    365                          * delete cur (but not next!).
    366                          */
    367                         if (!f(cur_link, arg))
    368                                 goto out;
    369                 }
    370         }
    371 out:
    372         h->apply_ongoing = false;
    373        
    374         shrink_if_needed(h);
    375         grow_if_needed(h);
    376 }
    377 
    378 /** Rounds up size to the nearest suitable table size. */
    379 static size_t round_up_size(size_t size)
    380 {
    381         size_t rounded_size = HT_MIN_BUCKETS;
    382        
    383         while (rounded_size < size) {
    384                 rounded_size = 2 * rounded_size + 1;
    385166        }
    386167       
    387         return rounded_size;
    388 }
    389 
    390 /** Allocates and initializes the desired number of buckets. True if successful.*/
    391 static bool alloc_table(size_t bucket_cnt, list_t **pbuckets)
    392 {
    393         assert(pbuckets && HT_MIN_BUCKETS <= bucket_cnt);
    394                
    395         list_t *buckets = malloc(bucket_cnt * sizeof(list_t), FRAME_ATOMIC);
    396         if (!buckets)
    397                 return false;
    398        
    399         for (size_t i = 0; i < bucket_cnt; i++)
    400                 list_initialize(&buckets[i]);
    401 
    402         *pbuckets = buckets;
    403         return true;
    404 }
    405 
    406 
    407 /** Shrinks the table if the table is only sparely populated. */
    408 static inline void shrink_if_needed(hash_table_t *h)
    409 {
    410         if (h->item_cnt <= h->full_item_cnt / 4 && HT_MIN_BUCKETS < h->bucket_cnt) {
    411                 /*
    412                  * Keep the bucket_cnt odd (possibly also prime).
    413                  * Shrink from 2n + 1 to n. Integer division discards the +1.
    414                  */
    415                 size_t new_bucket_cnt = h->bucket_cnt / 2;
    416                 resize(h, new_bucket_cnt);
     168        /*
     169         * Fewer keys were passed.
     170         * Any partially matching entries are to be removed.
     171         */
     172        for (chain = 0; chain < h->entries; chain++) {
     173                link_t *cur;
     174                for (cur = h->entry[chain].head.next; cur != &h->entry[chain].head;
     175                    cur = cur->next) {
     176                        if (h->op->compare(key, keys, cur)) {
     177                                link_t *hlp;
     178                               
     179                                hlp = cur;
     180                                cur = cur->prev;
     181                               
     182                                list_remove(hlp);
     183                                if (h->op->remove_callback)
     184                                        h->op->remove_callback(hlp);
     185                               
     186                                continue;
     187                        }
     188                }
    417189        }
    418190}
    419191
    420 /** Grows the table if table load exceeds the maximum allowed. */
    421 static inline void grow_if_needed(hash_table_t *h)
    422 {
    423         /* Grow the table if the average bucket load exceeds the maximum. */
    424         if (h->full_item_cnt < h->item_cnt) {
    425                 /* Keep the bucket_cnt odd (possibly also prime). */
    426                 size_t new_bucket_cnt = 2 * h->bucket_cnt + 1;
    427                 resize(h, new_bucket_cnt);
    428         }
    429 }
    430 
    431 /** Allocates and rehashes items to a new table. Frees the old table. */
    432 static void resize(hash_table_t *h, size_t new_bucket_cnt)
    433 {
    434         assert(h && h->bucket);
    435         assert(HT_MIN_BUCKETS <= new_bucket_cnt);
    436        
    437         /* We are traversing the table and resizing would mess up the buckets. */
    438         if (h->apply_ongoing)
    439                 return;
    440        
    441         list_t *new_buckets;
    442 
    443         /* Leave the table as is if we cannot resize. */
    444         if (!alloc_table(new_bucket_cnt, &new_buckets))
    445                 return;
    446        
    447         if (0 < h->item_cnt) {
    448                 /* Rehash all the items to the new table. */
    449                 for (size_t old_idx = 0; old_idx < h->bucket_cnt; ++old_idx) {
    450                         list_foreach_safe(h->bucket[old_idx], cur, next) {
    451                                 ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    452 
    453                                 size_t new_idx = h->op->hash(cur_link) % new_bucket_cnt;
    454                                 list_remove(cur);
    455                                 list_append(cur, &new_buckets[new_idx]);
    456                         }
    457                 }
    458         }
    459        
    460         free(h->bucket);
    461         h->bucket = new_buckets;
    462         h->bucket_cnt = new_bucket_cnt;
    463         h->full_item_cnt = h->max_load * h->bucket_cnt;
    464 }
    465 
    466 
    467192/** @}
    468193 */
Note: See TracChangeset for help on using the changeset viewer.