Changeset 0db0df2 in mainline


Ignore:
Timestamp:
2025-04-07T17:53:53Z (14 hours ago)
Author:
Jiří Zárevúcky <zarevucky.jiri@…>
Branches:
master
Children:
0c6fc7a, 708bfeb, 93de384, e92052d
Parents:
8f8818ac
git-author:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 16:41:57)
git-committer:
Jiří Zárevúcky <zarevucky.jiri@…> (2025-04-07 17:53:53)
Message:

Hash table improvements

Implement hash_table_foreach macro, analogous to list_foreach.

Remove superfluous argument to hash_table_find_next().
(If the user needs to recheck the part of the list already
checked by hash_table_find(), they can just rerun that function.)

Add hash argument to hash_table_ops_t::key_equal.
The big change here is that users with big keys can store the hash
value alongside key in their entries, and for the low low cost of
sizeof(size_t) bytes eliminate a bunch of expensive key comparisons.

Also added a hash function for strings and arbitrary data.
Found this one by asking ChatGPT, because the latency of accesses
to my book collection is currently a couple of hours.

+ Some drive-by unused #include removal.

Files:
28 edited

Legend:

Unmodified
Added
Removed
  • common/adt/hash_table.c

    r8f8818ac r0db0df2  
    247247        assert(h && h->bucket);
    248248
    249         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
     249        size_t hash = h->op->key_hash(key);
     250        size_t idx = hash % h->bucket_cnt;
    250251
    251252        list_foreach(h->bucket[idx], link, ht_link_t, cur_link) {
    252                 /*
    253                  * Is this is the item we are looking for? We could have first
    254                  * checked if the hashes match but op->key_equal() may very well be
    255                  * just as fast as op->hash().
    256                  */
    257                 if (h->op->key_equal(key, cur_link)) {
     253                if (h->op->key_equal(key, hash, cur_link))
    258254                        return cur_link;
    259                 }
    260255        }
    261256
     
    265260/** Find the next item equal to item. */
    266261ht_link_t *
    267 hash_table_find_next(const hash_table_t *h, ht_link_t *first, ht_link_t *item)
     262hash_table_find_next(const hash_table_t *h, ht_link_t *item)
    268263{
    269264        assert(item);
     
    271266
    272267        size_t idx = h->op->hash(item) % h->bucket_cnt;
    273 
    274         /* Traverse the circular list until we reach the starting item again. */
    275         for (link_t *cur = item->link.next; cur != &first->link;
    276             cur = cur->next) {
    277                 assert(cur);
    278 
    279                 if (cur == &h->bucket[idx].head)
    280                         continue;
    281 
     268        list_t *list = &h->bucket[idx];
     269        link_t *cur = list_next(&item->link, list);
     270
     271        /* Traverse the list until we reach its end. */
     272        for (; cur != NULL; cur = list_next(cur, list)) {
    282273                ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    283                 /*
    284                  * Is this is the item we are looking for? We could have first
    285                  * checked if the hashes match but op->equal() may very well be
    286                  * just as fast as op->hash().
    287                  */
    288                 if (h->op->equal(cur_link, item)) {
     274
     275                if (h->op->equal(cur_link, item))
    289276                        return cur_link;
    290                 }
    291277        }
    292278
     
    309295        assert(!h->apply_ongoing);
    310296
    311         size_t idx = h->op->key_hash(key) % h->bucket_cnt;
     297        size_t hash = h->op->key_hash(key);
     298        size_t idx = hash % h->bucket_cnt;
    312299
    313300        size_t removed = 0;
     
    316303                ht_link_t *cur_link = member_to_inst(cur, ht_link_t, link);
    317304
    318                 if (h->op->key_equal(key, cur_link)) {
     305                if (h->op->key_equal(key, hash, cur_link)) {
    319306                        ++removed;
    320307                        list_remove(cur);
  • common/include/adt/hash.h

    r8f8818ac r0db0df2  
    108108}
    109109
     110/** Hash a NUL-terminated string.
     111 * The algorithm may change in the future, so never use it for hashes
     112 * that will be stored to a file or sent over a network.
     113 */
     114static inline size_t hash_string(const char *str)
     115{
     116        /* djb2 hash + extra mixing at the end */
     117
     118        char c;
     119        size_t hash = 5381;
     120
     121        while ((c = *(str++)))
     122                hash = (hash << 5) + hash + c;
     123
     124        return hash_mix(hash);
     125}
     126
     127/** Hash an arbitrarily sized sequence of bytes.
     128 * The algorithm may change in the future, so never use it for hashes
     129 * that will be stored to a file or sent over a network.
     130 */
     131static inline size_t hash_bytes(const void *b, size_t len)
     132{
     133        /* djb2 hash + extra mixing at the end */
     134
     135        // TODO: work in bigger chunks for faster hashing
     136
     137        const char *str = b;
     138
     139        size_t hash = 5381;
     140
     141        for (size_t i = 0; i < len; i++)
     142                hash = (hash << 5) + hash + str[i];
     143
     144        return hash_mix(hash);
     145}
     146
    110147#endif
  • common/include/adt/hash_table.h

    r8f8818ac r0db0df2  
    6060
    6161        /** Returns true if the key is equal to the item's lookup key. */
    62         bool (*key_equal)(const void *key, const ht_link_t *item);
     62        bool (*key_equal)(const void *key, size_t hash, const ht_link_t *item);
    6363
    6464        /** Hash table item removal callback.
     
    8585        member_to_inst((item), type, member)
    8686
     87#define hash_table_foreach(ht, key, member, itype, iterator) \
     88        for (itype *iterator = NULL; \
     89            iterator == NULL; iterator = (itype *) INTPTR_MAX) \
     90                for (ht_link_t *__link = hash_table_find((ht), (key)); \
     91                    __link != NULL && ((iterator = member_to_inst(__link, itype, member))); \
     92                        __link = hash_table_find_next((ht), __link))
     93
    8794extern bool hash_table_create(hash_table_t *, size_t, size_t,
    8895    const hash_table_ops_t *);
     
    96103extern bool hash_table_insert_unique(hash_table_t *, ht_link_t *);
    97104extern ht_link_t *hash_table_find(const hash_table_t *, const void *);
    98 extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *,
    99     ht_link_t *);
     105extern ht_link_t *hash_table_find_next(const hash_table_t *, ht_link_t *);
    100106extern size_t hash_table_remove(hash_table_t *, const void *);
    101107extern void hash_table_remove_item(hash_table_t *, ht_link_t *);
  • kernel/genarch/src/mm/page_ht.c

    r8f8818ac r0db0df2  
    5555static size_t ht_hash(const ht_link_t *);
    5656static size_t ht_key_hash(const void *);
    57 static bool ht_key_equal(const void *, const ht_link_t *);
     57static bool ht_key_equal(const void *, size_t, const ht_link_t *);
    5858static void ht_remove_callback(ht_link_t *);
    5959
     
    119119
    120120/** Return true if the key is equal to the item's lookup key. */
    121 bool ht_key_equal(const void *arg, const ht_link_t *item)
     121bool ht_key_equal(const void *arg, size_t hash, const ht_link_t *item)
    122122{
    123123        const uintptr_t *key = arg;
  • kernel/generic/src/cap/cap.c

    r8f8818ac r0db0df2  
    118118}
    119119
    120 static bool caps_key_equal(const void *key, const ht_link_t *item)
     120static bool caps_key_equal(const void *key, size_t hash, const ht_link_t *item)
    121121{
    122122        const cap_handle_t *handle = key;
  • kernel/generic/src/ddi/irq.c

    r8f8818ac r0db0df2  
    7575static size_t irq_ht_key_hash(const void *);
    7676static bool irq_ht_equal(const ht_link_t *, const ht_link_t *);
    77 static bool irq_ht_key_equal(const void *, const ht_link_t *);
     77static bool irq_ht_key_equal(const void *, size_t, const ht_link_t *);
    7878
    7979static const hash_table_ops_t irq_ht_ops = {
     
    141141{
    142142        irq_spinlock_lock(l, false);
    143         ht_link_t *first = hash_table_find(h, &inr);
    144         for (ht_link_t *lnk = first; lnk;
    145             lnk = hash_table_find_next(h, first, lnk)) {
    146                 irq_t *irq = hash_table_get_inst(lnk, irq_t, link);
     143
     144        hash_table_foreach(h, &inr, link, irq_t, irq) {
    147145                irq_spinlock_lock(&irq->lock, false);
    148146                if (irq->claim(irq) == IRQ_ACCEPT) {
     
    153151                irq_spinlock_unlock(&irq->lock, false);
    154152        }
     153
    155154        irq_spinlock_unlock(l, false);
    156155
     
    223222
    224223/** Return true if the key is equal to the item's lookup key. */
    225 bool irq_ht_key_equal(const void *key, const ht_link_t *item)
     224bool irq_ht_key_equal(const void *key, size_t hash, const ht_link_t *item)
    226225{
    227226        const inr_t *inr = key;
  • kernel/generic/src/lib/ra.c

    r8f8818ac r0db0df2  
    7474
    7575/** Return true if the key is equal to the item's lookup key */
    76 static bool used_key_equal(const void *key, const ht_link_t *item)
     76static bool used_key_equal(const void *key, size_t, const ht_link_t *item)
    7777{
    7878        const uintptr_t *base = key;
  • uspace/app/hbench/env.c

    r8f8818ac r0db0df2  
    3434 */
    3535
     36#include <adt/hash.h>
    3637#include <stdlib.h>
    3738#include <stdio.h>
     
    4243        ht_link_t link;
    4344
     45        size_t hash;
    4446        char *key;
    4547        char *value;
     
    4951{
    5052        param_t *param = hash_table_get_inst(item, param_t, link);
    51         return str_size(param->key);
     53        return param->hash;
    5254}
    5355
    5456static size_t param_key_hash(const void *key)
    5557{
    56         const char *key_str = key;
    57         return str_size(key_str);
     58        return hash_string(key);
    5859}
    5960
    60 static bool param_key_equal(const void *key, const ht_link_t *item)
     61static bool param_key_equal(const void *key, size_t hash, const ht_link_t *item)
    6162{
    6263        param_t *param = hash_table_get_inst(item, param_t, link);
     64
     65        if (param->hash != hash)
     66                return false;
     67
    6368        const char *key_str = key;
    64 
    6569        return str_cmp(param->key, key_str) == 0;
    6670}
     
    7175        param_t *b = hash_table_get_inst(link_b, param_t, link);
    7276
    73         return str_cmp(a->key, b->key) == 0;
     77        return a->hash == b->hash && str_cmp(a->key, b->key) == 0;
    7478}
    7579
     
    116120        param->key = str_dup(key);
    117121        param->value = str_dup(value);
     122        param->hash = hash_string(key);
    118123
    119124        if ((param->key == NULL) || (param->value == NULL)) {
     
    132137const char *bench_env_param_get(bench_env_t *env, const char *key, const char *default_value)
    133138{
    134         ht_link_t *item = hash_table_find(&env->parameters, (char *) key);
     139        ht_link_t *item = hash_table_find(&env->parameters, key);
    135140
    136141        if (item == NULL) {
  • uspace/app/trace/ipcp.c

    r8f8818ac r0db0df2  
    8484}
    8585
    86 static bool pending_call_key_equal(const void *key, const ht_link_t *item)
     86static bool pending_call_key_equal(const void *key, size_t hash, const ht_link_t *item)
    8787{
    8888        const cap_call_handle_t *chandle = key;
  • uspace/app/trace/proto.c

    r8f8818ac r0db0df2  
    6969}
    7070
    71 static bool srv_proto_key_equal(const void *key, const ht_link_t *item)
     71static bool srv_proto_key_equal(const void *key, size_t hash, const ht_link_t *item)
    7272{
    7373        const int *n = key;
     
    9696}
    9797
    98 static bool method_oper_key_equal(const void *key, const ht_link_t *item)
     98static bool method_oper_key_equal(const void *key, size_t hash, const ht_link_t *item)
    9999{
    100100        const int *n = key;
  • uspace/lib/block/block.c

    r8f8818ac r0db0df2  
    254254}
    255255
    256 static bool cache_key_equal(const void *key, const ht_link_t *item)
     256static bool cache_key_equal(const void *key, size_t hash, const ht_link_t *item)
    257257{
    258258        const aoff64_t *lba = key;
  • uspace/lib/c/generic/async/ports.c

    r8f8818ac r0db0df2  
    5050#include <as.h>
    5151#include <abi/mm/as.h>
    52 #include "../private/libc.h"
    5352#include "../private/fibril.h"
    5453
     
    115114}
    116115
    117 static bool interface_key_equal(const void *key, const ht_link_t *item)
     116static bool interface_key_equal(const void *key, size_t hash, const ht_link_t *item)
    118117{
    119118        const iface_t *iface = key;
     
    143142}
    144143
    145 static bool port_key_equal(const void *key, const ht_link_t *item)
     144static bool port_key_equal(const void *key, size_t hash, const ht_link_t *item)
    146145{
    147146        const port_id_t *port_id = key;
  • uspace/lib/c/generic/async/server.c

    r8f8818ac r0db0df2  
    242242}
    243243
    244 static bool client_key_equal(const void *key, const ht_link_t *item)
     244static bool client_key_equal(const void *key, size_t, const ht_link_t *item)
    245245{
    246246        const task_id_t *in_task_id = key;
     
    502502}
    503503
    504 static bool notification_key_equal(const void *key, const ht_link_t *item)
     504static bool notification_key_equal(const void *key, size_t hash, const ht_link_t *item)
    505505{
    506506        const sysarg_t *id = key;
  • uspace/lib/ext4/src/ops.c

    r8f8818ac r0db0df2  
    113113}
    114114
    115 static bool open_nodes_key_equal(const void *key_arg, const ht_link_t *item)
     115static bool open_nodes_key_equal(const void *key_arg, size_t hash, const ht_link_t *item)
    116116{
    117117        const node_key_t *key = key_arg;
  • uspace/lib/nic/src/nic_addr_db.c

    r8f8818ac r0db0df2  
    5252typedef struct nic_addr_entry {
    5353        ht_link_t link;
     54        size_t hash;
    5455        uint8_t len;
    55         uint8_t addr[1];
     56        uint8_t addr[];
    5657} nic_addr_entry_t;
    5758
     
    6061 */
    6162typedef struct {
     63        size_t hash;
    6264        size_t len;
    6365        const uint8_t *addr;
    6466} addr_key_t;
    6567
    66 static bool nic_addr_key_equal(const void *key_arg, const ht_link_t *item)
     68static bool nic_addr_key_equal(const void *key_arg, size_t hash, const ht_link_t *item)
    6769{
    6870        const addr_key_t *key = key_arg;
    6971        nic_addr_entry_t *entry = member_to_inst(item, nic_addr_entry_t, link);
    70 
    71         return memcmp(entry->addr, key->addr, entry->len) == 0;
     72        return entry->hash == hash && memcmp(entry->addr, key->addr, entry->len) == 0;
    7273}
    7374
     
    8687{
    8788        const addr_key_t *key = k;
    88         return addr_hash(key->len, key->addr);
     89        return key->hash ? key->hash : addr_hash(key->len, key->addr);
    8990}
    9091
     
    9293{
    9394        nic_addr_entry_t *entry = member_to_inst(item, nic_addr_entry_t, link);
    94         return addr_hash(entry->len, entry->addr);
     95        return entry->hash;
    9596}
    9697
     
    9899{
    99100        nic_addr_entry_t *entry = member_to_inst(item, nic_addr_entry_t, link);
    100 
    101101        free(entry);
    102102}
     
    173173        addr_key_t key = {
    174174                .len = db->addr_len,
    175                 .addr = addr
     175                .addr = addr,
     176                .hash = addr_hash(db->addr_len, addr),
    176177        };
    177178
     
    179180                return EEXIST;
    180181
    181         nic_addr_entry_t *entry = malloc(sizeof(nic_addr_entry_t) + db->addr_len - 1);
     182        nic_addr_entry_t *entry = malloc(offsetof(nic_addr_entry_t, addr) + db->addr_len);
    182183        if (entry == NULL)
    183184                return ENOMEM;
     
    185186        entry->len = (uint8_t) db->addr_len;
    186187        memcpy(entry->addr, addr, db->addr_len);
     188        entry->hash = key.hash;
    187189
    188190        hash_table_insert(&db->set, &entry->link);
  • uspace/lib/nic/src/nic_wol_virtues.c

    r8f8818ac r0db0df2  
    5757}
    5858
    59 static bool nic_wv_key_equal(const void *key, const ht_link_t *item)
     59static bool nic_wv_key_equal(const void *key, size_t hash, const ht_link_t *item)
    6060{
    6161        const nic_wv_id_t *k = key;
  • uspace/srv/devman/devtree.c

    r8f8818ac r0db0df2  
    3232 */
    3333
    34 #include <errno.h>
    3534#include <io/log.h>
    3635
     
    6160}
    6261
    63 static bool devman_devices_key_equal(const void *key, const ht_link_t *item)
     62static bool devman_devices_key_equal(const void *key, size_t hash, const ht_link_t *item)
    6463{
    6564        const devman_handle_t *handle = key;
     
    6867}
    6968
    70 static bool devman_functions_key_equal(const void *key, const ht_link_t *item)
     69static bool devman_functions_key_equal(const void *key, size_t hash, const ht_link_t *item)
    7170{
    7271        const devman_handle_t *handle = key;
     
    8786}
    8887
    89 static bool loc_functions_key_equal(const void *key, const ht_link_t *item)
     88static bool loc_functions_key_equal(const void *key, size_t hash, const ht_link_t *item)
    9089{
    9190        const service_id_t *service_id = key;
  • uspace/srv/fs/cdfs/cdfs_ops.c

    r8f8818ac r0db0df2  
    298298}
    299299
    300 static bool nodes_key_equal(const void *k, const ht_link_t *item)
     300static bool nodes_key_equal(const void *k, size_t hash, const ht_link_t *item)
    301301{
    302302        cdfs_node_t *node = hash_table_get_inst(item, cdfs_node_t, nh_link);
  • uspace/srv/fs/exfat/exfat_idx.c

    r8f8818ac r0db0df2  
    3737
    3838#include "exfat.h"
    39 #include "../../vfs/vfs.h"
    4039#include <errno.h>
    4140#include <str.h>
     
    139138}
    140139
    141 static bool pos_key_equal(const void *key, const ht_link_t *item)
     140static bool pos_key_equal(const void *key, size_t hash, const ht_link_t *item)
    142141{
    143142        const pos_key_t *pos = key;
     
    180179}
    181180
    182 static bool idx_key_equal(const void *key_arg, const ht_link_t *item)
     181static bool idx_key_equal(const void *key_arg, size_t hash,
     182    const ht_link_t *item)
    183183{
    184184        exfat_idx_t *fidx = hash_table_get_inst(item, exfat_idx_t, uih_link);
  • uspace/srv/fs/fat/fat_idx.c

    r8f8818ac r0db0df2  
    3737
    3838#include "fat.h"
    39 #include "../../vfs/vfs.h"
    4039#include <errno.h>
    4140#include <str.h>
     
    139138}
    140139
    141 static bool pos_key_equal(const void *key, const ht_link_t *item)
     140static bool pos_key_equal(const void *key, size_t hash, const ht_link_t *item)
    142141{
    143142        const pos_key_t *pos = key;
     
    180179}
    181180
    182 static bool idx_key_equal(const void *key_arg, const ht_link_t *item)
     181static bool idx_key_equal(const void *key_arg, size_t hash,
     182    const ht_link_t *item)
    183183{
    184184        fat_idx_t *fidx = hash_table_get_inst(item, fat_idx_t, uih_link);
  • uspace/srv/fs/locfs/locfs_ops.c

    r8f8818ac r0db0df2  
    8383}
    8484
    85 static bool services_key_equal(const void *key, const ht_link_t *item)
     85static bool services_key_equal(const void *key, size_t hash,
     86    const ht_link_t *item)
    8687{
    8788        const service_id_t *k = key;
  • uspace/srv/fs/mfs/mfs_ops.c

    r8f8818ac r0db0df2  
    115115
    116116static bool
    117 open_nodes_key_equal(const void *key, const ht_link_t *item)
     117open_nodes_key_equal(const void *key, size_t hash, const ht_link_t *item)
    118118{
    119119        const node_key_t *node_key = key;
  • uspace/srv/fs/tmpfs/tmpfs_ops.c

    r8f8818ac r0db0df2  
    3838
    3939#include "tmpfs.h"
    40 #include "../../vfs/vfs.h"
    4140#include <macros.h>
    4241#include <stdint.h>
     
    159158}
    160159
    161 static bool nodes_key_equal(const void *key_arg, const ht_link_t *item)
     160static bool nodes_key_equal(const void *key_arg, size_t hash,
     161    const ht_link_t *item)
    162162{
    163163        tmpfs_node_t *node = hash_table_get_inst(item, tmpfs_node_t, nh_link);
  • uspace/srv/fs/udf/udf_idx.c

    r8f8818ac r0db0df2  
    3535 */
    3636
    37 #include "../../vfs/vfs.h"
    3837#include <errno.h>
    3938#include <str.h>
     
    6968}
    7069
    71 static bool udf_idx_key_equal(const void *k, const ht_link_t *item)
     70static bool udf_idx_key_equal(const void *k, size_t hash, const ht_link_t *item)
    7271{
    7372        const udf_ht_key_t *key = k;
  • uspace/srv/hid/input/gsp.c

    r8f8818ac r0db0df2  
    7676}
    7777
    78 static bool trans_key_equal(const void *key, const ht_link_t *item)
     78static bool trans_key_equal(const void *key, size_t hash, const ht_link_t *item)
    7979{
    8080        const trans_key_t *trans_key = key;
  • uspace/srv/ns/service.c

    r8f8818ac r0db0df2  
    7979}
    8080
    81 static bool service_key_equal(const void *key, const ht_link_t *item)
     81static bool service_key_equal(const void *key, size_t hash,
     82    const ht_link_t *item)
    8283{
    8384        const service_t *srv = key;
     
    102103}
    103104
    104 static bool iface_key_equal(const void *key, const ht_link_t *item)
     105static bool iface_key_equal(const void *key, size_t hash, const ht_link_t *item)
    105106{
    106107        const iface_t *kiface = key;
  • uspace/srv/ns/task.c

    r8f8818ac r0db0df2  
    6666}
    6767
    68 static bool task_key_equal(const void *key, const ht_link_t *item)
     68static bool task_key_equal(const void *key, size_t hash, const ht_link_t *item)
    6969{
    7070        const task_id_t *tid = key;
     
    111111}
    112112
    113 static bool p2i_key_equal(const void *key, const ht_link_t *item)
     113static bool p2i_key_equal(const void *key, size_t hash, const ht_link_t *item)
    114114{
    115115        const sysarg_t *label = key;
  • uspace/srv/vfs/vfs_node.c

    r8f8818ac r0db0df2  
    6262static size_t nodes_key_hash(const void *);
    6363static size_t nodes_hash(const ht_link_t *);
    64 static bool nodes_key_equal(const void *, const ht_link_t *);
     64static bool nodes_key_equal(const void *, size_t, const ht_link_t *);
    6565static vfs_triplet_t node_triplet(vfs_node_t *node);
    6666
     
    294294}
    295295
    296 static bool nodes_key_equal(const void *key, const ht_link_t *item)
     296static bool nodes_key_equal(const void *key, size_t hash, const ht_link_t *item)
    297297{
    298298        const vfs_triplet_t *tri = key;
Note: See TracChangeset for help on using the changeset viewer.