173

One of the things which I miss while writing programs in C is a dictionary data structure. What's the most convenient way to implement one in C? I am not looking for performance, but ease of coding it from scratch. I don't want it to be generic either -- something like char*int will do. But I do want it to be able to store an arbitrary number of items.

This is intended more as an exercise. I know that there are 3rd party libraries available which one can use. But consider for a moment, that they don't exist. In such a situation what's the quickest way you can implement a dictionary satisfying the above requirements.

5
  • 4
    If you miss having it provided for you, then why do you want to make it from scratch, instead of using a third-party implementation? Dec 8, 2010 at 5:12
  • Yes, that alternative always exists. I posed this question more as an exercise.
    – Rohit
    Dec 8, 2010 at 5:16
  • 18
    Writing a hashtable in C is a fun exercise -- every serious C programmer should do it at least once.
    – Lee
    Dec 8, 2010 at 5:24
  • I think of a dictionary being a datatype rather than a datastructure, since it could be implemented lots of ways -- a list, a hashtable, a tree, a self-balancing tree, etc. Are you asking for a dictionary, or a hashtable? Apr 21, 2013 at 7:54
  • 1
    Related: How to represent a Python-like dictionary in C?[](stackoverflow.com/questions/3269881/…) Jun 23, 2018 at 6:05

12 Answers 12

159

Section 6.6 of The C Programming Language presents a simple dictionary (hashtable) data structure. I don't think a useful dictionary implementation could get any simpler than this. For your convenience, I reproduce the code here.

struct nlist { /* table entry: */
    struct nlist *next; /* next entry in chain */
    char *name; /* defined name */
    char *defn; /* replacement text */
};

#define HASHSIZE 101
static struct nlist *hashtab[HASHSIZE]; /* pointer table */

/* hash: form hash value for string s */
unsigned hash(char *s)
{
    unsigned hashval;
    for (hashval = 0; *s != '\0'; s++)
      hashval = *s + 31 * hashval;
    return hashval % HASHSIZE;
}

/* lookup: look for s in hashtab */
struct nlist *lookup(char *s)
{
    struct nlist *np;
    for (np = hashtab[hash(s)]; np != NULL; np = np->next)
        if (strcmp(s, np->name) == 0)
          return np; /* found */
    return NULL; /* not found */
}

char *strdup(char *);
/* install: put (name, defn) in hashtab */
struct nlist *install(char *name, char *defn)
{
    struct nlist *np;
    unsigned hashval;
    if ((np = lookup(name)) == NULL) { /* not found */
        np = (struct nlist *) malloc(sizeof(*np));
        if (np == NULL || (np->name = strdup(name)) == NULL)
          return NULL;
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    } else /* already there */
        free((void *) np->defn); /*free previous defn */
    if ((np->defn = strdup(defn)) == NULL)
       return NULL;
    return np;
}

char *strdup(char *s) /* make a duplicate of s */
{
    char *p;
    p = (char *) malloc(strlen(s)+1); /* +1 for ’\0’ */
    if (p != NULL)
       strcpy(p, s);
    return p;
}

Note that if the hashes of two strings collide, it may lead to an O(n) lookup time. You can reduce the likelihood of collisions by increasing the value of HASHSIZE. For a complete discussion of the data structure, please consult the book.

9
  • 13
    why is here hashval = *s + 31 * hashval; exactly 31 and not anything else?
    – Incerteza
    Sep 25, 2014 at 8:50
  • 22
    31 is prime. Primes are often used in hash functions to reduce probability of collisions. It has something to do with integer factorization (i.e. you cannot factor a prime).
    – jnovacho
    Oct 4, 2014 at 11:08
  • 3
    @アレックス 31 performed well on test data. Choosing a prime has its benefits, but would not be necessary if the size of the hash table itself was prime.
    – G. Bach
    Apr 3, 2016 at 16:43
  • 5
    Note that the K&R C hashing algorithm is an appalling hash algorithm. See: programmers.stackexchange.com/questions/49550/… for details on how truly terrible is really is. Stop using it!!
    – carveone
    Aug 6, 2016 at 11:01
  • 2
    @Overdrivr: Not necessary in this instance. hashtab is of static duration. Uninitialized variables with static duration (that is, those declared outside of functions, and those declared with the storage class static), are guaranteed to start out as a zero of the right type (ie: 0 or NULL or 0.0)
    – carveone
    Aug 6, 2016 at 11:20
29

The quickest way would be to use an already-existing implementation, like uthash.

And, if you really want to code it yourself, the algorithms from uthash can be examined and re-used. It's BSD-licensed so, other than the requirement to convey the copyright notice, you're pretty well unlimited in what you can do with it.

0
14

For ease of implementation, it's hard to beat naively searching through an array. Aside from some error checking, this is a complete implementation (untested).

typedef struct dict_entry_s {
    const char *key;
    int value;
} dict_entry_s;

typedef struct dict_s {
    int len;
    int cap;
    dict_entry_s *entry;
} dict_s, *dict_t;

int dict_find_index(dict_t dict, const char *key) {
    for (int i = 0; i < dict->len; i++) {
        if (!strcmp(dict->entry[i], key)) {
            return i;
        }
    }
    return -1;
}

int dict_find(dict_t dict, const char *key, int def) {
    int idx = dict_find_index(dict, key);
    return idx == -1 ? def : dict->entry[idx].value;
}

void dict_add(dict_t dict, const char *key, int value) {
   int idx = dict_find_index(dict, key);
   if (idx != -1) {
       dict->entry[idx].value = value;
       return;
   }
   if (dict->len == dict->cap) {
       dict->cap *= 2;
       dict->entry = realloc(dict->entry, dict->cap * sizeof(dict_entry_s));
   }
   dict->entry[dict->len].key = strdup(key);
   dict->entry[dict->len].value = value;
   dict->len++;
}

dict_t dict_new(void) {
    dict_s proto = {0, 10, malloc(10 * sizeof(dict_entry_s))};
    dict_t d = malloc(sizeof(dict_s));
    *d = proto;
    return d;
}

void dict_free(dict_t dict) {
    for (int i = 0; i < dict->len; i++) {
        free(dict->entry[i].key);
    }
    free(dict->entry);
    free(dict);
}
2
  • 6
    "For ease of implementation": You're exactly right: this is the easiest. Plus it implements the OP's request "I do want it to be able to store an arbitrary number of items" - the highest voted answer doesn't do that (unless you believe that picking a compile time constant satisfies "arbitrary"...)
    – davidbak
    Jun 5, 2015 at 4:23
  • 2
    This may be a valid approach depending on the use-case, but the OP explicitly requested a dictionary, and this is definitely not a dictionary. Apr 14, 2019 at 21:20
5

I am surprised no one mentioned hsearch/hcreate set of libraries which although is not available on windows, but is mandated by POSIX, and therefore available in Linux / GNU systems.

The link has a simple and complete basic example that very well explains its usage.

It even has thread safe variant, is easy to use and very performant.

2
  • 3
    Worth noting that people here say it is kind of unusable, although I haven't tried it myself: stackoverflow.com/a/6118591/895245 Apr 26, 2018 at 10:15
  • 1
    Fair enough, however, i have tried the hcreate_r (for multiple hash tables) version in at least one app which ran for a reasonably long enough time to consider it real world. Agreed that its a GNU extension but then that's the case for many other libs too. Though i would still argue that you might still be able to use it for one large key value pair being operated in some real world app
    – fkl
    Apr 27, 2018 at 18:50
3

GLib and gnulib

These are your likely best bets if you don't have more specific requirements, since they are widely available, portable and likely efficient.

See also: Are there any open source C libraries with common data structures?

Zephyr project implementation

This well-known mostly-C embedded OS added one at some point:

2

Create a simple hash function and some linked lists of structures , depending on the hash , assign which linked list to insert the value in . Use the hash for retrieving it as well .

I did a simple implementation some time back :

...
#define K 16 // chaining coefficient

struct dict
{
    char *name; /* name of key */
    int val;   /*  value */
    struct dict *next; /* link field */
};

typedef struct dict dict;
dict *table[K];
int initialized = 0;


void  putval ( char *,int);

void init_dict()
{   
    initialized = 1;
    int i;  
    for(i=0;iname = (char *) malloc (strlen(key_name)+1);
    ptr->val = sval;
    strcpy (ptr->name,key_name);


    ptr->next = (struct dict *)table[hsh];
    table[hsh] = ptr;

}


int getval ( char *key_name )
{   
    int hsh = hash(key_name);   
    dict *ptr;
    for (ptr = table[hsh]; ptr != (dict *) 0;
        ptr = (dict *)ptr->next)
    if (strcmp (ptr->name,key_name) == 0)
        return ptr->val;
    return -1;
}
1
  • 2
    Aren't you missing half the code? where is "hash()" and "putval()"?
    – swdev
    Sep 20, 2015 at 22:12
1

here is a quick implement, i used it to get a 'Matrix'(sruct) from a string. you can have a bigger array and change its values on the run also:

typedef struct  { int** lines; int isDefined; }mat;
mat matA, matB, matC, matD, matE, matF;

/* an auxilary struct to be used in a dictionary */
typedef struct  { char* str; mat *matrix; }stringToMat;

/* creating a 'dictionary' for a mat name to its mat. lower case only! */
stringToMat matCases [] =
{
    { "mat_a", &matA },
    { "mat_b", &matB },
    { "mat_c", &matC },
    { "mat_d", &matD },
    { "mat_e", &matE },
    { "mat_f", &matF },
};

mat* getMat(char * str)
{
    stringToMat* pCase;
    mat * selected = NULL;
    if (str != NULL)
    {
        /* runing on the dictionary to get the mat selected */
        for(pCase = matCases; pCase != matCases + sizeof(matCases) / sizeof(matCases[0]); pCase++ )
        {
            if(!strcmp( pCase->str, str))
                selected = (pCase->matrix);
        }
        if (selected == NULL)
            printf("%s is not a valid matrix name\n", str);
    }
    else
        printf("expected matrix name, got NULL\n");
    return selected;
}
0

A hashtable is the traditional implementation of a simple "Dictionary". If you don't care about speed or size, just search for it. There are many freely available implementations.

Here's the first one I saw -- at a glance, it looks OK to me. (It's pretty basic. If you really want it to hold an unlimited amount of data, then you'll need to add some logic to "realloc" the table memory as it grows.)

-1

Hashing is the key. I think use lookup table and hashing key for this. You can find many hashing function online.

-2

redis is a good reference as redis is a key-value system and it's hash table is a good example

1
  • Your answer could be improved with additional supporting information. Please edit to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers in the help center.
    – Community Bot
    Sep 5, 2022 at 3:11
-3

The quickest method would be using binary tree. Its worst case is also only O(logn).

1
  • 17
    This is incorrect. Worst case lookup for a binary tree is O(n) (degenerate case due to bad insertion order, resulting in a link list, basically) when it is unbalanced. Apr 21, 2013 at 8:13
-3

Additionally, you can use Google CityHash:

#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include <byteswap.h>

#include "city.h"

void swap(uint32* a, uint32* b) {
    int temp = *a;
    *a = *b;
    *b = temp;
}

#define PERMUTE3(a, b, c) swap(&a, &b); swap(&a, &c);

// Magic numbers for 32-bit hashing.  Copied from Murmur3.
static const uint32 c1 = 0xcc9e2d51;
static const uint32 c2 = 0x1b873593;

static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 result;
  memcpy(&result, p, sizeof(result));
  return result;
}

static uint32 Fetch32(const char *p) {
  return UNALIGNED_LOAD32(p);
}

// A 32-bit to 32-bit integer hash copied from Murmur3.
static uint32 fmix(uint32 h)
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;
  return h;
}

static uint32 Rotate32(uint32 val, int shift) {
  // Avoid shifting by 32: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
}

static uint32 Mur(uint32 a, uint32 h) {
  // Helper from Murmur3 for combining two 32-bit values.
  a *= c1;
  a = Rotate32(a, 17);
  a *= c2;
  h ^= a;
  h = Rotate32(h, 19);
  return h * 5 + 0xe6546b64;
}

static uint32 Hash32Len13to24(const char *s, size_t len) {
  uint32 a = Fetch32(s - 4 + (len >> 1));
  uint32 b = Fetch32(s + 4);
  uint32 c = Fetch32(s + len - 8);
  uint32 d = Fetch32(s + (len >> 1));
  uint32 e = Fetch32(s);
  uint32 f = Fetch32(s + len - 4);
  uint32 h = len;

  return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
}

static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 b = 0;
  uint32 c = 9;
  for (size_t i = 0; i < len; i++) {
    signed char v = s[i];
    b = b * c1 + v;
    c ^= b;
  }
  return fmix(Mur(b, Mur(len, c)));
}

static uint32 Hash32Len5to12(const char *s, size_t len) {
  uint32 a = len, b = len * 5, c = 9, d = b;
  a += Fetch32(s);
  b += Fetch32(s + len - 4);
  c += Fetch32(s + ((len >> 1) & 4));
  return fmix(Mur(c, Mur(b, Mur(a, d))));
}

uint32 CityHash32(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
        Hash32Len13to24(s, len);
  }

  // len > 24
  uint32 h = len, g = c1 * len, f = g;
  uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
  uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
  uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
  uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
  uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  h ^= a0;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = Rotate32(f, 19);
  f = f * 5 + 0xe6546b64;
  size_t iters = (len - 1) / 20;
  do {
    uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    uint32 a1 = Fetch32(s + 4);
    uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    uint32 a4 = Fetch32(s + 16);
    h ^= a0;
    h = Rotate32(h, 18);
    h = h * 5 + 0xe6546b64;
    f += a1;
    f = Rotate32(f, 19);
    f = f * c1;
    g += a2;
    g = Rotate32(g, 18);
    g = g * 5 + 0xe6546b64;
    h ^= a3 + a1;
    h = Rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a4;
    g = bswap_32(g) * 5;
    h += a4 * 5;
    h = bswap_32(h);
    f += a0;
    PERMUTE3(f, h, g);
    s += 20;
  } while (--iters != 0);
  g = Rotate32(g, 11) * c1;
  g = Rotate32(g, 17) * c1;
  f = Rotate32(f, 11) * c1;
  f = Rotate32(f, 17) * c1;
  h = Rotate32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
}

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Not the answer you're looking for? Browse other questions tagged or ask your own question.