toys

toys.git
git clone git://git.lenczewski.org/toys.git
Log | Files | Refs | README | LICENSE

trie.h (6787B)


      1 #ifndef TRIE_H
      2 #define TRIE_H
      3 
      4 #include <assert.h>
      5 #include <stddef.h>
      6 
      7 #include "arena.h"
      8 #include "list.h"
      9 
     10 /* a trie is a data structure that holds a trie of string prefixes. we can walk
     11  * this trie to search for a particular string. each node optionally contains a
     12  * string prefix, and can have zero or more child nodes.
     13  */
     14 struct trie_node {
     15 	struct trie_node *parent;
     16 	struct list_node children;
     17 
     18 	struct {
     19 		char const *begin, *end;
     20 	} prefix;
     21 
     22 	struct list_node list_node;
     23 };
     24 
     25 /* tries can have a bounded number of children (i.e. fixed radix tries), or can
     26  * be adaptive to the number of characters in the input alphabet (i.e. adaptive
     27  * radix tries).
     28  */
     29 #ifndef TRIE_ADAPTIVE_RADIX
     30 
     31 /* the maximum number of children each trie node can have is known as its
     32  * "radix", and affects the maximum depth of the trie (therefore the worst case
     33  * search time). a value of 2 reults in essentially a binary search tree of
     34  * prefixes. higher values result in shallower and wider trees.
     35  */
     36 #ifndef TRIE_MAX_RADIX
     37 # define TRIE_MAX_RADIX 2
     38 #endif /* TRIE_MAX_RADIX */
     39 
     40 #endif /* TRIE_ADAPTIVE_RADIX */
     41 
     42 inline struct trie_node *
     43 trie_init(struct arena *arena)
     44 {
     45 	struct trie_node *res = ARENA_ALLOC_SIZED(arena, struct trie_node);
     46 	if (!res)
     47 		return NULL;
     48 
     49 	res->parent = NULL;
     50 	res->children = LIST_INIT(res->children);
     51 	res->prefix.begin = res->prefix.end = NULL;
     52 	res->list_node.prev = res->list_node.next = NULL;
     53 
     54 	return res;
     55 }
     56 
     57 inline char const *
     58 _trie_split_prefix(struct trie_node const *trie, char const *key, size_t len)
     59 {
     60 	assert(trie->prefix.begin && trie->prefix.end);
     61 
     62 	char const *cur = trie->prefix.begin, *key_end = key + len;
     63 	while (cur < trie->prefix.end && key < key_end) {
     64 		if (*cur != *key)
     65 			break;
     66 
     67 		cur++;
     68 		key++;
     69 	}
     70 
     71 	return cur;
     72 }
     73 
     74 /* if prefix == NULL:
     75  *  is group node, check all children
     76  *  if failed to add to any children, add new child
     77  * else compare key against prefix:
     78  *  if key == prefix:
     79  *   return node
     80  *  else if key < prefix:
     81  *   prepend parent node with prefix: key
     82  *   set current node prefix to: (prefix - key)
     83  *  else if key > prefix:
     84  *   append child node with prefix: (key - prefix)
     85  *  else if key != prefix:
     86  *   prepend parent node with prefix: NULL
     87  *   append child node to parent node with prefix: key
     88  */
     89 inline struct trie_node *
     90 trie_insert(struct arena *arena, struct trie_node *trie, char const *key, size_t len)
     91 {
     92 	assert(key);
     93 	assert(len);
     94 
     95 	struct trie_node *res = NULL;
     96 
     97 	if (!trie->prefix.begin && !trie->prefix.end) {
     98 		struct trie_node *it;
     99 		LIST_ENTRY_ITER(&trie->children, it, list_node) {
    100 			if ((res = trie_insert(arena, it, key, len)))
    101 				return res;
    102 		}
    103 
    104 		if (!(res = trie_init(arena)))
    105 			return NULL;
    106 
    107 		res->parent = trie;
    108 		res->prefix.begin = key;
    109 		res->prefix.end = key + len;
    110 		list_push_tail(&trie->children, &res->list_node);
    111 
    112 		return res;
    113 	}
    114 
    115 	char const *split = _trie_split_prefix(trie, key, len);
    116 	size_t split_len = split - trie->prefix.begin;
    117 
    118 #if 0
    119 	printf("node prefix: \"%.*s\", prefix len: %zu\n",
    120 			(int) (trie->prefix.end - trie->prefix.begin),
    121 			trie->prefix.begin,
    122 			trie->prefix.end - trie->prefix.begin);
    123 
    124 	printf("before split: \"%.*s\", after split: \"%.*s\", split len: %zu\n",
    125 			(int) (split - trie->prefix.begin), trie->prefix.begin,
    126 			(int) (trie->prefix.end - split), split,
    127 			split_len);
    128 
    129 	printf("key: \"%.*s\", key len: %zu\n", (int) len, key, len);
    130 #endif
    131 
    132 	if (split == trie->prefix.end && split_len == len) { /* key == prefix */
    133 		return trie;
    134 	}
    135 
    136 	if (split == trie->prefix.end && split_len < len) { /* key > prefix */
    137 		struct trie_node *it;
    138 		LIST_ENTRY_ITER(&trie->children, it, list_node) {
    139 			if ((res = trie_insert(arena, it,
    140 					       key + split_len,
    141 					       len - split_len)))
    142 				return res;
    143 		}
    144 
    145 		if (!(res = trie_init(arena)))
    146 			return NULL;
    147 
    148 		res->parent = trie;
    149 		res->prefix.begin = key + split_len;
    150 		res->prefix.end = key + len;
    151 		list_push_tail(&trie->children, &res->list_node);
    152 
    153 		return res;
    154 	}
    155 
    156 	if (split == trie->prefix.begin) { /* key != prefix */
    157 		return NULL;
    158 	}
    159 
    160 	/* key < prefix */
    161 
    162 	list_node_unlink(&trie->list_node);
    163 
    164 	struct trie_node *new_parent;
    165 	if (!(new_parent = trie_init(arena)))
    166 		return NULL;
    167 
    168 	new_parent->parent = trie->parent;
    169 	new_parent->prefix.begin = key;
    170 	new_parent->prefix.end = key + split_len;
    171 	list_push_tail(&trie->parent->children, &new_parent->list_node);
    172 
    173 	/* trim old parent */
    174 	trie->parent = new_parent;
    175 	trie->prefix.begin = trie->prefix.begin + split_len;
    176 	trie->list_node.prev = trie->list_node.next = NULL;
    177 	list_push_tail(&new_parent->children, &trie->list_node);
    178 
    179 	if (split_len == len) /* no remainder */
    180 		return new_parent;
    181 
    182 	/* handle remainder of key */
    183 	if (!(res = trie_init(arena)))
    184 		return NULL;
    185 
    186 	res->parent = new_parent;
    187 	res->prefix.begin = key + split_len;
    188 	res->prefix.end = key + len;
    189 	list_push_tail(&new_parent->children, &res->list_node);
    190 
    191 	return res;
    192 }
    193 
    194 inline struct trie_node const *
    195 trie_search(struct trie_node const *trie, char const *key, size_t len)
    196 {
    197 	struct trie_node const *res = NULL;
    198 
    199 	if (!trie->prefix.begin && !trie->prefix.end) {
    200 		struct trie_node *it;
    201 		LIST_ENTRY_ITER(&trie->children, it, list_node) {
    202 			if ((res = trie_search(it, key, len)))
    203 				return res;
    204 		}
    205 
    206 		return NULL;
    207 	}
    208 
    209 	char const *split = _trie_split_prefix(trie, key, len);
    210 	size_t split_len = split - trie->prefix.begin;
    211 
    212 	if (split == trie->prefix.end && split_len == len)
    213 		return trie;
    214 
    215 	if (split == trie->prefix.end && split_len < len) {
    216 		struct trie_node *it;
    217 		LIST_ENTRY_ITER(&trie->children, it, list_node) {
    218 			if ((res = trie_search(it, key + split_len, len - split_len)))
    219 				return res;
    220 		}
    221 
    222 		return NULL;
    223 	}
    224 
    225 	if (split == trie->prefix.begin)
    226 		return NULL;
    227 
    228 	struct trie_node *it;
    229 	LIST_ENTRY_ITER(&trie->children, it, list_node) {
    230 		if ((res = trie_search(it, key + split_len, len - split_len)))
    231 			return res;
    232 	}
    233 
    234 	return NULL;
    235 }
    236 
    237 inline void
    238 trie_display(struct trie_node const *trie, size_t indent)
    239 {
    240 	for (size_t i = 0; i < indent; i++)
    241 		printf("  ");
    242 
    243 	printf("{ %p, prefix: \"%.*s\", }\n",
    244 			trie, (int) (trie->prefix.end - trie->prefix.begin), trie->prefix.begin);
    245 
    246 	struct trie_node *it;
    247 	LIST_ENTRY_ITER(&trie->children, it, list_node) {
    248 		trie_display(it, indent + 1);
    249 	}
    250 }
    251 
    252 #endif /* TRIE_H */
    253 
    254 #ifdef HEADER_IMPL
    255 
    256 extern inline struct trie_node *
    257 trie_init(struct arena *arena);
    258 
    259 extern inline char const *
    260 _trie_split_prefix(struct trie_node const *trie, char const *key, size_t len);
    261 
    262 extern inline struct trie_node *
    263 trie_insert(struct arena *arena, struct trie_node *trie, char const *key, size_t len);
    264 
    265 extern inline struct trie_node const *
    266 trie_search(struct trie_node const *trie, char const *key, size_t len);
    267 
    268 extern inline void
    269 trie_display(struct trie_node const *trie, size_t indent);
    270 
    271 #endif /* HEADER_IMPL */