trie.h (6787B)
1 #ifndef TRIE_H 2 #define TRIE_H 3 4 #include <assert.h> 5 #include <stddef.h> 6 7 #include "arena.h" 8 #include "list.h" 9 10 /* a trie is a data structure that holds a trie of string prefixes. we can walk 11 * this trie to search for a particular string. each node optionally contains a 12 * string prefix, and can have zero or more child nodes. 13 */ 14 struct trie_node { 15 struct trie_node *parent; 16 struct list_node children; 17 18 struct { 19 char const *begin, *end; 20 } prefix; 21 22 struct list_node list_node; 23 }; 24 25 /* tries can have a bounded number of children (i.e. fixed radix tries), or can 26 * be adaptive to the number of characters in the input alphabet (i.e. adaptive 27 * radix tries). 28 */ 29 #ifndef TRIE_ADAPTIVE_RADIX 30 31 /* the maximum number of children each trie node can have is known as its 32 * "radix", and affects the maximum depth of the trie (therefore the worst case 33 * search time). a value of 2 reults in essentially a binary search tree of 34 * prefixes. higher values result in shallower and wider trees. 35 */ 36 #ifndef TRIE_MAX_RADIX 37 # define TRIE_MAX_RADIX 2 38 #endif /* TRIE_MAX_RADIX */ 39 40 #endif /* TRIE_ADAPTIVE_RADIX */ 41 42 inline struct trie_node * 43 trie_init(struct arena *arena) 44 { 45 struct trie_node *res = ARENA_ALLOC_SIZED(arena, struct trie_node); 46 if (!res) 47 return NULL; 48 49 res->parent = NULL; 50 res->children = LIST_INIT(res->children); 51 res->prefix.begin = res->prefix.end = NULL; 52 res->list_node.prev = res->list_node.next = NULL; 53 54 return res; 55 } 56 57 inline char const * 58 _trie_split_prefix(struct trie_node const *trie, char const *key, size_t len) 59 { 60 assert(trie->prefix.begin && trie->prefix.end); 61 62 char const *cur = trie->prefix.begin, *key_end = key + len; 63 while (cur < trie->prefix.end && key < key_end) { 64 if (*cur != *key) 65 break; 66 67 cur++; 68 key++; 69 } 70 71 return cur; 72 } 73 74 /* if prefix == NULL: 75 * is group node, check all children 76 * if failed to add to any children, add new child 77 * else compare key against prefix: 78 * if key == prefix: 79 * return node 80 * else if key < prefix: 81 * prepend parent node with prefix: key 82 * set current node prefix to: (prefix - key) 83 * else if key > prefix: 84 * append child node with prefix: (key - prefix) 85 * else if key != prefix: 86 * prepend parent node with prefix: NULL 87 * append child node to parent node with prefix: key 88 */ 89 inline struct trie_node * 90 trie_insert(struct arena *arena, struct trie_node *trie, char const *key, size_t len) 91 { 92 assert(key); 93 assert(len); 94 95 struct trie_node *res = NULL; 96 97 if (!trie->prefix.begin && !trie->prefix.end) { 98 struct trie_node *it; 99 LIST_ENTRY_ITER(&trie->children, it, list_node) { 100 if ((res = trie_insert(arena, it, key, len))) 101 return res; 102 } 103 104 if (!(res = trie_init(arena))) 105 return NULL; 106 107 res->parent = trie; 108 res->prefix.begin = key; 109 res->prefix.end = key + len; 110 list_push_tail(&trie->children, &res->list_node); 111 112 return res; 113 } 114 115 char const *split = _trie_split_prefix(trie, key, len); 116 size_t split_len = split - trie->prefix.begin; 117 118 #if 0 119 printf("node prefix: \"%.*s\", prefix len: %zu\n", 120 (int) (trie->prefix.end - trie->prefix.begin), 121 trie->prefix.begin, 122 trie->prefix.end - trie->prefix.begin); 123 124 printf("before split: \"%.*s\", after split: \"%.*s\", split len: %zu\n", 125 (int) (split - trie->prefix.begin), trie->prefix.begin, 126 (int) (trie->prefix.end - split), split, 127 split_len); 128 129 printf("key: \"%.*s\", key len: %zu\n", (int) len, key, len); 130 #endif 131 132 if (split == trie->prefix.end && split_len == len) { /* key == prefix */ 133 return trie; 134 } 135 136 if (split == trie->prefix.end && split_len < len) { /* key > prefix */ 137 struct trie_node *it; 138 LIST_ENTRY_ITER(&trie->children, it, list_node) { 139 if ((res = trie_insert(arena, it, 140 key + split_len, 141 len - split_len))) 142 return res; 143 } 144 145 if (!(res = trie_init(arena))) 146 return NULL; 147 148 res->parent = trie; 149 res->prefix.begin = key + split_len; 150 res->prefix.end = key + len; 151 list_push_tail(&trie->children, &res->list_node); 152 153 return res; 154 } 155 156 if (split == trie->prefix.begin) { /* key != prefix */ 157 return NULL; 158 } 159 160 /* key < prefix */ 161 162 list_node_unlink(&trie->list_node); 163 164 struct trie_node *new_parent; 165 if (!(new_parent = trie_init(arena))) 166 return NULL; 167 168 new_parent->parent = trie->parent; 169 new_parent->prefix.begin = key; 170 new_parent->prefix.end = key + split_len; 171 list_push_tail(&trie->parent->children, &new_parent->list_node); 172 173 /* trim old parent */ 174 trie->parent = new_parent; 175 trie->prefix.begin = trie->prefix.begin + split_len; 176 trie->list_node.prev = trie->list_node.next = NULL; 177 list_push_tail(&new_parent->children, &trie->list_node); 178 179 if (split_len == len) /* no remainder */ 180 return new_parent; 181 182 /* handle remainder of key */ 183 if (!(res = trie_init(arena))) 184 return NULL; 185 186 res->parent = new_parent; 187 res->prefix.begin = key + split_len; 188 res->prefix.end = key + len; 189 list_push_tail(&new_parent->children, &res->list_node); 190 191 return res; 192 } 193 194 inline struct trie_node const * 195 trie_search(struct trie_node const *trie, char const *key, size_t len) 196 { 197 struct trie_node const *res = NULL; 198 199 if (!trie->prefix.begin && !trie->prefix.end) { 200 struct trie_node *it; 201 LIST_ENTRY_ITER(&trie->children, it, list_node) { 202 if ((res = trie_search(it, key, len))) 203 return res; 204 } 205 206 return NULL; 207 } 208 209 char const *split = _trie_split_prefix(trie, key, len); 210 size_t split_len = split - trie->prefix.begin; 211 212 if (split == trie->prefix.end && split_len == len) 213 return trie; 214 215 if (split == trie->prefix.end && split_len < len) { 216 struct trie_node *it; 217 LIST_ENTRY_ITER(&trie->children, it, list_node) { 218 if ((res = trie_search(it, key + split_len, len - split_len))) 219 return res; 220 } 221 222 return NULL; 223 } 224 225 if (split == trie->prefix.begin) 226 return NULL; 227 228 struct trie_node *it; 229 LIST_ENTRY_ITER(&trie->children, it, list_node) { 230 if ((res = trie_search(it, key + split_len, len - split_len))) 231 return res; 232 } 233 234 return NULL; 235 } 236 237 inline void 238 trie_display(struct trie_node const *trie, size_t indent) 239 { 240 for (size_t i = 0; i < indent; i++) 241 printf(" "); 242 243 printf("{ %p, prefix: \"%.*s\", }\n", 244 trie, (int) (trie->prefix.end - trie->prefix.begin), trie->prefix.begin); 245 246 struct trie_node *it; 247 LIST_ENTRY_ITER(&trie->children, it, list_node) { 248 trie_display(it, indent + 1); 249 } 250 } 251 252 #endif /* TRIE_H */ 253 254 #ifdef HEADER_IMPL 255 256 extern inline struct trie_node * 257 trie_init(struct arena *arena); 258 259 extern inline char const * 260 _trie_split_prefix(struct trie_node const *trie, char const *key, size_t len); 261 262 extern inline struct trie_node * 263 trie_insert(struct arena *arena, struct trie_node *trie, char const *key, size_t len); 264 265 extern inline struct trie_node const * 266 trie_search(struct trie_node const *trie, char const *key, size_t len); 267 268 extern inline void 269 trie_display(struct trie_node const *trie, size_t indent); 270 271 #endif /* HEADER_IMPL */