X-Git-Url: http://git.madism.org/?a=blobdiff_plain;f=common%2Ftrie.c;h=124c072a068993c089dfed1125c60a0f1dd108c4;hb=8de978eb48332de45a5ad254b0475e74613fcfbd;hp=2e3ada156b3c0f12c562d3dfb032b632faaf42af;hpb=9c02ad80dbfeaa3d2ee7cdc7e26ebf25ff872a6c;p=apps%2Fpfixtools.git diff --git a/common/trie.c b/common/trie.c index 2e3ada1..124c072 100644 --- a/common/trie.c +++ b/common/trie.c @@ -41,25 +41,29 @@ typedef struct trie_entry_t trie_entry_t; struct trie_entry_t { - int c_offset; - int c_len; + int32_t c_offset; + int32_t c_len; - int children_offset; - int children_len; + int32_t children_offset; + int32_t children_len; }; struct trie_t { trie_entry_t *entries; - int entries_len; - int entries_size; + int32_t entries_len; + int32_t entries_size; char *c; - int c_len; - int c_size; + int32_t c_len; + int32_t c_size; - char **keys; - int keys_len; - int keys_size; + char *keys; + int32_t keys_len; + int32_t keys_size; + + int *keys_offset; + int32_t keys_offset_len; + int32_t keys_offset_size; bool locked; }; @@ -71,17 +75,17 @@ trie_t *trie_new(void) static inline void trie_cleanup_build_data(trie_t *trie) { - for (int i = 0 ; i < trie->keys_len ; ++i) { - p_delete(&trie->keys[i]); - } p_delete(&trie->keys); + p_delete(&trie->keys_offset); trie->keys_len = trie->keys_size = 0; + trie->keys_offset_len = trie->keys_offset_size = 0; } void trie_delete(trie_t **trie) { if (*trie) { trie_cleanup_build_data(*trie); + trie_unlock(*trie); p_delete(&(*trie)->entries); p_delete(&(*trie)->c); p_delete(trie); @@ -199,7 +203,6 @@ static inline void trie_entry_insert_child(trie_t *trie, trie_entry_t *entry, entry->children_len = 1; } else { if (entry->children_offset + entry->children_len != pchild) { - trie_inspect(trie); printf("Inserting child %d while offset is %d[%d]\n", pchild, entry->children_offset, entry->children_len); abort(); @@ -230,8 +233,15 @@ static inline void trie_entry_split(trie_t *trie, trie_entry_t *entry, int pos) void trie_insert(trie_t *trie, const char* key) { - GROW(trie->keys, 1, trie->keys_len, trie->keys_size); - trie->keys[trie->keys_len++] = strdup(key); + assert(trie->entries == NULL && "Trie already compiled"); + + int len = m_strlen(key) + 1; + GROW(trie->keys, len, trie->keys_len, trie->keys_size); + memcpy(trie->keys + trie->keys_len, key, len); + + GROW(trie->keys_offset, 1, trie->keys_offset_len, trie->keys_offset_size); + trie->keys_offset[trie->keys_offset_len++] = trie->keys_len; + trie->keys_len += len; } @@ -244,20 +254,24 @@ static inline void trie_compile_aux(trie_t *trie, int id, char current = '\0'; for (int off_diff = initial_diff ; fork_pos == 0 ; ++off_diff, ++offset) { - current = trie->keys[first_key][offset]; + current = trie->keys[trie->keys_offset[first_key] + offset]; for (int i = first_key + 1 ; i < last_key ; ++i) { - if (trie->keys[i][offset] != current) { + const char *str = trie->keys + trie->keys_offset[i]; + const char c = str[offset]; + if (c != current) { trie_grow(trie, 2); if (fork_pos == 0) { trie_entry_split(trie, &trie->entries[id], off_diff); } trie_entry_insert_child(trie, &trie->entries[id], - trie_add_leaf(trie, trie->keys[i] + - offset)); + trie_add_leaf(trie, str + offset)); forks[fork_pos++] = i; - current = trie->keys[i][offset]; + current = c; } } + if (fork_pos == 0 && current == '\0') { + return; + } } forks[fork_pos] = last_key; @@ -271,44 +285,24 @@ static inline void trie_compile_aux(trie_t *trie, int id, } } - -static inline void trie_shrink(trie_t *trie) -{ - p_shrink(&trie->entries, trie->entries_len, &trie->entries_size); - p_shrink(&trie->c, trie->c_len, &trie->c_size); -} - -static inline void trie_lock(trie_t *trie) -{ - if (mlock(trie->entries, sizeof(trie_entry_t) * trie->entries_len) != 0) { - UNIXERR("mlock"); - return; - } - if (mlock(trie->c, trie->c_len) != 0) { - UNIXERR("mlock"); - munlock(trie->entries, sizeof(trie_entry_t) * trie->entries_len); - return; - } - trie->locked = true; -} - -typedef char *str_t; - void trie_compile(trie_t *trie, bool memlock) { + assert(trie->entries == NULL && "Trie already compiled"); + assert(trie->keys != NULL && "Trying to compile an empty trie"); { -# define QSORT_TYPE str_t -# define QSORT_BASE trie->keys -# define QSORT_NELT trie->keys_len -# define QSORT_LT(a,b) strcmp(*a, *b) < 0 +# define QSORT_TYPE int +# define QSORT_BASE trie->keys_offset +# define QSORT_NELT trie->keys_offset_len +# define QSORT_LT(a,b) strcmp(trie->keys + *a, trie->keys + *b) < 0 # include "qsort.c" } trie_grow(trie, trie->keys_len); - trie_compile_aux(trie, trie_add_leaf(trie, trie->keys[0]), - 0, trie->keys_len, 0, 0); + trie_compile_aux(trie, trie_add_leaf(trie, trie->keys), + 0, trie->keys_offset_len, 0, 0); trie_cleanup_build_data(trie); - trie_shrink(trie); + p_shrink(&trie->entries, trie->entries_len, &trie->entries_size); + p_shrink(&trie->c, trie->c_len, &trie->c_size); if (memlock) { trie_lock(trie); } @@ -316,6 +310,7 @@ void trie_compile(trie_t *trie, bool memlock) bool trie_lookup(const trie_t *trie, const char *key) { + assert(trie->keys == NULL && "Can't lookup: trie not compiled"); if (trie->entries_len == 0) { return false; } else { @@ -337,45 +332,85 @@ bool trie_lookup(const trie_t *trie, const char *key) } } +void trie_lock(trie_t *trie) +{ + if (trie->locked) { + return; + } + if (mlock(trie->entries, sizeof(trie_entry_t) * trie->entries_len) != 0) { + UNIXERR("mlock"); + return; + } + if (mlock(trie->c, trie->c_len) != 0) { + UNIXERR("mlock"); + munlock(trie->entries, sizeof(trie_entry_t) * trie->entries_len); + return; + } + trie->locked = true; +} + +void trie_unlock(trie_t *trie) +{ + if (!trie->locked) { + return; + } + munlock(trie->entries, sizeof(trie_entry_t) * trie->entries_len); + munlock(trie->c, trie->c_len); + trie->locked = false; +} /* Debug {{{1 */ -static inline void trie_entry_inspect(const trie_t *trie, +static inline void trie_entry_inspect(const trie_t *trie, bool show_content, const trie_entry_t *entry, int level) { - static int c_sum = 0; - static int nodes = 0; + static int max_depth = 0; + static int leaves = 0; + static int depth_sum = 0; - ++nodes; - c_sum += entry->c_len; - for (int i = 0 ; i < level ; ++i) { - fputs(" ", stdout); + if (trie_entry_is_leaf(entry)) { + if (level > max_depth) { + max_depth = level; + } + ++leaves; + depth_sum += level; } - if (entry->c_len == 0) { - fputs("(nil)", stdout); - } else { - const char *c = trie->c + entry->c_offset; - printf("(%d) ", entry->c_len); - for (int i = 0 ; i < entry->c_len ; ++i) { - if (c[i]) { - printf("%c ", c[i]); - } else { - fputs("\\0 ", stdout); + if (show_content) { + for (int i = 0 ; i < level ; ++i) { + fputs(" ", stdout); + } + if (entry->c_len == 0) { + fputs("(nil)", stdout); + } else { + const char *c = trie->c + entry->c_offset; + printf("(%d) ", entry->c_len); + for (int i = 0 ; i < entry->c_len ; ++i) { + if (c[i]) { + printf("%c ", c[i]); + } else { + fputs("\\0 ", stdout); + } } } + fputs("\n", stdout); } - fputs("\n", stdout); for (int i = entry->children_offset ; i < entry->children_offset + entry->children_len ; ++i) { - trie_entry_inspect(trie, &trie->entries[i], level + 1); + trie_entry_inspect(trie, show_content, &trie->entries[i], level + 1); } if (level == 0) { - printf("Mean char per node: %d\n", c_sum / nodes); + printf("Average char per node: %d\n", trie->c_len / trie->entries_len); + printf("Number of nodes: %d\n", trie->entries_len); + printf("Number of leaves: %d\n", leaves); + printf("Max depth: %d\n", max_depth); + printf("Average leaf depth: %d\n", depth_sum / leaves); + printf("Memory used: %d\n", (trie->entries_size * sizeof(trie_entry_t)) + + (trie->c_size) + sizeof(trie_t)); } } -void trie_inspect(const trie_t *trie) +void trie_inspect(const trie_t *trie, bool show_content) { - trie_entry_inspect(trie, trie->entries, 0); + trie_entry_inspect(trie, show_content, trie->entries, 0); }