X-Git-Url: http://git.madism.org/?p=apps%2Fpfixtools.git;a=blobdiff_plain;f=common%2Ftst-trie.c;h=999114e1da437cac26803b71b20b932a7e8713ad;hp=b4b064e4bfb8b9411c4110f50915bfa46648fc32;hb=1dc8eb6ccc6f70a51ce0c42773892fcb20806c48;hpb=9f42e34eb27b0aefdac897b6c37ce11e20db52e7 diff --git a/common/tst-trie.c b/common/tst-trie.c index b4b064e..999114e 100644 --- a/common/tst-trie.c +++ b/common/tst-trie.c @@ -33,54 +33,79 @@ * Copyright © 2008 Florent Bruneau */ -#include -#include -#include -#include -#include - +#include +#include #include "common.h" #include "str.h" #include "trie.h" +#include "file.h" static trie_t *create_trie_from_file(const char *file) { trie_t *db; - const char *map, *p, *end; - struct stat st; - int fd; + file_map_t map; + const char *p, *end; char line[BUFSIZ]; - fd = open(file, O_RDONLY, 0000); - if (fd < 0) { - UNIXERR("open"); + if (!file_map_open(&map, file, false)) { return NULL; } - - if (fstat(fd, &st) < 0) { - UNIXERR("fstat"); - close(fd); - return NULL; + p = map.map; + end = map.end; + while (end > p && end[-1] != '\n') { + --end; + } + if (end != map.end) { + warn("file %s miss a final \\n, ignoring last line", file); } - p = map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { - UNIXERR("mmap"); - close(fd); - return NULL; + db = trie_new(); + while (p < end && p != NULL) { + const char *eol = (char *)memchr(p, '\n', end - p); + if (eol == NULL) { + eol = end; + } + if (eol - p > BUFSIZ) { + p = eol - BUFSIZ; + } + int i = 0; +#if 1 + for (const char *s = eol - 1 ; s >= p ; --s) { + line[i++] = ascii_tolower(*s); + } +#else + memcpy(line, p, eol - p); + i = eol - p; +#endif + line[i] = '\0'; + trie_insert(db, line); + p = eol + 1; } - close(fd); + file_map_close(&map); + trie_compile(db, false); + return db; +} + + +__attribute__((used)) +static void check_trie_with_file(const trie_t *db, const char *file) +{ + file_map_t map; + const char *p, *end; + char line[BUFSIZ]; - end = map + st.st_size; - while (end > map && end[-1] != '\n') { + if (!file_map_open(&map, file, false)) { + return; + } + p = map.map; + end = map.end; + while (end > p && end[-1] != '\n') { --end; } - if (end != map + st.st_size) { - syslog(LOG_WARNING, "file %s miss a final \\n, ignoring last line", - file); + if (end != map.end) { + warn("file %s miss a final \\n, ignoring last line", file); } - db = trie_new(); while (p < end && p != NULL) { const char *eol = (char *)memchr(p, '\n', end - p); if (eol == NULL) { @@ -90,7 +115,7 @@ static trie_t *create_trie_from_file(const char *file) p = eol - BUFSIZ; } int i = 0; -#if 0 +#if 1 for (const char *s = eol - 1 ; s >= p ; --s) { line[i++] = ascii_tolower(*s); } @@ -99,25 +124,126 @@ static trie_t *create_trie_from_file(const char *file) i = eol - p; #endif line[i] = '\0'; - trie_insert(db, line); + if (!trie_lookup(db, line)) { + warn("'%s' not found in the trie", line); + } p = eol + 1; } - munmap((void*)map, st.st_size); - return db; + file_map_close(&map); +} + + +static bool test_linear(const uint8_t *start, uint32_t len, uint8_t data) { + const uint8_t *end = start + len; + while (start < end) { + const uint8_t val = *start; + if (val == data) { + return true; + } else if (val > data) { + return false; + } + ++start; + } + return false; +} + +static bool test_dicho(const uint8_t *start, uint32_t len, uint8_t data) { + const uint8_t *end = start + len; + + while (start < end) { + const uint8_t *mid = start + ((end - start) >> 1); + const uint8_t val = *mid; + + if (val == data) { + return true; + } else if (data < val) { + end = mid; + } else { + start = mid + 1; + } + } + return false; +} + +__attribute__((used)) +static void test_lookup(void) { + bool set[64]; + uint8_t data[64]; + + printf("size,dicho,linear\n"); + for (int i = 1 ; i < 64 ; ++i) { + if (i > 32) { + int selected = 64; + memset(set, 1, 64 * sizeof(bool)); + while (selected > i) { + int val = rand() % 64; + if (set[val]) { + set[val] = false; + --selected; + } + } + } else { + int selected = 0; + memset(set, 0, 64 * sizeof(bool)); + while (selected < i) { + int val = rand() % 64; + if (!set[val]) { + set[val] = true; + ++selected; + } + } + } + int pos = 0; + for (int j = 0 ; j < 64 ; ++j) { + if (set[j]) { + data[pos] = j; + ++pos; + } + } + + struct timeval start, end; + double diff_dicho, diff_linear; + const int iterations = 50000000; + + gettimeofday(&start, NULL); + for (int k = 0 ; k < iterations ; ++k) { + for (int j = 0 ; j < 64 ; ++j) { + test_dicho(data, i, j); + } + } + gettimeofday(&end, NULL); + diff_dicho = ((end.tv_sec - start.tv_sec) * 10.0) + + (double)(end.tv_usec - start.tv_usec) / 10e5; + + gettimeofday(&start, NULL); + for (int k = 0 ; k < iterations ; ++k) { + for (int j = 0 ; j < 64 ; ++j) { + test_linear(data, i, j); + } + } + gettimeofday(&end, NULL); + diff_linear = ((end.tv_sec - start.tv_sec) * 10.0) + + (double)(end.tv_usec - start.tv_usec) / 10e5; + printf("%d,%d,%d\n", i, (int)diff_dicho, (int)diff_linear); + } } int main(int argc, char *argv[]) { + /* test_lookup(); */ + /* Trivial tests */ trie_t *trie = trie_new(); - trie_insert(trie, "abcdefghi"); - trie_insert(trie, "abcde123654789"); trie_insert(trie, "abcde123456789"); trie_insert(trie, "abcde123654789"); + trie_insert(trie, "abcdefghi"); trie_insert(trie, "coucou"); trie_insert(trie, "coucou chez vous"); + trie_insert(trie, "debout !"); + trie_compile(trie, false); + trie_inspect(trie, true); #define ASSERT_TRUE(str) \ if (!trie_lookup(trie, str)) { \ @@ -133,16 +259,36 @@ int main(int argc, char *argv[]) ASSERT_FALSE("coucou "); ASSERT_FALSE("abcde123"); ASSERT_FALSE("abcde"); + ASSERT_FALSE("coucou chez vous tous"); + ASSERT_TRUE("abcde123456789"); + ASSERT_TRUE("abcde123456789"); + ASSERT_TRUE("abcde123654789"); ASSERT_TRUE("abcdefghi"); ASSERT_TRUE("coucou"); - ASSERT_FALSE("coucou chez vous tous"); + ASSERT_TRUE("coucou chez vous"); + ASSERT_TRUE("debout !"); + trie_delete(&trie); /* Perf test */ if (argc > 1) { trie = create_trie_from_file(argv[1]); -// trie_inspect(trie); + trie_inspect(trie, false); + check_trie_with_file(trie, argv[1]); + if (argc > 2) { + const uint32_t how_many = 8 * 1000 * 1000; + struct timeval start, end; + double diff; + + gettimeofday(&start, NULL); + for (uint32_t i = 0 ; i < how_many ; ++i) { + trie_lookup(trie, argv[2]); + } + gettimeofday(&end, NULL); + diff = (end.tv_sec - start.tv_sec) + (double)(end.tv_usec - start.tv_usec) / 10e6; + printf("%u lookups per second\n", (int)(how_many / diff)); + } trie_delete(&trie); } return 0;