From 1b8fbbd843ddeb5fc81c9303db9c590a436d499b Mon Sep 17 00:00:00 2001 From: William Casarin Date: Mon, 9 Jul 2018 12:10:32 -0700 Subject: progress --- ccan/htable/tools/stringspeed.c | 240 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 ccan/htable/tools/stringspeed.c (limited to 'ccan/htable/tools/stringspeed.c') diff --git a/ccan/htable/tools/stringspeed.c b/ccan/htable/tools/stringspeed.c new file mode 100644 index 0000000..c6ca10f --- /dev/null +++ b/ccan/htable/tools/stringspeed.c @@ -0,0 +1,240 @@ +/* Simple speed tests for a hash of strings. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static size_t hashcount; + +static const char *strkey(const char *str) +{ + return str; +} + +static size_t hash_str(const char *key) +{ + hashcount++; + return hash(key, strlen(key), 0); +} + +static bool cmp(const char *obj, const char *key) +{ + return strcmp(obj, key) == 0; +} + +HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, htable_str); + +/* Nanoseconds per operation */ +static size_t normalize(const struct timeabs *start, + const struct timeabs *stop, + unsigned int num) +{ + return time_to_nsec(time_divide(time_between(*stop, *start), num)); +} + +int main(int argc, char *argv[]) +{ + size_t i, j, num; + struct timeabs start, stop; + struct htable_str ht; + char **words, **misswords; + + words = tal_strsplit(NULL, grab_file(NULL, + argv[1] ? argv[1] : "/usr/share/dict/words"), "\n", + STR_NO_EMPTY); + htable_str_init(&ht); + num = tal_count(words) - 1; + /* Note that on my system, num is just > 98304, where we double! */ + printf("%zu words\n", num); + + /* Append and prepend last char for miss testing. */ + misswords = tal_arr(words, char *, num); + for (i = 0; i < num; i++) { + char lastc; + if (strlen(words[i])) + lastc = words[i][strlen(words[i])-1]; + else + lastc = 'z'; + misswords[i] = tal_fmt(misswords, "%c%s%c%c", + lastc, words[i], lastc, lastc); + } + + printf("#01: Initial insert: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + htable_str_add(&ht, words[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Bytes allocated: %zu\n", + sizeof(ht.raw.table[0]) << ht.raw.bits); + + printf("#02: Initial lookup (match): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (htable_str_get(&ht, words[i]) != words[i]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#03: Initial lookup (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) { + if (htable_str_get(&ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Lookups in order are very cache-friendly for judy; try random */ + printf("#04: Initial lookup (random): "); + fflush(stdout); + start = time_now(); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) + if (htable_str_get(&ht, words[j]) != words[j]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("#05: Initial delete all: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (!htable_str_del(&ht, words[i])) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#06: Initial re-inserting: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + htable_str_add(&ht, words[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("#07: Deleting first half: "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i+=2) + if (!htable_str_del(&ht, words[i])) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#08: Adding (a different) half: "); + fflush(stdout); + + start = time_now(); + for (i = 0; i < num; i+=2) + htable_str_add(&ht, misswords[i]); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#09: Lookup after half-change (match): "); + fflush(stdout); + start = time_now(); + for (i = 1; i < num; i+=2) + if (htable_str_get(&ht, words[i]) != words[i]) + abort(); + for (i = 0; i < num; i+=2) { + if (htable_str_get(&ht, misswords[i]) != misswords[i]) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#10: Lookup after half-change (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i+=2) + if (htable_str_get(&ht, words[i])) + abort(); + for (i = 1; i < num; i+=2) { + if (htable_str_get(&ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Hashtables with delete markers can fill with markers over time. + * so do some changes to see how it operates in long-term. */ + printf("#11: Churn 1: "); + start = time_now(); + for (j = 0; j < num; j+=2) { + if (!htable_str_del(&ht, misswords[j])) + abort(); + if (!htable_str_add(&ht, words[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#12: Churn 2: "); + start = time_now(); + for (j = 1; j < num; j+=2) { + if (!htable_str_del(&ht, words[j])) + abort(); + if (!htable_str_add(&ht, misswords[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#13: Churn 3: "); + start = time_now(); + for (j = 1; j < num; j+=2) { + if (!htable_str_del(&ht, misswords[j])) + abort(); + if (!htable_str_add(&ht, words[j])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Now it's back to normal... */ + printf("#14: Post-Churn lookup (match): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) + if (htable_str_get(&ht, words[i]) != words[i]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("#15: Post-Churn lookup (miss): "); + fflush(stdout); + start = time_now(); + for (i = 0; i < num; i++) { + if (htable_str_get(&ht, misswords[i])) + abort(); + } + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Lookups in order are very cache-friendly for judy; try random */ + printf("#16: Post-Churn lookup (random): "); + fflush(stdout); + start = time_now(); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) + if (htable_str_get(&ht, words[j]) != words[j]) + abort(); + stop = time_now(); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + return 0; +} -- cgit v1.2.3