diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h index 0e719832c..d3bf1cf57 100644 --- a/include/haproxy/tools.h +++ b/include/haproxy/tools.h @@ -45,6 +45,7 @@ #include #include #include +#include /****** string-specific macros and functions ******/ /* if a > max, then bound to . The macro returns the new */ @@ -55,6 +56,19 @@ #define SWAP(a, b) do { typeof(a) t; t = a; a = b; b = t; } while(0) +/* return the XXH32 hash of the len first bytes of str for a given key, reduced to its 24 lowest bits. All keys are valid. */ +#define HA_ANON(key, str, len) (XXH32(str, len, key) & 0xFFFFFF) + +/* use if you want to return a bare hash, without prefix nor suffix. Key 0 doesn't hash. */ +#define HA_ANON_STR(key, str) hash_anon(key, str, "", "") + +/* use if you want to return a hash like : ID('hash'). Key 0 doesn't hash. */ +#define HA_ANON_ID(key, str) hash_anon(key, str, "ID(", ")") + +/* use if you want to return a hash like : PATH('hash'). Key 0 doesn't hash. */ +#define HA_ANON_PATH(key, str) hash_anon(key, str, "PATH(", ")") + + /* * copies at most chars from to . Last char is always * set to 0, unless is 0. The number of chars copied is returned @@ -480,6 +494,12 @@ unsigned int inetaddr_host(const char *text); unsigned int inetaddr_host_lim(const char *text, const char *stop); unsigned int inetaddr_host_lim_ret(char *text, char *stop, char **ret); +/* Returns string2hash hashed with the anonymizing key (scramble) between prefix and suffix, or string2hash itself when the key is 0 or the string is empty. 
*/ +const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix); + +/* Returns an anonymized form of the address in ipstring built from the key (scramble), or ipstring itself when there is nothing to hash */ +const char * hash_ipanon(uint32_t scramble, char *ipstring); + static inline char *cut_crlf(char *s) { while (*s != '\r' && *s != '\n') { diff --git a/src/tools.c b/src/tools.c index 2311f725a..34b1ab099 100644 --- a/src/tools.c +++ b/src/tools.c @@ -76,6 +76,9 @@ extern void *__elf_aux_vector; */ #define RET0_UNLESS(__x) do { if (!(__x)) return 0; } while (0) +/* Number of rows of the hash_word table */ +#define NB_L_HASH_WORD 7 + /* enough to store NB_ITOA_STR integers of : * 2^64-1 = 18446744073709551615 or * -2^63 = -9223372036854775808 @@ -102,6 +105,10 @@ THREAD_LOCAL unsigned int statistical_prng_state = 2463534242U; /* set to true if this is a static build */ int build_is_static = 0; +/* Table of 20-byte rows receiving the hashed words, and index of the row picked by the latest hash_anon() or hash_ipanon() call */ +static THREAD_LOCAL char hash_word[NB_L_HASH_WORD][20]; +static THREAD_LOCAL int index_hash = 0; + /* * unsigned long long ASCII representation * @@ -5874,6 +5881,100 @@ void update_word_fingerprint(uint8_t *fp, const char *word) fp[32 * from + to]++; } +/* This function hashes a word, scramble is the anonymizing key, returns + * the hashed word when the key (scramble) != 0, else returns the word. + * The result is written into one row of hash_word, rows being reused in + * turn: copy it if it must outlive the next few calls. 
+ */ +const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix) +{ + index_hash++; + if (index_hash > NB_L_HASH_WORD) + index_hash = 0; + + /* don't hash empty strings */ + if (!string2hash[0] || (string2hash[0] == ' ' && string2hash[1] == 0)) + return string2hash; + + if (scramble != 0) { + snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "%s%06x%s", + prefix, HA_ANON(scramble, string2hash, strlen(string2hash)), suffix); + return hash_word[index_hash]; + } + else + return string2hash; +} + +/* This function hashes or not an ip address ipstring, scramble is the anonymizing + * key, returns the hashed ip with his port or ipstring when there is nothing to hash. + */ +const char *hash_ipanon(uint32_t scramble, char *ipstring) +{ + char *errmsg = NULL; + struct sockaddr_storage *sa; + char addr[46]; + int port; + + index_hash++; + if (index_hash > NB_L_HASH_WORD) { + index_hash = 0; + } + + if (strncmp(ipstring, "localhost", 1) == 0) { + return ipstring; + } + else { + sa = str2sa_range(ipstring, NULL, NULL, NULL, NULL, NULL, &errmsg, NULL, NULL, + PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT | PA_O_PORT_RANGE); + if (sa == NULL) { + return ipstring; + } + else { + addr_to_str(sa, addr, sizeof(addr)); + port = get_host_port(sa); + + switch(sa->ss_family) { + case AF_INET: + if (strncmp(addr, "127", 3) == 0 || strncmp(addr, "255", 3) == 0 || strncmp(addr, "0", 1) == 0) { + return ipstring; + } + else { + if (port != 0) { + snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV4(%06x):%d", HA_ANON(scramble, addr, strlen(addr)), port); + return hash_word[index_hash]; + } + else { + snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV4(%06x)", HA_ANON(scramble, addr, strlen(addr))); + return hash_word[index_hash]; + } + } + break; + + case AF_INET6: + if (strcmp(addr, "::1") == 0) { + return ipstring; + } + else { + if (port != 0) { + snprintf(hash_word[index_hash], 
sizeof(hash_word[index_hash]), "IPV6(%06x):%d", HA_ANON(scramble, addr, strlen(addr)), port); + return hash_word[index_hash]; + } + else { + snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV6(%06x)", HA_ANON(scramble, addr, strlen(addr))); + return hash_word[index_hash]; + } + } + break; + + default: + return ipstring; + break; + }; + } + } + return ipstring; +} + /* Initialize array with the fingerprint of word by counting the * transitions between characters. is a 1024-entries array indexed as * 32*from+to. Positions for 'from' and 'to' are: