diff --git a/doc/configuration.txt b/doc/configuration.txt index 886b5fd54..33fba9b85 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -6283,8 +6283,16 @@ balance url_param [check_post] will take away N-1 of the highest loaded servers at the expense of performance. With very high values, the algorithm will converge towards the leastconn's result but much slower. + In addition, for large server farms with very low loads (or + perfect balance), comparing loads will often lead to a tie, + so in case of equal loads between all measured servers, their + request rates over the last second are compared, which helps + to better balance server usage over time in the same spirit + as roundrobin does, and to smooth out consistent hash unfairness. The default value is 2, which generally shows very good - distribution and performance. This algorithm is also known as + distribution and performance. For large farms with low loads + (less than a few requests per second per server), it may help + to raise it to 3 or even 4. This algorithm is also known as the Power of Two Random Choices and is described here : http://www.eecs.harvard.edu/~michaelm/postscripts/handbook2001.pdf diff --git a/src/backend.c b/src/backend.c index 3f00e1d19..73b39306b 100644 --- a/src/backend.c +++ b/src/backend.c @@ -576,9 +576,20 @@ struct server *get_server_rnd(struct stream *s, const struct server *avoid) /* compare the new server to the previous best choice and pick * the one with the least currently served requests. 
*/ - if (prev && prev != curr && - curr->served * prev->cur_eweight > prev->served * curr->cur_eweight) - curr = prev; + if (prev && prev != curr) { + uint64_t wcurr = (uint64_t)curr->served * prev->cur_eweight; + uint64_t wprev = (uint64_t)prev->served * curr->cur_eweight; + + if (wcurr > wprev) + curr = prev; + else if (wcurr == wprev && curr->counters.shared.tg && prev->counters.shared.tg) { + /* same load: pick the lowest weighted request rate */ + wcurr = read_freq_ctr_period_estimate(&curr->counters._sess_per_sec, MS_TO_TICKS(1000)); + wprev = read_freq_ctr_period_estimate(&prev->counters._sess_per_sec, MS_TO_TICKS(1000)); + if (wprev * curr->cur_eweight < wcurr * prev->cur_eweight) + curr = prev; + } + } } while (--draws > 0); /* if the selected server is full, pretend we have none so that we reach