2009-10-01 05:19:37 -04:00
|
|
|
/*
|
|
|
|
|
* Fast Weighted Least Connection load balancing algorithm.
|
|
|
|
|
*
|
|
|
|
|
* Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2020-06-04 17:20:13 -04:00
|
|
|
#include <import/eb32tree.h>
|
2020-05-27 06:58:42 -04:00
|
|
|
#include <haproxy/api.h>
|
2020-06-09 03:07:15 -04:00
|
|
|
#include <haproxy/backend.h>
|
2020-06-04 16:59:39 -04:00
|
|
|
#include <haproxy/queue.h>
|
2020-06-04 17:20:13 -04:00
|
|
|
#include <haproxy/server-t.h>
|
2009-10-01 05:19:37 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Remove a server from a tree. It must have previously been dequeued. This
 * function is meant to be called when a server is going down or has its
 * weight disabled.
 *
 * The server's lock and the lbprm's lock must be held.
 */
static inline void fwlc_remove_from_tree(struct server *s)
{
	/* Only clears the tree association; the eb32 node itself must
	 * already have been unlinked via fwlc_dequeue_srv(). A NULL
	 * lb_tree marks the server as "not in any tree".
	 */
	s->lb_tree = NULL;
}
|
|
|
|
|
|
2018-08-21 13:44:53 -04:00
|
|
|
/* simply removes a server from a tree.
 *
 * The lbprm's lock must be held.
 */
static inline void fwlc_dequeue_srv(struct server *s)
{
	/* eb32_delete() is a no-op if the node is not currently linked,
	 * so it is safe to call this on an already-dequeued server.
	 */
	eb32_delete(&s->lb_node);
}
|
|
|
|
|
|
BUG/MEDIUM: lb-leastconn: Reposition a server using the right eweight
Depending on the context, the current eweight or the next one must be used
to reposition a server in the tree. When the server state is updated, for
instance its weight, the next eweight must be used because it is not yet
committed. However, when the server is used, on normal conditions, the
current eweight must be used.
In fact, it is only a bug on the 1.8. On newer versions, the changes on a
server are performed synchronously. But it is safer to rely on the right
eweight value to avoid any futur bugs.
On the 1.8, it is important to do so, because the server state is updated
and committed inside the rendez-vous point. Thus, the next server state may
be unsync with the current state for a short time, waiting all threads join
the rendez-vous point. It is especially a problem if the next eweight is set
to 0. Because otherwise, it must not be used to reposition the server in the
tree, leading to a divide by 0.
This patch must be backported as far as 1.8.
2020-12-11 09:36:01 -05:00
|
|
|
/* Queue a server in its associated tree, assuming the <eweight> is >0.
|
2018-12-14 02:33:28 -05:00
|
|
|
* Servers are sorted by (#conns+1)/weight. To ensure maximum accuracy,
|
|
|
|
|
* we use (#conns+1)*SRV_EWGHT_MAX/eweight as the sorting key. The reason
|
|
|
|
|
* for using #conns+1 is to sort by weights in case the server is picked
|
|
|
|
|
* and not before it is picked. This provides a better load accuracy for
|
|
|
|
|
* low connection counts when weights differ and makes sure the round-robin
|
2019-09-06 11:04:04 -04:00
|
|
|
* applies between servers of highest weight first. However servers with no
|
|
|
|
|
* connection are always picked first so that under low loads, it's not
|
|
|
|
|
* always the single server with the highest weight that gets picked.
|
2018-08-21 13:44:53 -04:00
|
|
|
*
|
BUG/MEDIUM: lb-leastconn: Reposition a server using the right eweight
Depending on the context, the current eweight or the next one must be used
to reposition a server in the tree. When the server state is updated, for
instance its weight, the next eweight must be used because it is not yet
committed. However, when the server is used, on normal conditions, the
current eweight must be used.
In fact, it is only a bug on the 1.8. On newer versions, the changes on a
server are performed synchronously. But it is safer to rely on the right
eweight value to avoid any futur bugs.
On the 1.8, it is important to do so, because the server state is updated
and committed inside the rendez-vous point. Thus, the next server state may
be unsync with the current state for a short time, waiting all threads join
the rendez-vous point. It is especially a problem if the next eweight is set
to 0. Because otherwise, it must not be used to reposition the server in the
tree, leading to a divide by 0.
This patch must be backported as far as 1.8.
2020-12-11 09:36:01 -05:00
|
|
|
* NOTE: Depending on the calling context, we use s->next_eweight or
|
|
|
|
|
* s->cur_eweight. The next value is used when the server state is updated
|
|
|
|
|
* (because the weight changed for instance). During this step, the server
|
|
|
|
|
* state is not yet committed. The current value is used to reposition the
|
|
|
|
|
* server in the tree. This happens when the server is used.
|
|
|
|
|
*
|
2021-02-17 10:14:00 -05:00
|
|
|
* The lbprm's lock must be held.
|
2009-10-01 05:19:37 -04:00
|
|
|
*/
|
BUG/MEDIUM: lb-leastconn: Reposition a server using the right eweight
Depending on the context, the current eweight or the next one must be used
to reposition a server in the tree. When the server state is updated, for
instance its weight, the next eweight must be used because it is not yet
committed. However, when the server is used, on normal conditions, the
current eweight must be used.
In fact, it is only a bug on the 1.8. On newer versions, the changes on a
server are performed synchronously. But it is safer to rely on the right
eweight value to avoid any futur bugs.
On the 1.8, it is important to do so, because the server state is updated
and committed inside the rendez-vous point. Thus, the next server state may
be unsync with the current state for a short time, waiting all threads join
the rendez-vous point. It is especially a problem if the next eweight is set
to 0. Because otherwise, it must not be used to reposition the server in the
tree, leading to a divide by 0.
This patch must be backported as far as 1.8.
2020-12-11 09:36:01 -05:00
|
|
|
static inline void fwlc_queue_srv(struct server *s, unsigned int eweight)
|
2009-10-01 05:19:37 -04:00
|
|
|
{
|
2021-02-17 10:14:00 -05:00
|
|
|
unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->nbpend);
|
2020-10-22 11:41:45 -04:00
|
|
|
|
BUG/MEDIUM: lb-leastconn: Reposition a server using the right eweight
Depending on the context, the current eweight or the next one must be used
to reposition a server in the tree. When the server state is updated, for
instance its weight, the next eweight must be used because it is not yet
committed. However, when the server is used, on normal conditions, the
current eweight must be used.
In fact, it is only a bug on the 1.8. On newer versions, the changes on a
server are performed synchronously. But it is safer to rely on the right
eweight value to avoid any futur bugs.
On the 1.8, it is important to do so, because the server state is updated
and committed inside the rendez-vous point. Thus, the next server state may
be unsync with the current state for a short time, waiting all threads join
the rendez-vous point. It is especially a problem if the next eweight is set
to 0. Because otherwise, it must not be used to reposition the server in the
tree, leading to a divide by 0.
This patch must be backported as far as 1.8.
2020-12-11 09:36:01 -05:00
|
|
|
s->lb_node.key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / eweight : 0;
|
2009-10-01 05:19:37 -04:00
|
|
|
eb32_insert(s->lb_tree, &s->lb_node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Re-position the server in the FWLC tree after it has been assigned one
 * connection or after it has released one. Note that it is possible that
 * the server has been moved out of the tree due to failed health-checks.
 *
 * <locked> must reflect the server's lock ownership. The lbprm's lock will
 * be used. (NOTE(review): <locked> is not referenced in this body; it is
 * kept to satisfy the lbprm server_take_conn/server_drop_conn callback
 * signature.)
 */
static void fwlc_srv_reposition(struct server *s, int locked)
{
	/* in-flight load = active connections + queued connections; the key
	 * uses cur_eweight because the server is in steady state here (not
	 * in the middle of a weight/state update).
	 */
	unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->nbpend);
	unsigned int new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / s->cur_eweight : 0;

	/* some calls will be made for no change (e.g connect_server() after
	 * assign_server(). Let's check that first. This lock-free fast path
	 * avoids taking the lbprm lock when the key would not move.
	 */
	if (s->lb_node.node.leaf_p && s->lb_node.key == new_key)
		return;

	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
	if (s->lb_tree) {
		/* we might have been waiting for a while on the lock above
		 * so it's worth testing again because other threads are very
		 * likely to have released a connection or taken one leading
		 * to our target value (50% of the case in measurements).
		 */
		inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->nbpend);
		new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / s->cur_eweight : 0;
		if (!s->lb_node.node.leaf_p || s->lb_node.key != new_key) {
			/* the key really changed (or the node was unlinked):
			 * move the node to its new position in the tree.
			 */
			eb32_delete(&s->lb_node);
			s->lb_node.key = new_key;
			eb32_insert(s->lb_tree, &s->lb_node);
		}
	}
	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
}
|
|
|
|
|
|
|
|
|
|
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely down (the caller may not
 * know all the variables of a server's state).
 *
 * The server's lock must be held. The lbprm's lock will be used.
 */
static void fwlc_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* the server will remain usable: nothing to remove from the trees,
	 * only commit the new state.
	 */
	if (srv_willbe_usable(srv))
		goto out_update_state;
	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (!srv_currently_usable(srv))
		/* server was already down */
		goto out_update_backend;

	if (srv->flags & SRV_F_BACKUP) {
		p->lbprm.tot_wbck -= srv->cur_eweight;
		p->srv_bck--;

		if (srv == p->lbprm.fbck) {
			/* we lost the first backup server in a single-backup
			 * configuration, we must search another one.
			 */
			struct server *srv2 = p->lbprm.fbck;
			do {
				srv2 = srv2->next;
			} while (srv2 &&
				 !((srv2->flags & SRV_F_BACKUP) &&
				   srv_willbe_usable(srv2)));
			p->lbprm.fbck = srv2;
		}
	} else {
		p->lbprm.tot_wact -= srv->cur_eweight;
		p->srv_act--;
	}

	/* unlink the node from the tree, then mark the server as belonging
	 * to no tree at all.
	 */
	fwlc_dequeue_srv(srv);
	fwlc_remove_from_tree(srv);

out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);

out_update_state:
	srv_lb_commit_status(srv);
}
|
|
|
|
|
|
|
|
|
|
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. It is not
 * important either that the new state is completely UP (the caller may not
 * know all the variables of a server's state). This function will not change
 * the weight of a server which was already up.
 *
 * The server's lock must be held. The lbprm's lock will be used.
 */
static void fwlc_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* the server will not become usable: only commit the new state */
	if (!srv_willbe_usable(srv))
		goto out_update_state;

	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (srv_currently_usable(srv))
		/* server was already up */
		goto out_update_backend;

	if (srv->flags & SRV_F_BACKUP) {
		srv->lb_tree = &p->lbprm.fwlc.bck;
		p->lbprm.tot_wbck += srv->next_eweight;
		p->srv_bck++;

		if (!(p->options & PR_O_USE_ALL_BK)) {
			if (!p->lbprm.fbck) {
				/* there was no backup server anymore */
				p->lbprm.fbck = srv;
			} else {
				/* we may have restored a backup server prior to fbck,
				 * in which case it should replace it.
				 */
				struct server *srv2 = srv;
				do {
					srv2 = srv2->next;
				} while (srv2 && (srv2 != p->lbprm.fbck));
				if (srv2)
					p->lbprm.fbck = srv;
			}
		}
	} else {
		srv->lb_tree = &p->lbprm.fwlc.act;
		p->lbprm.tot_wact += srv->next_eweight;
		p->srv_act++;
	}

	/* note that eweight cannot be 0 here: srv_willbe_usable() returned
	 * true above. We use next_eweight because the state change is not
	 * committed yet.
	 */
	fwlc_queue_srv(srv, srv->next_eweight);

out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);

out_update_state:
	srv_lb_commit_status(srv);
}
|
|
|
|
|
|
|
|
|
|
/* This function must be called after an update to server <srv>'s effective
 * weight. It may be called after a state change too.
 *
 * The server's lock must be held. The lbprm's lock will be used.
 */
static void fwlc_update_server_weight(struct server *srv)
{
	int old_state, new_state;
	struct proxy *p = srv->proxy;

	if (!srv_lb_status_changed(srv))
		return;

	/* If changing the server's weight changes its state, we simply apply
	 * the procedures we already have for status change. If the state
	 * remains down, the server is not in any tree, so it's as easy as
	 * updating its values. If the state remains up with different weights,
	 * there are some computations to perform to find a new place and
	 * possibly a new tree for this server.
	 */

	old_state = srv_currently_usable(srv);
	new_state = srv_willbe_usable(srv);

	if (!old_state && !new_state) {
		/* down -> down: just record the new weight */
		srv_lb_commit_status(srv);
		return;
	}
	else if (!old_state && new_state) {
		/* down -> up: delegate to the status-up handler */
		fwlc_set_server_status_up(srv);
		return;
	}
	else if (old_state && !new_state) {
		/* up -> down: delegate to the status-down handler */
		fwlc_set_server_status_down(srv);
		return;
	}

	/* up -> up with a different weight: requeue under the lock */
	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);

	if (srv->lb_tree)
		fwlc_dequeue_srv(srv);

	/* adjust the backend's total weight by the weight delta and select
	 * the tree matching the server's role (backup vs active).
	 */
	if (srv->flags & SRV_F_BACKUP) {
		p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
		srv->lb_tree = &p->lbprm.fwlc.bck;
	} else {
		p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
		srv->lb_tree = &p->lbprm.fwlc.act;
	}

	/* next_eweight is used because the new state is not committed yet */
	fwlc_queue_srv(srv, srv->next_eweight);

	update_backend_weight(p);
	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);

	srv_lb_commit_status(srv);
}
|
|
|
|
|
|
|
|
|
|
/* This function is responsible for building the trees in case of fast
|
|
|
|
|
* weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
|
|
|
|
|
* uweight ratio. Both active and backup groups are initialized.
|
|
|
|
|
*/
|
|
|
|
|
void fwlc_init_server_tree(struct proxy *p)
|
|
|
|
|
{
|
|
|
|
|
struct server *srv;
|
|
|
|
|
struct eb_root init_head = EB_ROOT;
|
|
|
|
|
|
|
|
|
|
p->lbprm.set_server_status_up = fwlc_set_server_status_up;
|
|
|
|
|
p->lbprm.set_server_status_down = fwlc_set_server_status_down;
|
|
|
|
|
p->lbprm.update_server_eweight = fwlc_update_server_weight;
|
|
|
|
|
p->lbprm.server_take_conn = fwlc_srv_reposition;
|
|
|
|
|
p->lbprm.server_drop_conn = fwlc_srv_reposition;
|
|
|
|
|
|
|
|
|
|
p->lbprm.wdiv = BE_WEIGHT_SCALE;
|
|
|
|
|
for (srv = p->srv; srv; srv = srv->next) {
|
2017-08-31 08:41:55 -04:00
|
|
|
srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
|
2014-05-13 13:27:31 -04:00
|
|
|
srv_lb_commit_status(srv);
|
2009-10-01 05:19:37 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
recount_servers(p);
|
|
|
|
|
update_backend_weight(p);
|
|
|
|
|
|
|
|
|
|
p->lbprm.fwlc.act = init_head;
|
|
|
|
|
p->lbprm.fwlc.bck = init_head;
|
|
|
|
|
|
|
|
|
|
/* queue active and backup servers in two distinct groups */
|
|
|
|
|
for (srv = p->srv; srv; srv = srv->next) {
|
2017-08-31 08:41:55 -04:00
|
|
|
if (!srv_currently_usable(srv))
|
2009-10-01 05:19:37 -04:00
|
|
|
continue;
|
2014-05-13 09:54:22 -04:00
|
|
|
srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
|
BUG/MEDIUM: lb-leastconn: Reposition a server using the right eweight
Depending on the context, the current eweight or the next one must be used
to reposition a server in the tree. When the server state is updated, for
instance its weight, the next eweight must be used because it is not yet
committed. However, when the server is used, on normal conditions, the
current eweight must be used.
In fact, it is only a bug on the 1.8. On newer versions, the changes on a
server are performed synchronously. But it is safer to rely on the right
eweight value to avoid any futur bugs.
On the 1.8, it is important to do so, because the server state is updated
and committed inside the rendez-vous point. Thus, the next server state may
be unsync with the current state for a short time, waiting all threads join
the rendez-vous point. It is especially a problem if the next eweight is set
to 0. Because otherwise, it must not be used to reposition the server in the
tree, leading to a divide by 0.
This patch must be backported as far as 1.8.
2020-12-11 09:36:01 -05:00
|
|
|
fwlc_queue_srv(srv, srv->next_eweight);
|
2009-10-01 05:19:37 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
|
|
|
|
|
* return NULL. Saturated servers are skipped.
|
2018-08-21 13:44:53 -04:00
|
|
|
*
|
2021-06-01 10:58:31 -04:00
|
|
|
* The lbprm's lock will be used in R/O mode. The server's lock is not used.
|
2009-10-01 05:19:37 -04:00
|
|
|
*/
|
|
|
|
|
struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
|
|
|
|
|
{
|
|
|
|
|
struct server *srv, *avoided;
|
|
|
|
|
struct eb32_node *node;
|
|
|
|
|
|
|
|
|
|
srv = avoided = NULL;
|
|
|
|
|
|
2020-10-17 13:32:09 -04:00
|
|
|
HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
|
2009-10-01 05:19:37 -04:00
|
|
|
if (p->srv_act)
|
|
|
|
|
node = eb32_first(&p->lbprm.fwlc.act);
|
2017-06-09 08:17:53 -04:00
|
|
|
else if (p->lbprm.fbck) {
|
|
|
|
|
srv = p->lbprm.fbck;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2009-10-01 05:19:37 -04:00
|
|
|
else if (p->srv_bck)
|
|
|
|
|
node = eb32_first(&p->lbprm.fwlc.bck);
|
2017-06-09 08:17:53 -04:00
|
|
|
else {
|
|
|
|
|
srv = NULL;
|
|
|
|
|
goto out;
|
|
|
|
|
}
|
2009-10-01 05:19:37 -04:00
|
|
|
|
|
|
|
|
while (node) {
|
|
|
|
|
/* OK, we have a server. However, it may be saturated, in which
|
|
|
|
|
* case we don't want to reconsider it for now, so we'll simply
|
|
|
|
|
* skip it. Same if it's the server we try to avoid, in which
|
|
|
|
|
* case we simply remember it for later use if needed.
|
|
|
|
|
*/
|
|
|
|
|
struct server *s;
|
|
|
|
|
|
|
|
|
|
s = eb32_entry(node, struct server, lb_node);
|
2020-10-22 11:19:07 -04:00
|
|
|
if (!s->maxconn || s->served + s->nbpend < srv_dynamic_maxconn(s) + s->maxqueue) {
|
2009-10-01 05:19:37 -04:00
|
|
|
if (s != srvtoavoid) {
|
|
|
|
|
srv = s;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
avoided = s;
|
|
|
|
|
}
|
|
|
|
|
node = eb32_next(node);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!srv)
|
|
|
|
|
srv = avoided;
|
2017-06-09 08:17:53 -04:00
|
|
|
out:
|
2020-10-17 13:32:09 -04:00
|
|
|
HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
|
2009-10-01 05:19:37 -04:00
|
|
|
return srv;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Local variables:
|
|
|
|
|
* c-indent-level: 8
|
|
|
|
|
* c-basic-offset: 8
|
|
|
|
|
* End:
|
|
|
|
|
*/
|