/*
 * Queue management functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

/* Short explanation on the locking, which is far from being trivial : a
 * pendconn is a list element which necessarily is associated with an existing
 * stream. It has pendconn->strm always valid. A pendconn may only be in one of
 * these three states :
 *   - unlinked : in this case it is an empty list head ;
 *   - linked into the server's queue ;
 *   - linked into the proxy's queue.
 *
 * A stream does not necessarily have such a pendconn. Thus the pendconn is
 * designated by the stream->pend_pos pointer. This results in some properties :
 *   - pendconn->strm->pend_pos is never NULL for any valid pendconn
 *   - if p->node.node.leaf_p is NULL, the element is unlinked,
 *     otherwise it necessarily belongs to one of the other lists ; this may
 *     not be atomically checked under threads though ;
 *   - pendconn->px is never NULL if pendconn->list is not empty
 *   - pendconn->srv is never NULL if pendconn->list is in the server's queue,
 *     and is always NULL if pendconn->list is in the backend's queue or empty.
 *   - pendconn->target is NULL while the element is queued, and points to the
 *     assigned server when the pendconn is picked.
 *
 * Threads complicate the design a little bit but rules remain simple :
 *   - the server's queue lock must be held at least when manipulating the
 *     server's queue, which is when adding a pendconn to the queue and when
 *     removing a pendconn from the queue. It protects the queue's integrity.
 *
 *   - the proxy's queue lock must be held at least when manipulating the
 *     proxy's queue, which is when adding a pendconn to the queue and when
 *     removing a pendconn from the queue. It protects the queue's integrity.
 *
 *   - both locks are compatible and may be held at the same time.
 *
 *   - a pendconn_add() is only performed by the stream which will own the
 *     pendconn ; the pendconn is allocated at this moment and returned ; it is
 *     added to either the server or the proxy's queue while holding this
 *     queue's lock.
 *
 *   - the pendconn is then met by a thread walking over the proxy or server's
 *     queue with the respective lock held. This lock is exclusive and the
 *     pendconn can only appear in one queue so by definition a single thread
 *     may find this pendconn at a time.
 *
 *   - the pendconn is unlinked either by its own stream upon success/abort/
 *     free, or by another one offering it its server slot. This is achieved by
 *     pendconn_process_next_strm() under either the server or proxy's lock,
 *     pendconn_redistribute() under the server's lock, or pendconn_unlink()
 *     under either the proxy's or the server's lock depending
 *     on the queue the pendconn is attached to.
 *
 *   - no single operation except the pendconn initialisation prior to the
 *     insertion is performed without either a queue lock held or the element
 *     being unlinked and visible exclusively to its stream.
 *
 *   - pendconn_process_next_strm() assigns ->target so that the stream knows
 *     what server to work with (via pendconn_dequeue() which sets it on
 *     strm->target).
 *
 *   - a pendconn doesn't switch between queues, it stays where it is.
 */
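
/* Lifecycle summary (added commentary derived from the rules above, not part
 * of the original file): a stream queues itself with pendconn_add(); the
 * entry is then either picked by a thread walking the queue in
 * pendconn_process_next_strm(), which sets ->target and wakes the stream, or
 * removed via pendconn_unlink() or pendconn_redistribute(). In every case the
 * removal happens under the owning queue's lock, or while the element is
 * already unlinked and visible only to its stream.
 */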

#include <import/eb32tree.h>
#include <haproxy/api.h>
#include <haproxy/backend.h>
#include <haproxy/http_rules.h>
#include <haproxy/pool.h>
#include <haproxy/queue.h>
#include <haproxy/sample.h>
#include <haproxy/server-t.h>
#include <haproxy/stream.h>
#include <haproxy/task.h>
#include <haproxy/tcp_rules.h>
#include <haproxy/thread.h>
#include <haproxy/time.h>
#include <haproxy/tools.h>

#define NOW_OFFSET_BOUNDARY()          ((now_ms - (TIMER_LOOK_BACK >> 12)) & 0xfffff)
#define KEY_CLASS(key)                 ((u32)key & 0xfff00000)
#define KEY_OFFSET(key)                ((u32)key & 0x000fffff)
#define KEY_CLASS_OFFSET_BOUNDARY(key) (KEY_CLASS(key) | NOW_OFFSET_BOUNDARY())
#define MAKE_KEY(class, offset)        (((u32)(class + 0x7ff) << 20) | ((u32)(now_ms + offset) & 0xfffff))
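
/* Key layout note (added for clarity): a queue key packs the priority class
 * into the upper 12 bits and a wrapping millisecond offset into the lower 20
 * bits. As a purely illustrative example, assuming now_ms == 1000,
 * MAKE_KEY(0, 250) gives ((0 + 0x7ff) << 20) | ((1000 + 250) & 0xfffff),
 * i.e. class 0x7ff (so that class 0 lands near the middle of the 12-bit
 * range) and offset 1250; entries thus sort by class first and by adjusted
 * enqueue time second.
 */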

DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn));

/* returns the effective dynamic maxconn for a server, considering the minconn
 * and the proxy's usage relative to its dynamic connections limit. It is
 * expected that 0 < s->minconn <= s->maxconn when this is called. If the
 * server is currently warming up, the slowstart is also applied to the
 * resulting value, which can be lower than minconn in this case, but never
 * less than 1.
 */
unsigned int srv_dynamic_maxconn(const struct server *s)
{
	unsigned int max;

	if (s->proxy->beconn >= s->proxy->fullconn)
		/* no fullconn or proxy is full */
		max = s->maxconn;
	else if (s->minconn == s->maxconn)
		/* static limit */
		max = s->maxconn;
	else max = MAX(s->minconn,
		       s->proxy->beconn * s->maxconn / s->proxy->fullconn);

	if ((s->cur_state == SRV_ST_STARTING) &&
	    ns_to_sec(now_ns) < s->counters.last_change + s->slowstart &&
	    ns_to_sec(now_ns) >= s->counters.last_change) {
		unsigned int ratio;
		ratio = 100 * (ns_to_sec(now_ns) - s->counters.last_change) / s->slowstart;
		max = MAX(1, max * ratio / 100);
	}
	return max;
}
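
/* Illustrative example of the computation above (assumed numbers, not from
 * the original source): with minconn 10, maxconn 100, fullconn 1000 and
 * beconn 250, the dynamic limit is MAX(10, 250 * 100 / 1000) = 25. If that
 * server were additionally half-way through its slowstart window, the result
 * would be further scaled to MAX(1, 25 * 50 / 100) = 12.
 */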

/* Remove the pendconn from the server's queue. At this stage, the connection
 * is not really dequeued. It will be done during process_stream(). It is up
 * to the caller to atomically decrement the pending counts.
 *
 * The caller must own the lock on the server queue. The pendconn must still be
 * queued (p->node.leaf_p != NULL) and must be in a server (p->srv != NULL).
 */
static void __pendconn_unlink_srv(struct pendconn *p)
{
	p->strm->logs.srv_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
	eb32_delete(&p->node);
}

/* Remove the pendconn from the proxy's queue. At this stage, the connection
 * is not really dequeued. It will be done during process_stream(). It is up
 * to the caller to atomically decrement the pending counts.
 *
 * The caller must own the lock on the proxy queue. The pendconn must still be
 * queued (p->node.leaf_p != NULL) and must be in the proxy (p->srv == NULL).
 */
static void __pendconn_unlink_prx(struct pendconn *p)
{
	p->strm->logs.prx_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
	eb32_delete(&p->node);
}

/* Locks the queue the pendconn element belongs to. This relies on both p->px
 * and p->srv to be properly initialized (which is always the case once the
 * element has been added).
 */
static inline void pendconn_queue_lock(struct pendconn *p)
{
	HA_SPIN_LOCK(QUEUE_LOCK, &p->queue->lock);
}

/* Unlocks the queue the pendconn element belongs to. This relies on both p->px
 * and p->srv to be properly initialized (which is always the case once the
 * element has been added).
 */
static inline void pendconn_queue_unlock(struct pendconn *p)
{
	HA_SPIN_UNLOCK(QUEUE_LOCK, &p->queue->lock);
}

/* Removes the pendconn from the server/proxy queue. At this stage, the
 * connection is not really dequeued. It will be done during process_stream().
 * This function takes all the required locks for the operation. The pendconn
 * must be valid, though it doesn't matter if it was already unlinked. Prefer
 * pendconn_cond_unlink() to first check <p>. It also forces a serialization
 * on p->del_lock to make sure another thread currently waking it up finishes
 * first.
 */
void pendconn_unlink(struct pendconn *p)
{
	struct queue  *q  = p->queue;
	struct proxy  *px = q->px;
	struct server *sv = q->sv;
	uint oldidx;
	int done = 0;

	oldidx = _HA_ATOMIC_LOAD(&p->queue->idx);
	HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
	HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);

	if (p->node.node.leaf_p) {
		eb32_delete(&p->node);
		done = 1;
	}

	HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
	HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);

	if (done) {
		oldidx -= p->queue_idx;
		if (sv)
			p->strm->logs.srv_queue_pos += oldidx;
		else
			p->strm->logs.prx_queue_pos += oldidx;

		_HA_ATOMIC_DEC(&q->length);
		_HA_ATOMIC_DEC(&px->totpend);
	}
}
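
/* Caller-side sketch (illustrative only; pendconn_cond_unlink() is the helper
 * mentioned above and is assumed here to merely check <p> for NULL before
 * calling pendconn_unlink()):
 *
 *	pendconn_cond_unlink(strm->pend_pos);
 *
 * so a stream aborting while queued only needs its own pend_pos pointer, and
 * all locking is handled in this function.
 */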

/* Retrieve the first pendconn from tree <pendconns>. Classes are always
 * considered first, then the time offset. The time does wrap, so the
 * lookup is performed twice, once to retrieve the first class and a second
 * time to retrieve the earliest time in this class.
 */
static struct pendconn *pendconn_first(struct eb_root *pendconns)
{
	struct eb32_node *node, *node2 = NULL;
	u32 key;

	node = eb32_first(pendconns);
	if (!node)
		return NULL;

	key = KEY_CLASS_OFFSET_BOUNDARY(node->key);
	node2 = eb32_lookup_ge(pendconns, key);

	if (!node2 ||
	    KEY_CLASS(node2->key) != KEY_CLASS(node->key)) {
		/* no other key in the tree, or in this class */
		return eb32_entry(node, struct pendconn, node);
	}

	/* found a better key */
	return eb32_entry(node2, struct pendconn, node);
}
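
/* Worked example of the wrap handling above (illustrative values only):
 * suppose the node returned by eb32_first() has offset 0x00010 while
 * NOW_OFFSET_BOUNDARY() is 0xfff00. The second lookup starts at that boundary
 * within the same class and returns an entry at offset 0xfff80 instead: that
 * entry was queued just before the wrap and is therefore genuinely older than
 * the one whose offset already wrapped past zero.
 */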

/* Process the next pending connection from either a server or a proxy, and
 * returns a strictly positive value on success (see below). If no pending
 * connection is found, 0 is returned. Note that neither <srv> nor <px> may be
 * NULL. Priority is given to the oldest request in the queue if both <srv> and
 * <px> have pending requests. This ensures that no request will be left
 * unserved. The <px> queue is not considered if the server (or a tracked
 * server) is not RUNNING, is disabled, or has a null weight (server going
 * down). The <srv> queue is still considered in this case, because if some
 * connections remain there, it means that some requests have been forced there
 * after it was seen down (eg: due to option persist). The stream is
 * immediately marked as "assigned", and both its <srv> and <srv_conn> are set
 * to <srv>.
 *
 * The proxy's queue will be consulted only if px_ok is non-zero.
 *
 * This function must only be called if the server queue is locked _AND_ the
 * proxy queue is not. Today it is only called by process_srv_queue.
 * It returns 1 if a pendconn was dequeued, otherwise 0.
 */
static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int px_ok)
{
	struct pendconn *p = NULL;
	struct pendconn *pp = NULL;
	u32 pkey, ppkey;
	int served;
	int maxconn;
	int got_it = 0;

	p = NULL;
	if (srv->queue.length)
		p = pendconn_first(&srv->queue.head);

	pp = NULL;
	if (px_ok && px->queue.length) {
		/* the lock only remains held as long as the pp is
		 * in the proxy's queue.
		 */
		HA_SPIN_LOCK(QUEUE_LOCK, &px->queue.lock);
		pp = pendconn_first(&px->queue.head);
		if (!pp)
			HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
	}

	if (!p && !pp)
		return 0;

	served = _HA_ATOMIC_LOAD(&srv->served);
	maxconn = srv_dynamic_maxconn(srv);

	while (served < maxconn && !got_it)
		got_it = _HA_ATOMIC_CAS(&srv->served, &served, served + 1);

	/* No more slot available, give up */
	if (!got_it) {
		if (pp)
			HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
		return 0;
	}

	/*
	 * Now we know we'll have something available.
	 * Let's try to allocate a slot on the server.
	 */
	if (!pp)
		goto use_p;  /*  p != NULL */
	else if (!p)
		goto use_pp; /* pp != NULL */

	/* p != NULL && pp != NULL */

	if (KEY_CLASS(p->node.key) < KEY_CLASS(pp->node.key))
		goto use_p;

	if (KEY_CLASS(pp->node.key) < KEY_CLASS(p->node.key))
		goto use_pp;

	pkey  = KEY_OFFSET(p->node.key);
	ppkey = KEY_OFFSET(pp->node.key);

	if (pkey < NOW_OFFSET_BOUNDARY())
		pkey += 0x100000; // key in the future

	if (ppkey < NOW_OFFSET_BOUNDARY())
		ppkey += 0x100000; // key in the future

	if (pkey <= ppkey)
		goto use_p;

 use_pp:
	/* we'd like to release the proxy lock ASAP to let other threads
	 * work with other servers. But for this we must first hold the
	 * pendconn alive to prevent a removal from its owning stream.
	 */
	HA_SPIN_LOCK(QUEUE_LOCK, &pp->del_lock);

	/* now the element won't go, we can release the proxy */
	__pendconn_unlink_prx(pp);
	HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);

	pp->strm_flags |= SF_ASSIGNED;
	pp->target = srv;
	stream_add_srv_conn(pp->strm, srv);

	/* we must wake the task up before releasing the lock as it's the only
	 * way to make sure the task still exists. The pendconn cannot vanish
	 * under us since the task will need to take the lock anyway and to wait
	 * if it wakes up on a different thread.
	 */
	task_wakeup(pp->strm->task, TASK_WOKEN_RES);
	HA_SPIN_UNLOCK(QUEUE_LOCK, &pp->del_lock);

	_HA_ATOMIC_DEC(&px->queue.length);
	_HA_ATOMIC_INC(&px->queue.idx);
	return 1;

 use_p:
	/* we don't need the px queue lock anymore, we have the server's lock */
	if (pp)
		HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);

	p->strm_flags |= SF_ASSIGNED;
	p->target = srv;
	stream_add_srv_conn(p->strm, srv);

	/* we must wake the task up before releasing the lock as it's the only
	 * way to make sure the task still exists. The pendconn cannot vanish
	 * under us since the task will need to take the lock anyway and to wait
	 * if it wakes up on a different thread.
	 */
	task_wakeup(p->strm->task, TASK_WOKEN_RES);
	__pendconn_unlink_srv(p);

	_HA_ATOMIC_DEC(&srv->queue.length);
	_HA_ATOMIC_INC(&srv->queue.idx);
	return 1;
}
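
/* Note on the slot reservation above (added commentary summarizing the code,
 * not new behaviour): srv->served is reserved with a CAS loop before any
 * pendconn is unlinked, so concurrent dequeuers can never push a server past
 * srv_dynamic_maxconn(); a thread that loses every CAS round simply releases
 * the proxy queue lock if it held it and reports that nothing was dequeued.
 */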

/* Manages a server's connection queue. This function will try to dequeue as
 * many pending streams as possible, and wake them up.
 */
int process_srv_queue(struct server *s)
{
	struct server *ref = s->track ? s->track : s;
	struct proxy  *p = s->proxy;
	int maxconn;
	int stop = 0;
	int done = 0;
	int px_ok;

	/* The proxy's queue may only be dequeued if the server is usable and,
	 * when it is a backup server, only once no active server remains and
	 * this backup may be used (it is the elected backup or all backups
	 * are allowed to be used).
	 */
	px_ok = srv_currently_usable(ref) &&
		(!(s->flags & SRV_F_BACKUP) ||
		 (!p->srv_act &&
		  (s == p->lbprm.fbck || (p->options & PR_O_USE_ALL_BK))));

	/* let's repeat that under the lock on each round. Threads competing
	 * for the same server will give up, knowing that at least one of
	 * them will check the conditions again before quitting. In order
	 * to avoid the deadly situation where one thread spends its time
	 * dequeuing for others, we limit the number of rounds it does.
	 * However we still re-enter the loop for one pass if there's no
	 * more served, otherwise we could end up with no other thread
	 * trying to dequeue them.
	 *
	 * There's one racy part: we don't want to have more than one thread
	 * in charge of dequeuing, hence the dequeuing flag. We cannot rely
	 * on a trylock here because it would compete against pendconn_add()
	 * and would occasionally leave entries in the queue that are never
	 * dequeued. Nobody else uses the dequeuing flag so when seeing it
	 * non-null, we're certain that another thread is waiting on it.
	 */
	while (!stop && (done < global.tune.maxpollevents || !s->served) &&
	       s->served < (maxconn = srv_dynamic_maxconn(s))) {
		if (HA_ATOMIC_XCHG(&s->dequeuing, 1))
			break;

		HA_SPIN_LOCK(QUEUE_LOCK, &s->queue.lock);
		while (s->served < maxconn) {
			/* pendconn_process_next_strm() will increment the
			 * served field only if it is < maxconn.
			 */
			stop = !pendconn_process_next_strm(s, p, px_ok);
			if (stop)
				break;

			done++;
			if (done >= global.tune.maxpollevents)
				break;
		}
		HA_ATOMIC_STORE(&s->dequeuing, 0);
		HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
	}

	if (done) {
		_HA_ATOMIC_SUB(&p->totpend, done);
		_HA_ATOMIC_ADD(&p->served, done);
		__ha_barrier_atomic_store();
		if (p->lbprm.server_take_conn)
			p->lbprm.server_take_conn(s);
	}
	return done;
}
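
/* Design note (added commentary, not from the original file): the
 * HA_ATOMIC_XCHG() on s->dequeuing acts as a gate that only this function
 * ever takes, so losing it means another thread is already dequeuing this
 * server and will re-check the conditions before leaving. Unlike a trylock
 * on the queue's spinlock, it cannot be lost to a concurrent pendconn_add()
 * and thus cannot leave freshly queued entries stranded.
 */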
2018-05-11 12:52:31 -04:00
|
|
|
/* Adds the stream <strm> to the pending connection queue of server <strm>->srv
|
REORG/MAJOR: session: rename the "session" entity to "stream"
With HTTP/2, we'll have to support multiplexed streams. A stream is in
fact the largest part of what we currently call a session, it has buffers,
logs, etc.
In order to catch any error, this commit removes any reference to the
struct session and tries to rename most "session" occurrences in function
names to "stream" and "sess" to "strm" when that's related to a session.
The files stream.{c,h} were added and session.{c,h} removed.
The session will be reintroduced later and a few parts of the stream
will progressively be moved overthere. It will more or less contain
only what we need in an embryonic session.
Sample fetch functions and converters will have to change a bit so
that they'll use an L5 (session) instead of what's currently called
"L4" which is in fact L6 for now.
Once all changes are completed, we should see approximately this :
L7 - http_txn
L6 - stream
L5 - session
L4 - connection | applet
There will be at most one http_txn per stream, and a same session will
possibly be referenced by multiple streams. A connection will point to
a session and to a stream. The session will hold all the information
we need to keep even when we don't yet have a stream.
Some more cleanup is needed because some code was already far from
being clean. The server queue management still refers to sessions at
many places while comments talk about connections. This will have to
be cleaned up once we have a server-side connection pool manager.
Stream flags "SN_*" still need to be renamed, it doesn't seem like
any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
|
|
|
* or to the one of <strm>->proxy if srv is NULL. All counters and back pointers
|
2006-06-25 20:48:02 -04:00
|
|
|
* are updated accordingly. Returns NULL if no memory is available, otherwise the
|
REORG/MAJOR: session: rename the "session" entity to "stream"
With HTTP/2, we'll have to support multiplexed streams. A stream is in
fact the largest part of what we currently call a session, it has buffers,
logs, etc.
In order to catch any error, this commit removes any reference to the
struct session and tries to rename most "session" occurrences in function
names to "stream" and "sess" to "strm" when that's related to a session.
The files stream.{c,h} were added and session.{c,h} removed.
The session will be reintroduced later and a few parts of the stream
will progressively be moved overthere. It will more or less contain
only what we need in an embryonic session.
Sample fetch functions and converters will have to change a bit so
that they'll use an L5 (session) instead of what's currently called
"L4" which is in fact L6 for now.
Once all changes are completed, we should see approximately this :
L7 - http_txn
L6 - stream
L5 - session
L4 - connection | applet
There will be at most one http_txn per stream, and a same session will
possibly be referenced by multiple streams. A connection will point to
a session and to a stream. The session will hold all the information
we need to keep even when we don't yet have a stream.
Some more cleanup is needed because some code was already far from
being clean. The server queue management still refers to sessions at
many places while comments talk about connections. This will have to
be cleaned up once we have a server-side connection pool manager.
Stream flags "SN_*" still need to be renamed, it doesn't seem like
any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
|
|
|
* pendconn itself. If the stream was already marked as served, its flag is
|
|
|
|
|
* cleared. It is illegal to call this function with a non-NULL strm->srv_conn.
|
2018-05-11 12:52:31 -04:00
|
|
|
* The stream's queue position is counted with an offset of -1 because we want
|
|
|
|
|
* to make sure that being at the first position in the queue reports 1.
|
BUG/MAJOR: threads/queue: Fix thread-safety issues on the queues management
The management of the servers and the proxies queues was not thread-safe at
all. First, the accesses to <strm>->pend_pos were not protected. So it was
possible to release it on a thread (for instance because the stream is released)
and to use it in same time on another one (because we redispatch pending
connections for a server). Then, the accesses to stream's information (flags and
target) from anywhere is forbidden. To be safe, The stream's state must always
be updated in the context of process_stream.
So to fix these issues, the queue module has been refactored. A lock has been
added in the pendconn structure. And now, when we try to dequeue a pending
connection, we start by unlinking it from the server/proxy queue and we wake up
the stream. Then, it is the stream reponsibility to really dequeue it (or
release it). This way, we are sure that only the stream can create and release
its <pend_pos> field.
However, be careful. This new implementation should be thread-safe
(hopefully...). But it is not optimal and in some situations, it could be really
slower in multi-threaded mode than in single-threaded one. The problem is that,
when we try to dequeue pending connections, we process it from the older one to
the newer one independently to the thread's affinity. So we need to wait the
other threads' wakeup to really process them. If threads are blocked in the
poller, this will add a significant latency. This problem happens when maxconn
values are very low.
This patch must be backported in 1.8.
2018-03-14 11:18:06 -04:00
|
|
|
*
|
2018-05-11 12:52:31 -04:00
|
|
|
* The queue is sorted by the composition of the priority_class, and the current
|
|
|
|
|
* timestamp offset by strm->priority_offset. The timestamp is in milliseconds
|
|
|
|
|
* and truncated to 20 bits, so will wrap every 17m28s575ms.
|
|
|
|
|
* The offset can be positive or negative, and an offset of 0 puts it in the
|
|
|
|
|
* middle of this range (~ 8 min). Note that this also means if the adjusted
|
|
|
|
|
* timestamp wraps around, the request will be misinterpreted as being of
|
2018-11-25 14:26:48 -05:00
|
|
|
* the highest priority for that priority class.
|
2018-05-11 12:52:31 -04:00
|
|
|
*
|
BUG/MAJOR: threads/queue: Fix thread-safety issues on the queues management
The management of the servers and the proxies queues was not thread-safe at
all. First, the accesses to <strm>->pend_pos were not protected. So it was
possible to release it on a thread (for instance because the stream is released)
and to use it in same time on another one (because we redispatch pending
connections for a server). Then, the accesses to stream's information (flags and
target) from anywhere is forbidden. To be safe, The stream's state must always
be updated in the context of process_stream.
So to fix these issues, the queue module has been refactored. A lock has been
added in the pendconn structure. And now, when we try to dequeue a pending
connection, we start by unlinking it from the server/proxy queue and we wake up
the stream. Then, it is the stream reponsibility to really dequeue it (or
release it). This way, we are sure that only the stream can create and release
its <pend_pos> field.
However, be careful. This new implementation should be thread-safe
(hopefully...). But it is not optimal and in some situations, it could be really
slower in multi-threaded mode than in single-threaded one. The problem is that,
when we try to dequeue pending connections, we process it from the older one to
the newer one independently to the thread's affinity. So we need to wait the
other threads' wakeup to really process them. If threads are blocked in the
poller, this will add a significant latency. This problem happens when maxconn
values are very low.
This patch must be backported in 1.8.
2018-03-14 11:18:06 -04:00
|
|
|
* This function must be called by the stream itself, so in the context of
|
|
|
|
|
* process_stream.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
REORG/MAJOR: session: rename the "session" entity to "stream"
With HTTP/2, we'll have to support multiplexed streams. A stream is in
fact the largest part of what we currently call a session, it has buffers,
logs, etc.
In order to catch any error, this commit removes any reference to the
struct session and tries to rename most "session" occurrences in function
names to "stream" and "sess" to "strm" when that's related to a session.
The files stream.{c,h} were added and session.{c,h} removed.
The session will be reintroduced later and a few parts of the stream
will progressively be moved overthere. It will more or less contain
only what we need in an embryonic session.
Sample fetch functions and converters will have to change a bit so
that they'll use an L5 (session) instead of what's currently called
"L4" which is in fact L6 for now.
Once all changes are completed, we should see approximately this :
L7 - http_txn
L6 - stream
L5 - session
L4 - connection | applet
There will be at most one http_txn per stream, and a same session will
possibly be referenced by multiple streams. A connection will point to
a session and to a stream. The session will hold all the information
we need to keep even when we don't yet have a stream.
Some more cleanup is needed because some code was already far from
being clean. The server queue management still refers to sessions at
many places while comments talk about connections. This will have to
be cleaned up once we have a server-side connection pool manager.
Stream flags "SN_*" still need to be renamed, it doesn't seem like
any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
|
|
|
struct pendconn *pendconn_add(struct stream *strm)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
|
|
|
|
struct pendconn *p;
|
BUG/MAJOR: threads/queue: Fix thread-safety issues on the queues management
The management of the server and proxy queues was not thread-safe at all.
First, the accesses to <strm>->pend_pos were not protected, so it was
possible to release it on one thread (for instance because the stream is
released) while using it at the same time on another one (because we
redispatch pending connections for a server). Second, accessing the stream's
information (flags and target) from an arbitrary context is forbidden; to be
safe, the stream's state must always be updated in the context of
process_stream.
To fix these issues, the queue module has been refactored. A lock has been
added in the pendconn structure. Now, when we try to dequeue a pending
connection, we start by unlinking it from the server/proxy queue and we wake
up the stream. It is then the stream's responsibility to really dequeue it
(or release it). This way, we are sure that only the stream can create and
release its <pend_pos> field.
However, be careful. This new implementation should be thread-safe
(hopefully...). But it is not optimal and, in some situations, it can be
noticeably slower in multi-threaded mode than in single-threaded mode. The
problem is that, when we try to dequeue pending connections, we process them
from the oldest to the newest regardless of the threads' affinity, so we
need to wait for the other threads to wake up before they really process
them. If threads are blocked in the poller, this adds a significant latency.
This problem happens when maxconn values are very low.
This patch must be backported to 1.8.
2018-03-14 11:18:06 -04:00
|
|
|
struct proxy *px;
|
|
|
|
|
struct server *srv;
|
2021-06-18 04:21:20 -04:00
|
|
|
struct queue *q;
|
|
|
|
|
unsigned int *max_ptr;
|
|
|
|
|
unsigned int old_max, new_max;
|
2006-06-25 20:48:02 -04:00
|
|
|
|
2017-11-24 11:34:44 -05:00
|
|
|
p = pool_alloc(pool_head_pendconn);
|
2006-06-25 20:48:02 -04:00
|
|
|
if (!p)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2018-07-26 01:38:54 -04:00
|
|
|
p->target = NULL;
|
2018-05-11 12:52:31 -04:00
|
|
|
p->node.key = MAKE_KEY(strm->priority_class, strm->priority_offset);
|
2018-03-14 11:18:06 -04:00
|
|
|
p->strm = strm;
|
|
|
|
|
p->strm_flags = strm->flags;
|
2021-08-31 11:21:39 -04:00
|
|
|
HA_SPIN_INIT(&p->del_lock);
|
2021-06-18 04:33:47 -04:00
|
|
|
strm->pend_pos = p;
|
2011-03-10 10:55:02 -05:00
|
|
|
|
2021-06-23 10:43:45 -04:00
|
|
|
px = strm->be;
|
|
|
|
|
if (strm->flags & SF_ASSIGNED)
|
|
|
|
|
srv = objt_server(strm->target);
|
|
|
|
|
else
|
|
|
|
|
srv = NULL;
|
|
|
|
|
|
2018-07-26 02:03:14 -04:00
|
|
|
if (srv) {
|
2021-06-18 04:21:20 -04:00
|
|
|
q = &srv->queue;
|
|
|
|
|
max_ptr = &srv->counters.nbpend_max;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
2018-03-14 11:18:06 -04:00
|
|
|
else {
|
2021-06-18 04:21:20 -04:00
|
|
|
q = &px->queue;
|
|
|
|
|
max_ptr = &px->be_counters.nbpend_max;
|
|
|
|
|
}
|
2021-06-18 04:21:20 -04:00
|
|
|
|
2021-06-23 10:33:52 -04:00
|
|
|
p->queue = q;
|
2021-06-18 04:51:58 -04:00
|
|
|
p->queue_idx = _HA_ATOMIC_LOAD(&q->idx) - 1; // for logging only
|
2021-06-18 04:21:20 -04:00
|
|
|
new_max = _HA_ATOMIC_ADD_FETCH(&q->length, 1);
|
|
|
|
|
old_max = _HA_ATOMIC_LOAD(max_ptr);
|
|
|
|
|
while (new_max > old_max) {
|
|
|
|
|
if (likely(_HA_ATOMIC_CAS(max_ptr, &old_max, new_max)))
|
|
|
|
|
break;
|
2021-06-24 01:22:15 -04:00
|
|
|
}
|
2021-06-18 04:21:20 -04:00
|
|
|
__ha_barrier_atomic_store();
|
|
|
|
|
|
|
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
|
|
|
|
|
eb32_insert(&q->head, &p->node);
|
|
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
|
2018-07-26 02:03:14 -04:00
|
|
|
|
2021-04-06 07:53:36 -04:00
|
|
|
_HA_ATOMIC_INC(&px->totpend);
|
2006-06-25 20:48:02 -04:00
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
|
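The nbpend_max update above is a small lock-free "running maximum": take the new queue length, then CAS it into the max counter only while it is still larger than the recorded value, letting concurrent writers win when they hold a bigger value. A minimal sketch of the same pattern, written with standard C11 atomics instead of the _HA_ATOMIC_* wrappers used here (the helper name is hypothetical):

#include <stdatomic.h>

/* Publish <new_val> into <*max_ptr> if it is a new maximum. On CAS failure,
 * old_val is refreshed with the current value and the loop re-checks whether
 * our value is still the larger one, so no update is ever lost.
 */
static void update_running_max(_Atomic unsigned int *max_ptr, unsigned int new_val)
{
        unsigned int old_val = atomic_load(max_ptr);

        while (new_val > old_val) {
                if (atomic_compare_exchange_weak(max_ptr, &old_val, new_val))
                        break;
        }
}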
2014-05-16 05:48:10 -04:00
|
|
|
/* Redistribute pending connections when a server goes down. The number of
|
2021-06-18 03:45:27 -04:00
|
|
|
* connections redistributed is returned. It takes the server's queue lock (and
|
|
|
|
|
* the proxy's queue lock if the backend's queue must be purged as well) and
* neither uses nor depends on other locks.
|
2014-05-16 05:48:10 -04:00
|
|
|
*/
|
|
|
|
|
int pendconn_redistribute(struct server *s)
|
|
|
|
|
{
|
2018-05-11 12:52:31 -04:00
|
|
|
struct pendconn *p;
|
2019-05-27 02:10:11 -04:00
|
|
|
struct eb32_node *node, *nodeb;
|
BUG/MEDIUM: queue: always dequeue the backend when redistributing the last server
An interesting bug was revealed by commit 5541d4995d ("BUG/MEDIUM: queue:
deal with a rare TOCTOU in assign_server_and_queue()"). When shutting
down a server to redistribute its connections, no check is made on the
backend's queue. If we're turning off the last server and the backend
has pending connections, these ones will wait there till the queue
timeout. But worse, since the commit above, we can enter an endless loop
in the following situation:
- streams are present in the backend's queue
- streams are purged on the last server via srv_shutdown_streams()
- that one calls pendconn_redistribute(srv) which does not purge
the backend's pendconns
- a stream performs some load balancing and enters assign_server_and_queue()
- assign_server() is called in turn
- the LB algo is non-deterministic and there are entries in the
backend's queue. The function notices it and returns SRV_STATUS_FULL
- assign_server_and_queue() calls pendconn_add() to add the connection
to the backend's queue
- on return, pendconn_must_try_again() is called; it sees that no stream
is served anymore on either the server or the proxy, so it removes
the pendconn from the queue and returns 1
- assign_server_and_queue() loops back to the beginning to try again,
while the conditions have not changed, resulting in an endless loop.
Ideally a change count should be used in the queues so that it's possible
to detect that some dequeuing happened and/or that the last stream has left.
But that wouldn't completely solve the underlying problem, which is that we
must never add to a queue when there are no server streams left to dequeue
the new entries.
The current solution consists in making pendconn_redistribute() take care
of the proxy after the server in case there's no more server available on
the proxy. It at least ensures that no pending streams are left in the
backend's queue when shutting streams down or when the last server goes
down. The try_again loop remains necessary to deal with inevitable races
during pendconn additions. It could be limited to a few rounds, though,
but it should never trigger if the conditions are sufficient to permit
it to converge.
One way to reproduce the issue is to run a config with a single server
with maxconn 1 and plenty of threads, then run in loops series of:
"disable server px/s;shutdown sessions server px/s;
wait 100ms server-removable px/s; show servers conn px;
enable server px/s"
on the CLI at ~10/s while injecting with around 40 concurrent conns at
40-100k RPS. In this case, within 10s to 1mn the crash can appear with a
backtrace like this one for at least 1 thread:
#0 pendconn_add (strm=strm@entry=0x17f2ce0) at src/queue.c:487
#1 0x000000000064797d in assign_server_and_queue (s=s@entry=0x17f2ce0) at src/backend.c:1064
#2 0x000000000064a928 in srv_redispatch_connect (s=s@entry=0x17f2ce0) at src/backend.c:1962
#3 0x000000000064ac54 in back_handle_st_req (s=s@entry=0x17f2ce0) at src/backend.c:2287
#4 0x00000000005ae1d5 in process_stream (t=t@entry=0x17f4ab0, context=0x17f2ce0, state=<optimized out>) at src/stream.c:2336
It's worth noting that other threads may often appear waiting in the
poller, and one in server_atomic_sync() waiting for isolation, because
the event that is processed when shutting the server down is consumed
under isolation, and having fewer threads available to dequeue remaining
requests increases the probability of triggering the problem, though it is
not at all necessary (some less common traces never show them).
This should carefully be backported wherever the commit above was
backported.
2024-10-01 12:57:51 -04:00
|
|
|
struct proxy *px = s->proxy;
|
|
|
|
|
int px_xferred = 0;
|
2014-05-16 05:48:10 -04:00
|
|
|
int xferred = 0;
|
|
|
|
|
|
2018-03-14 11:18:06 -04:00
|
|
|
/* The REDISP option was specified. We will ignore the cookie and force the
|
2024-09-27 12:54:07 -04:00
|
|
|
* request to be balanced again or handed to the dispatcher.
|
|
|
|
|
*/
|
|
|
|
|
if (!(s->cur_admin & SRV_ADMF_MAINT) &&
|
|
|
|
|
(s->proxy->options & (PR_O_REDISP|PR_O_PERSIST)) != PR_O_REDISP)
|
2024-10-01 12:57:51 -04:00
|
|
|
goto skip_srv_queue;
|
2018-03-14 11:18:06 -04:00
|
|
|
|
2021-06-24 10:00:18 -04:00
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &s->queue.lock);
|
2021-06-18 03:30:30 -04:00
|
|
|
for (node = eb32_first(&s->queue.head); node; node = nodeb) {
|
2019-05-27 02:10:11 -04:00
|
|
|
nodeb = eb32_next(node);
|
|
|
|
|
|
|
|
|
|
p = eb32_entry(node, struct pendconn, node);
|
2018-03-14 11:18:06 -04:00
|
|
|
if (p->strm_flags & SF_FORCE_PRST)
|
|
|
|
|
continue;
|
2014-05-16 05:48:10 -04:00
|
|
|
|
2018-03-14 11:18:06 -04:00
|
|
|
/* it's left to the dispatcher to choose a server */
|
2020-10-21 05:20:07 -04:00
|
|
|
__pendconn_unlink_srv(p);
|
2024-09-27 12:54:07 -04:00
|
|
|
if (!(s->proxy->options & PR_O_REDISP))
|
|
|
|
|
p->strm_flags &= ~(SF_DIRECT | SF_ASSIGNED);
|
2014-05-16 05:48:10 -04:00
|
|
|
|
2022-06-16 10:10:05 -04:00
|
|
|
task_wakeup(p->strm->task, TASK_WOKEN_RES);
|
2020-10-21 05:54:38 -04:00
|
|
|
xferred++;
|
2014-05-16 05:48:10 -04:00
|
|
|
}
|
2021-06-24 10:00:18 -04:00
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
|
2021-06-18 03:45:27 -04:00
|
|
|
|
2020-10-21 06:01:28 -04:00
|
|
|
if (xferred) {
|
2021-06-18 03:30:30 -04:00
|
|
|
_HA_ATOMIC_SUB(&s->queue.length, xferred);
|
2020-10-24 06:57:41 -04:00
|
|
|
_HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
|
2020-10-21 06:01:28 -04:00
|
|
|
}
|
2024-10-01 12:57:51 -04:00
|
|
|
|
|
|
|
|
skip_srv_queue:
|
|
|
|
|
if (px->lbprm.tot_wact || px->lbprm.tot_wbck)
|
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &px->queue.lock);
|
|
|
|
|
for (node = eb32_first(&px->queue.head); node; node = nodeb) {
|
|
|
|
|
nodeb = eb32_next(node);
|
|
|
|
|
p = eb32_entry(node, struct pendconn, node);
|
|
|
|
|
|
|
|
|
|
/* force-persist streams may occasionally appear in the
|
|
|
|
|
* proxy's queue, and we certainly don't want them here!
|
|
|
|
|
*/
|
|
|
|
|
p->strm_flags &= ~SF_FORCE_PRST;
|
|
|
|
|
__pendconn_unlink_prx(p);
|
|
|
|
|
|
|
|
|
|
task_wakeup(p->strm->task, TASK_WOKEN_RES);
|
|
|
|
|
px_xferred++;
|
|
|
|
|
}
|
|
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
|
|
|
|
|
|
|
|
|
|
if (px_xferred) {
|
|
|
|
|
_HA_ATOMIC_SUB(&px->queue.length, px_xferred);
|
|
|
|
|
_HA_ATOMIC_SUB(&px->totpend, px_xferred);
|
|
|
|
|
}
|
|
|
|
|
done:
|
|
|
|
|
return xferred + px_xferred;
|
2014-05-16 05:48:10 -04:00
|
|
|
}
|
|
|
|
|
|
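Both loops in pendconn_redistribute() above walk an ebtree while deleting from it, so they fetch the next node before touching the current one: once the current node is unlinked it can no longer be passed to eb32_next(). A standalone sketch of that pattern, relying on the same eb32 API already used throughout this file (the function and its processing step are illustrative only):

/* Drain every node from <head>. <next> must be read before the current node
 * is deleted, because eb32_next() cannot be called on an unlinked node.
 */
static void drain_tree(struct eb_root *head)
{
        struct eb32_node *node, *next;

        for (node = eb32_first(head); node; node = next) {
                next = eb32_next(node);
                eb32_delete(node);
                /* ... process the entry that carried <node> here ... */
        }
}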
2018-03-14 11:18:06 -04:00
|
|
|
/* Try to dequeue pending connection attached to the stream <strm>. It must
|
|
|
|
|
* always exist here. If the pendconn is still linked to the server or the
|
|
|
|
|
* proxy queue, nothing is done and the function returns 1. Otherwise,
|
|
|
|
|
* <strm>->flags and <strm>->target are updated, the pendconn is released and 0
|
|
|
|
|
* is returned.
|
|
|
|
|
*
|
|
|
|
|
* This function must be called by the stream itself, so in the context of
|
|
|
|
|
* process_stream.
|
2006-06-25 20:48:02 -04:00
|
|
|
*/
|
2018-03-14 11:18:06 -04:00
|
|
|
int pendconn_dequeue(struct stream *strm)
|
2006-06-25 20:48:02 -04:00
|
|
|
{
|
2018-03-14 11:18:06 -04:00
|
|
|
struct pendconn *p;
|
2018-07-26 02:23:24 -04:00
|
|
|
int is_unlinked;
|
2018-03-14 11:18:06 -04:00
|
|
|
|
2022-02-28 13:16:31 -05:00
|
|
|
/* unexpected case because it is called by the stream itself and
|
|
|
|
|
* only the stream can release a pendconn. So it is only
|
|
|
|
|
* possible if a pendconn is released by someone else or if the
|
|
|
|
|
* stream is supposed to be queued but without its associated
|
|
|
|
|
* pendconn. In both cases it is a bug! */
|
|
|
|
|
BUG_ON(!strm->pend_pos);
|
|
|
|
|
|
2018-03-14 11:18:06 -04:00
|
|
|
p = strm->pend_pos;
|
|
|
|
|
|
2018-07-26 02:23:24 -04:00
|
|
|
/* note below : we need to grab the queue's lock to check for emptiness
|
2024-12-17 09:40:41 -05:00
|
|
|
* because we don't want a partial process_srv_queue() or redistribute()
|
2018-07-26 02:23:24 -04:00
|
|
|
* to be called in parallel and show an empty list without having the
|
|
|
|
|
* time to finish. With this we know that if we see the element
|
|
|
|
|
* unlinked, these functions were completely done.
|
|
|
|
|
*/
|
|
|
|
|
pendconn_queue_lock(p);
|
2018-05-11 12:52:31 -04:00
|
|
|
is_unlinked = !p->node.node.leaf_p;
|
2018-07-26 02:23:24 -04:00
|
|
|
pendconn_queue_unlock(p);
|
|
|
|
|
|
2021-08-31 11:21:39 -04:00
|
|
|
/* serialize to make sure the element was finished processing */
|
|
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
|
|
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
|
|
|
|
|
|
2018-07-26 02:23:24 -04:00
|
|
|
if (!is_unlinked)
|
2018-03-14 11:18:06 -04:00
|
|
|
return 1;
|
|
|
|
|
|
2018-07-26 02:23:24 -04:00
|
|
|
/* the pendconn is not queued anymore and will not be so we're safe
|
|
|
|
|
* to proceed.
|
|
|
|
|
*/
|
MEDIUM: stream: remove the confusing SF_ADDR_SET flag
This flag is no longer needed now that it must always match the presence
of a destination address on the backend conn_stream. Worse, before the
previous patch, if it were to be accidentally removed while the address is
present, it
could result in a leak of that address since alloc_dst_address() would first
be called to flush it.
Its usage has a long history where addresses were stored in an area shared
with the connection, but as this is no longer the case, there's no reason
for putting this burden onto application-level code that should not focus
on setting obscure flags.
The only place where that made a small difference is in the dequeuing code
in case of queue redistribution, because previously the code would first
clear the flag, and only later when trying to deal with the queue, would
release the address. It's not even certain whether there would exist a
code path going to connect_server() without calling pendconn_dequeue()
first (e.g. retries on queue timeout maybe?).
Now the pendconn_dequeue() code will rely on SF_ASSIGNED to decide to
clear and release the address, since that flag is always set while in
a server's queue, and its clearance implies that we don't want to keep
the address. At least it remains consistent and there's no more risk of
leaking it.
2022-05-02 10:36:47 -04:00
|
|
|
strm->flags &= ~(SF_DIRECT | SF_ASSIGNED);
|
|
|
|
|
strm->flags |= p->strm_flags & (SF_DIRECT | SF_ASSIGNED);
|
BUG/MAJOR: queue: set SF_ASSIGNED when setting strm->target on dequeue
Commit 82cd5c13a ("OPTIM: backend: skip LB when we know the backend is
full") has uncovered a long-burried bug in the dequeing code: when a
server releases a connection, it picks a new one from the proxy's or
its queue. Technically speaking it only picks a pendconn which is a
link between a position in the queue and a stream. It then sets this
pendconn's target to itself, and wakes up the stream's task so that
it can try to connect again.
The stream then goes through the regular connection setup phases,
calls back_try_conn_req() which calls pendconn_dequeue(), which
sets the stream's target to the pendconn's and releases the pendconn.
It then reaches assign_server() which sees no SF_ASSIGNED and calls
assign_server_and_queue() to perform load balancing or queuing. This
one first destroys the stream's target and gets ready to perform load
balancing. At this point we're load-balancing for no reason since we
already knew what server was available. And this is where the commit
above comes into play: the check for the backend's queue above may
detect other connections that arrived in between, and will immediately
return FULL, forcing this request back into the queue. If the server
had a very low maxconn (e.g. 1 due to a long slowstart), it's possible
that this evicted connection was the last one on the server and that
no other one will ever be present to process the queue. Usually a
regularly processed request will still have its own srv_conn that will
be used during stream_free() to dequeue other connections. But if the
server had a down-up cycle, then a call to pendconn_grab_from_px()
may start to dequeue entries which had no srv_conn and which will have
no server slot to offer when they expire, thus maintaining the situation
above forever. Worse, as new requests arrive, there are always some
requests in the queue and the situation feeds on itself.
The correct fix here is to properly set SF_ASSIGNED in pendconn_dequeue()
when the stream's target is assigned (as it's what this flag means), so
as to avoid a load-balancing pass when dequeuing.
Many thanks to Pierre Cheynier for the numerous detailed traces he
provided that helped narrow this problem down.
This could be backported to all stable versions, but in practice only
2.3 and above are really affected since the presence of the commit
above. Given how tricky this code is it's better to limit it to those
versions that really need it.
2021-06-16 02:42:23 -04:00
|
|
|
|
BUG/MEDIUM: backend: fix possible sockaddr leak on redispatch
A subtle change of target address allocation was introduced with commit
68cf3959b ("MINOR: backend: rewrite alloc of stream target address") in
2.4. Prior to this patch, a target address was allocated by function
assign_server_address() only if none was previously allocated. After
the change, the allocation became unconditional. Most of the time it
makes no difference, except when we pass multiple times through
connect_server() with SF_ADDR_SET cleared.
The most obvious fix would be to avoid allocating that address there
when already set, but the root cause is that since introduction of
dynamically allocated addresses, the SF_ADDR_SET flag lies. It can
be cleared during redispatch or during a queue redistribution without
the address being released.
This patch instead gives back all its correct meaning to SF_ADDR_SET
and guarantees that when not set no address is allocated, by freeing
that address at the few places the flag is cleared. The flag could
even be removed so that only the address is checked but that would
require to touch many areas for no benefit.
The easiest way to test it is to send requests to a proxy with l7
retries enabled, which forwards to a server returning 500:
defaults
mode http
timeout client 1s
timeout server 1s
timeout connect 1s
retry-on all-retryable-errors
retries 1
option redispatch
listen proxy
bind *:5000
server app 0.0.0.0:5001
frontend dummy-app
bind :5001
http-request return status 500
Issuing "show pools" on the CLI will show that pool "sockaddr" grows
as requests are redispatched, and remains stable with the fix. Even
"ps" will show that the process' RSS grows by ~160B per request.
This fix will need to be backported to 2.4. Note that before 2.5,
there's no strm->si[1].dst, strm->target_addr must be used instead.
This addresses github issue #1499. Special thanks to Daniil Leontiev
for providing a well-documented reproducer.
2021-12-24 05:27:53 -05:00
|
|
|
/* the entry might have been redistributed to another server */
|
2022-05-02 10:36:47 -04:00
|
|
|
if (!(strm->flags & SF_ASSIGNED))
|
2022-05-17 13:40:40 -04:00
|
|
|
sockaddr_free(&strm->scb->dst);
|
2021-12-24 05:27:53 -05:00
|
|
|
|
2021-06-16 02:42:23 -04:00
|
|
|
if (p->target) {
|
|
|
|
|
/* a server picked this pendconn, it must skip LB */
|
|
|
|
|
strm->target = &p->target->obj_type;
|
|
|
|
|
strm->flags |= SF_ASSIGNED;
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-14 11:18:06 -04:00
|
|
|
strm->pend_pos = NULL;
|
2017-11-24 11:34:44 -05:00
|
|
|
pool_free(pool_head_pendconn, p);
|
2018-03-14 11:18:06 -04:00
|
|
|
return 0;
|
2006-06-25 20:48:02 -04:00
|
|
|
}
|
|
|
|
|
|
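To restate the contract documented above pendconn_dequeue(): a return value of 1 means the pendconn is still queued and the stream must keep waiting, while 0 means it was unlinked, <strm>->flags and <strm>->target were updated and the pendconn was freed. A simplified, hypothetical caller-side sketch (the real callers live in backend.c, e.g. back_try_conn_req()):

/* Hypothetical helper: returns 1 when the stream may proceed with connection
 * setup, 0 when it must keep waiting for a wakeup from the queue.
 */
static int stream_try_leave_queue(struct stream *strm)
{
        if (!strm->pend_pos)
                return 1;               /* never queued, nothing to do */

        if (pendconn_dequeue(strm))
                return 0;               /* still queued: keep waiting */

        /* dequeued: if a server picked this pendconn, strm->target is set
         * and SF_ASSIGNED is present, so load balancing can be skipped.
         */
        return 1;
}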
2024-07-26 13:24:33 -04:00
|
|
|
/* checks after a successful pendconn_add() if the connection ended up being
|
|
|
|
|
* alone with no active connection left to dequeue it. In such a case it will
|
|
|
|
|
* simply remove it from the queue, free it and return non-zero to inform the
|
|
|
|
|
* caller that it must try to add the connection again, otherwise it returns
|
|
|
|
|
* zero, indicating that the connection will be handled normally. The caller
|
|
|
|
|
* might have to drop SF_DIRECT and/or SF_ASSIGNED if the conn was on a proxy.
|
|
|
|
|
*/
|
|
|
|
|
int pendconn_must_try_again(struct pendconn *p)
|
|
|
|
|
{
|
|
|
|
|
struct queue *q = p->queue;
|
|
|
|
|
struct proxy *px = q->px;
|
|
|
|
|
struct server *sv = q->sv;
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
|
if (likely(!HA_ATOMIC_LOAD(&p->node.node.leaf_p)))
|
|
|
|
|
goto leave;
|
|
|
|
|
|
|
|
|
|
/* for a server, we need at least one conn left on this server to
|
|
|
|
|
* find ours.
|
|
|
|
|
*/
|
|
|
|
|
if (likely(sv && HA_ATOMIC_LOAD(&sv->served)))
|
|
|
|
|
goto leave;
|
|
|
|
|
|
|
|
|
|
/* for a backend, we need at least one conn left on any of this
|
|
|
|
|
* backend's servers to find ours.
|
|
|
|
|
*/
|
|
|
|
|
if (likely(!sv && HA_ATOMIC_LOAD(&px->served)))
|
|
|
|
|
goto leave;
|
|
|
|
|
|
|
|
|
|
/* OK the situation is not safe anymore, we need to check if we're
|
|
|
|
|
* still in the queue under a lock.
|
|
|
|
|
*/
|
|
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
|
|
|
|
|
HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
|
|
|
|
|
|
|
|
|
|
if (p->node.node.leaf_p) {
|
|
|
|
|
eb32_delete(&p->node);
|
|
|
|
|
_HA_ATOMIC_DEC(&q->length);
|
|
|
|
|
_HA_ATOMIC_INC(&q->idx);
|
|
|
|
|
_HA_ATOMIC_DEC(&px->totpend);
|
|
|
|
|
ret = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
|
|
|
|
|
HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
|
|
|
|
|
|
|
|
|
|
/* check if the connection was still queued. If not, it means its
|
|
|
|
|
* processing has begun so it's safe.
|
|
|
|
|
*/
|
|
|
|
|
if (!ret)
|
|
|
|
|
goto leave;
|
|
|
|
|
|
|
|
|
|
/* The pendconn is not queued anymore and will not be so we're safe
|
|
|
|
|
* to free it.
|
|
|
|
|
*/
|
|
|
|
|
p->strm->pend_pos = NULL;
|
|
|
|
|
pool_free(pool_head_pendconn, p);
|
|
|
|
|
|
|
|
|
|
leave:
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
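pendconn_must_try_again() is meant to be paired with pendconn_add() on the caller side, as its comment above describes: queue the stream, then verify that something is still there to dequeue it, and retry if not. A simplified sketch of that add-then-recheck loop, loosely modelled on what the commit messages describe assign_server_and_queue() doing (names and structure here are illustrative, not the real backend.c code):

/* Hypothetical caller: returns 0 once the stream is safely queued,
 * -1 on allocation failure.
 */
static int queue_stream_with_retry(struct stream *strm)
{
        struct pendconn *p;

        while (1) {
                p = pendconn_add(strm);
                if (!p)
                        return -1;

                if (!pendconn_must_try_again(p))
                        return 0;       /* queued; a later dequeue will wake us */

                /* nothing was left to serve this queue: the pendconn was
                 * removed and freed, so drop any stale assignment and retry.
                 */
                strm->flags &= ~(SF_DIRECT | SF_ASSIGNED);
        }
}

As the 2024 commit message above explains, this loop only converges because pendconn_redistribute() now also purges the backend's queue when the last server goes away; otherwise the conditions could remain unchanged forever.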
2018-05-11 12:52:31 -04:00
|
|
|
static enum act_return action_set_priority_class(struct act_rule *rule, struct proxy *px,
|
|
|
|
|
struct session *sess, struct stream *s, int flags)
|
|
|
|
|
{
|
|
|
|
|
struct sample *smp;
|
|
|
|
|
|
|
|
|
|
smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
|
|
|
|
|
if (!smp)
|
|
|
|
|
return ACT_RET_CONT;
|
|
|
|
|
|
|
|
|
|
s->priority_class = queue_limit_class(smp->data.u.sint);
|
|
|
|
|
return ACT_RET_CONT;
|
|
|
|
|
}

static enum act_return action_set_priority_offset(struct act_rule *rule, struct proxy *px,
                                                  struct session *sess, struct stream *s, int flags)
{
	struct sample *smp;

	smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
	if (!smp)
		return ACT_RET_CONT;

	s->priority_offset = queue_limit_offset(smp->data.u.sint);

	return ACT_RET_CONT;
}
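
/* Note (added for clarity, not in the original source): both actions follow
 * the same pattern: evaluate the rule's sample expression as a signed
 * integer at request time, clamp it with queue_limit_class() or
 * queue_limit_offset() so it fits in the range the queue ordering key can
 * encode, and store the result on the stream. The stored values are only
 * consumed later, when the stream gets queued and its position key is
 * computed.
 */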

static enum act_parse_ret parse_set_priority_class(const char **args, int *arg, struct proxy *px,
                                                   struct act_rule *rule, char **err)
{
	unsigned int where = 0;

	rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
	                                   px->conf.args.line, err, &px->conf.args, NULL);
	if (!rule->arg.expr)
		return ACT_RET_PRS_ERR;

	if (px->cap & PR_CAP_FE)
		where |= SMP_VAL_FE_HRQ_HDR;
	if (px->cap & PR_CAP_BE)
		where |= SMP_VAL_BE_HRQ_HDR;

	if (!(rule->arg.expr->fetch->val & where)) {
		memprintf(err,
			  "fetch method '%s' extracts information from '%s', none of which is available here",
			  args[0], sample_src_names(rule->arg.expr->fetch->use));
		free(rule->arg.expr);
		return ACT_RET_PRS_ERR;
	}

	rule->action = ACT_CUSTOM;
	rule->action_ptr = action_set_priority_class;
	return ACT_RET_PRS_OK;
}

static enum act_parse_ret parse_set_priority_offset(const char **args, int *arg, struct proxy *px,
                                                    struct act_rule *rule, char **err)
{
	unsigned int where = 0;

	rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
	                                   px->conf.args.line, err, &px->conf.args, NULL);
	if (!rule->arg.expr)
		return ACT_RET_PRS_ERR;

	if (px->cap & PR_CAP_FE)
		where |= SMP_VAL_FE_HRQ_HDR;
	if (px->cap & PR_CAP_BE)
		where |= SMP_VAL_BE_HRQ_HDR;

	if (!(rule->arg.expr->fetch->val & where)) {
		memprintf(err,
			  "fetch method '%s' extracts information from '%s', none of which is available here",
			  args[0], sample_src_names(rule->arg.expr->fetch->use));
		free(rule->arg.expr);
		return ACT_RET_PRS_ERR;
	}

	rule->action = ACT_CUSTOM;
	rule->action_ptr = action_set_priority_offset;
	return ACT_RET_PRS_OK;
}

static struct action_kw_list tcp_cont_kws = {ILH, {
	{ "set-priority-class", parse_set_priority_class },
	{ "set-priority-offset", parse_set_priority_offset },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_cont_kws);
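
/* Example usage (illustrative only, not part of this file): once registered,
 * these keywords become available in "tcp-request content" rules, e.g.:
 *
 *	backend be_app
 *		tcp-request content set-priority-class  int(-10) if FROM_VIP
 *		tcp-request content set-priority-offset int(20)  if DEGRADED
 *
 * FROM_VIP and DEGRADED are hypothetical ACLs defined elsewhere in the
 * configuration; any sample expression yielding an integer works.
 */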

static struct action_kw_list http_req_kws = {ILH, {
	{ "set-priority-class", parse_set_priority_class },
	{ "set-priority-offset", parse_set_priority_offset },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
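
/* Example usage (illustrative only): the same actions are exposed to
 * "http-request" rules, e.g.:
 *
 *	frontend fe_web
 *		http-request set-priority-class  int(-5)  if { path_beg /api/ }
 *		http-request set-priority-offset int(100) if { hdr(user-agent) -m sub bot }
 *
 * Lower class values are served first; the offset biases ordering within a
 * class. The ACL conditions above are made up for the example.
 */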

static int
smp_fetch_priority_class(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	if (!smp->strm)
		return 0;

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = smp->strm->priority_class;

	return 1;
}

static int
smp_fetch_priority_offset(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	if (!smp->strm)
		return 0;

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = smp->strm->priority_offset;

	return 1;
}

static struct sample_fetch_kw_list smp_kws = {ILH, {
	{ "prio_class", smp_fetch_priority_class, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "prio_offset", smp_fetch_priority_offset, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ /* END */},
}};

INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
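
/* Example usage (illustrative only): the fetches registered above can be used
 * wherever a sample expression is accepted once a stream exists, for instance
 * to log the effective queue priority of each request:
 *
 *	frontend fe_web
 *		log-format "%ci:%cp [%tr] %ft prio=%[prio_class]/%[prio_offset] %r"
 *
 * The log-format string is just an example; any valid format would do.
 */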

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */