haproxy/src/backend.c

3402 lines
101 KiB
C
Raw Normal View History

/*
* Backend variables and functions.
*
* Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <import/ebmbtree.h>
#include <haproxy/api.h>
#include <haproxy/acl.h>
#include <haproxy/activity.h>
#include <haproxy/arg.h>
#include <haproxy/backend.h>
#include <haproxy/channel.h>
#include <haproxy/check.h>
#include <haproxy/frontend.h>
#include <haproxy/global.h>
#include <haproxy/hash.h>
#include <haproxy/http.h>
#include <haproxy/http_ana.h>
#include <haproxy/http_htx.h>
#include <haproxy/htx.h>
#include <haproxy/lb_chash.h>
#include <haproxy/lb_fas.h>
#include <haproxy/lb_fwlc.h>
#include <haproxy/lb_fwrr.h>
#include <haproxy/lb_map.h>
#include <haproxy/log.h>
#include <haproxy/namespace.h>
#include <haproxy/obj_type.h>
#include <haproxy/payload.h>
#include <haproxy/proto_tcp.h>
#include <haproxy/protocol.h>
#include <haproxy/proxy.h>
#include <haproxy/queue.h>
#include <haproxy/sample.h>
#include <haproxy/sc_strm.h>
#include <haproxy/server.h>
#include <haproxy/session.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/stconn.h>
#include <haproxy/stream.h>
#include <haproxy/task.h>
#include <haproxy/ticks.h>
#include <haproxy/time.h>
#include <haproxy/trace.h>
#define TRACE_SOURCE &trace_strm
int be_lastsession(const struct proxy *be)
{
if (be->be_counters.last_sess)
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
return ns_to_sec(now_ns) - be->be_counters.last_sess;
return -1;
}
/* helper function to invoke the correct hash method */
/* Applies the hash function configured on proxy <px> ("hash-type" setting) to
 * the <len> first bytes of <key>, then optionally applies the avalanche stage
 * on the result. Returns the 32-bit hash value.
 */
unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len)
{
	unsigned int func = px->lbprm.algo & BE_LB_HASH_FUNC;
	unsigned int hash;

	if (func == BE_LB_HFCN_DJB2)
		hash = hash_djb2(key, len);
	else if (func == BE_LB_HFCN_WT6)
		hash = hash_wt6(key, len);
	else if (func == BE_LB_HFCN_CRC32)
		hash = hash_crc32(key, len);
	else if (func == BE_LB_HFCN_NONE) {
		/* no hashing at all: decode the key bytes as an integer */
		const char *pos = key;

		hash = read_int64(&pos, pos + len);
	}
	else {
		/* BE_LB_HFCN_SDBM, also the default hash function */
		hash = hash_sdbm(key, len);
	}

	/* optional avalanche pass to improve the distribution */
	if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
		hash = full_hash(hash);

	return hash;
}
/*
* This function recounts the number of usable active and backup servers for
* proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
* This function also recomputes the total active and backup weights. However,
* it does not update tot_weight nor tot_used. Use update_backend_weight() for
* this.
* This functions is designed to be called before server's weight and state
* commit so it uses 'next' weight and states values.
*
* threads: this is the caller responsibility to lock data. For now, this
* function is called from lb modules, so it should be ok. But if you need to
* call it from another place, be careful (and update this comment).
*/
/* Recounts the usable active and backup servers of proxy <px> into
 * px->srv_act / px->srv_bck, recomputes tot_wact / tot_wbck and records the
 * first usable backup in lbprm.fbck unless "allbackups" is set. Works on the
 * 'next' weights/states (pre-commit values); tot_weight/tot_used are NOT
 * updated here, see update_backend_weight().
 *
 * threads: locking is the caller's responsibility (currently only called
 * from lb modules).
 */
void recount_servers(struct proxy *px)
{
	struct server *cur;

	px->srv_act = px->srv_bck = 0;
	px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
	px->lbprm.fbck = NULL;

	for (cur = px->srv; cur; cur = cur->next) {
		if (!srv_willbe_usable(cur))
			continue;

		if (!(cur->flags & SRV_F_BACKUP)) {
			/* usable active server */
			px->srv_act++;
			cur->cumulative_weight = px->lbprm.tot_wact;
			px->lbprm.tot_wact += cur->next_eweight;
			continue;
		}

		/* usable backup server: remember the first one unless all
		 * backups are meant to be used together ("allbackups").
		 */
		if (!px->srv_bck && !(px->options & PR_O_USE_ALL_BK))
			px->lbprm.fbck = cur;
		px->srv_bck++;
		cur->cumulative_weight = px->lbprm.tot_wbck;
		px->lbprm.tot_wbck += cur->next_eweight;
	}
}
/* This function simply updates the backend's tot_weight and tot_used values
* after servers weights have been updated. It is designed to be used after
* recount_servers() or equivalent.
*
* threads: this is the caller responsibility to lock data. For now, this
* function is called from lb modules, so it should be ok. But if you need to
* call it from another place, be careful (and update this comment).
*/
/* Refreshes px->lbprm.tot_weight and tot_used from the counters computed by
 * recount_servers() (or equivalent): actives win, else the first backup only,
 * else all backups together.
 *
 * threads: locking is the caller's responsibility (currently only called
 * from lb modules).
 */
void update_backend_weight(struct proxy *px)
{
	if (px->srv_act) {
		/* at least one active server: only actives are considered */
		px->lbprm.tot_weight = px->lbprm.tot_wact;
		px->lbprm.tot_used = px->srv_act;
		return;
	}

	if (px->lbprm.fbck) {
		/* no active server, use only the first backup server */
		px->lbprm.tot_weight = px->lbprm.fbck->next_eweight;
		px->lbprm.tot_used = 1;
		return;
	}

	/* no active server and no designated first backup: share the load
	 * over all backup servers.
	 */
	px->lbprm.tot_weight = px->lbprm.tot_wbck;
	px->lbprm.tot_used = px->srv_bck;
}
/*
* This function tries to find a running server for the proxy <px> following
* the source hash method. Depending on the number of active/backup servers,
* it will either look for active servers, or for backup servers.
* If any server is found, it will be returned. If no valid server is found,
* NULL is returned.
*/
struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid)
{
unsigned int h, l;
if (px->lbprm.tot_weight == 0)
return NULL;
l = h = 0;
/* note: we won't hash if there's only one server left */
if (px->lbprm.tot_used == 1)
goto hash_done;
while ((l + sizeof (int)) <= len) {
h ^= ntohl(*(unsigned int *)(&addr[l]));
l += sizeof (int);
}
/* FIXME: why don't we use gen_hash() here as well?
* -> we don't take into account hash function from "hash_type"
* options here..
*/
2013-11-05 11:54:02 -05:00
if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
h = full_hash(h);
hash_done:
if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
return chash_get_server_hash(px, h, avoid);
else
return map_get_server_hash(px, h);
}
/*
* This function tries to find a running server for the proxy <px> following
* the URI hash method. In order to optimize cache hits, the hash computation
* ends at the question mark. Depending on the number of active/backup servers,
* it will either look for active servers, or for backup servers.
* If any server is found, it will be returned. If no valid server is found,
* NULL is returned. The lbprm.arg_opt{1,2,3} values correspond respectively to
* the "whole" optional argument (boolean, bit0), the "len" argument (numeric)
* and the "depth" argument (numeric).
*
* This code was contributed by Guillaume Dallaire, who also selected this hash
* algorithm out of a tens because it gave him the best results.
*
*/
struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid)
{
	unsigned int hash = 0;
	const char *beg, *ptr;
	int slashes = 0;

	if (px->lbprm.tot_weight == 0)
		return NULL;

	/* a single usable server does not need any hashing */
	if (px->lbprm.tot_used == 1)
		goto hash_done;

	/* optionally cap the hashed part to the configured "len" */
	if (px->lbprm.arg_opt2)
		uri_len = MIN(uri_len, px->lbprm.arg_opt2);

	/* scan the URI up to either the configured "depth" (number of
	 * slashes), or the question mark unless "whole" was set.
	 */
	beg = ptr = uri;
	while (uri_len--) {
		int c = *ptr;

		if (c == '/') {
			slashes++;
			if (slashes == px->lbprm.arg_opt3) /* depth+1 */
				break;
		}
		else if (c == '?' && !(px->lbprm.arg_opt1 & 1)) /* "whole" */
			break;
		ptr++;
	}

	hash = gen_hash(px, beg, (ptr - beg));

 hash_done:
	if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
		return chash_get_server_hash(px, hash, avoid);
	else
		return map_get_server_hash(px, hash);
}
/*
* This function tries to find a running server for the proxy <px> following
* the URL parameter hash method. It looks for a specific parameter in the
* URL and hashes it to compute the server ID. This is useful to optimize
* performance by avoiding bounces between servers in contexts where sessions
* are shared but cookies are not usable. If the parameter is not found, NULL
* is returned. If any server is found, it will be returned. If no valid server
* is found, NULL is returned.
*/
struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid)
{
	unsigned int hash = 0;
	const char *start, *end;
	const char *p;
	const char *params;
	int plen;

	/* when tot_weight is 0 then so is srv_count */
	if (px->lbprm.tot_weight == 0)
		return NULL;

	/* no query string at all -> no parameter to hash on */
	if ((p = memchr(uri, '?', uri_len)) == NULL)
		return NULL;

	/* <p>/<params> now point past the '?', <uri_len> is the remaining
	 * length of the query string.
	 */
	p++;
	uri_len -= (p - uri);

	plen = px->lbprm.arg_len;
	params = p;

	/* walk the "name=value" pairs; a candidate must leave at least
	 * <plen> chars so that params[plen] is a valid read.
	 */
	while (uri_len > plen) {
		/* Look for the parameter name followed by an equal symbol */
		if (params[plen] == '=') {
			if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
				/* OK, we have the parameter here at <params>, and
				 * the value after the equal sign, at <p>
				 * skip the equal symbol
				 */
				p += plen + 1;
				start = end = p;
				uri_len -= plen + 1;

				/* the value runs until the next '&' or the end
				 * of the query string.
				 */
				while (uri_len && *end != '&') {
					uri_len--;
					end++;
				}
				hash = gen_hash(px, start, (end - start));

				if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
					return chash_get_server_hash(px, hash, avoid);
				else
					return map_get_server_hash(px, hash);
			}
		}
		/* skip to next parameter */
		p = memchr(params, '&', uri_len);
		if (!p)
			return NULL;
		p++;
		uri_len -= (p - params);
		params = p;
	}
	/* parameter not found: the caller falls back to another algorithm */
	return NULL;
}
/*
* this does the same as the previous server_ph, but check the body contents
*/
struct server *get_server_ph_post(struct stream *s, const struct server *avoid)
{
	unsigned int hash = 0;
	struct channel *req = &s->req;
	struct proxy *px = s->be;
	struct htx *htx = htxbuf(&req->buf);
	struct htx_blk *blk;
	unsigned int plen = px->lbprm.arg_len;
	unsigned long len;
	const char *params, *p, *start, *end;

	if (px->lbprm.tot_weight == 0)
		return NULL;

	p = params = NULL;
	len = 0;

	/* locate the first DATA block of the request body; only that block
	 * is scanned for the parameter.
	 */
	for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
		enum htx_blk_type type = htx_get_blk_type(blk);
		struct ist v;

		if (type != HTX_BLK_DATA)
			continue;
		v = htx_get_blk_value(htx, blk);
		p = params = v.ptr;
		len = v.len;
		break;
	}

	/* walk the "name=value" pairs, same scheme as get_server_ph() but on
	 * the body instead of the query string.
	 */
	while (len > plen) {
		/* Look for the parameter name followed by an equal symbol */
		if (params[plen] == '=') {
			if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
				/* OK, we have the parameter here at <params>, and
				 * the value after the equal sign, at <p>
				 * skip the equal symbol
				 */
				p += plen + 1;
				start = end = p;
				len -= plen + 1;

				while (len && *end != '&') {
					/* NOTE(review): this checks *p (the first char of
					 * the value) on every iteration rather than *end
					 * (the char being scanned) — verify whether *end
					 * was intended here.
					 */
					if (unlikely(!HTTP_IS_TOKEN(*p))) {
						/* if in a POST, body must be URI encoded or it's not a URI.
						 * Do not interpret any possible binary data as a parameter.
						 */
						if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
							break;
						return NULL; /* oh, no; this is not uri-encoded.
							      * This body does not contain parameters.
							      */
					}
					len--;
					end++;
					/* should we break if vlen exceeds limit? */
				}
				hash = gen_hash(px, start, (end - start));

				if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
					return chash_get_server_hash(px, hash, avoid);
				else
					return map_get_server_hash(px, hash);
			}
		}
		/* skip to next parameter */
		p = memchr(params, '&', len);
		if (!p)
			return NULL;
		p++;
		len -= (p - params);
		params = p;
	}
	/* parameter not found: the caller falls back to another algorithm */
	return NULL;
}
/*
* This function tries to find a running server for the proxy <px> following
* the Header parameter hash method. It looks for a specific parameter in the
* URL and hashes it to compute the server ID. This is useful to optimize
* performance by avoiding bounces between servers in contexts where sessions
* are shared but cookies are not usable. If the parameter is not found, NULL
* is returned. If any server is found, it will be returned. If no valid server
* is found, NULL is returned. When lbprm.arg_opt1 is set, the hash will only
* apply to the middle part of a domain name ("use_domain_only" option).
*/
struct server *get_server_hh(struct stream *s, const struct server *avoid)
{
	unsigned int hash = 0;
	struct proxy *px = s->be;
	unsigned int plen = px->lbprm.arg_len;
	unsigned long len;
	const char *p;
	const char *start, *end;
	struct htx *htx = htxbuf(&s->req.buf);
	struct http_hdr_ctx ctx = { .blk = NULL };

	/* tot_weight appears to mean srv_count */
	if (px->lbprm.tot_weight == 0)
		return NULL;

	/* note: we won't hash if there's only one server left */
	if (px->lbprm.tot_used == 1)
		goto hash_done;

	http_find_header(htx, ist2(px->lbprm.arg_str, plen), &ctx, 0);

	/* if the header is not found or empty, let's fallback to round robin */
	if (!ctx.blk || !ctx.value.len)
		return NULL;

	/* Found the param_name in the headers.
	 * we will compute the hash based on this value ctx.val.
	 */
	len = ctx.value.len;
	p = ctx.value.ptr;

	if (!px->lbprm.arg_opt1) {
		/* default mode: hash the whole header value */
		hash = gen_hash(px, p, len);
	} else {
		/* "use_domain_only" mode: hash only the second-level domain,
		 * e.g. "example" in "www.example.com".
		 */
		int dohash = 0;
		p += len;
		/* special computation, use only main domain name, not tld/host
		 * going back from the end of string, start hashing at first
		 * dot stop at next.
		 * This is designed to work with the 'Host' header, and requires
		 * a special option to activate this.
		 */
		end = p;
		while (len) {
			if (dohash) {
				/* Rewind the pointer until the previous char
				 * is a dot, this will allow to set the start
				 * position of the domain. */
				if (*(p - 1) == '.')
					break;
			}
			else if (*p == '.') {
				/* The pointer is rewinded to the dot before the
				 * tld, we memorize the end of the domain and
				 * can enter the domain processing. */
				end = p;
				dohash = 1;
			}
			p--;
			len--;
		}
		start = p;
		hash = gen_hash(px, start, (end - start));
	}
 hash_done:
	if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
		return chash_get_server_hash(px, hash, avoid);
	else
		return map_get_server_hash(px, hash);
}
/* RDP Cookie HASH. */
struct server *get_server_rch(struct stream *s, const struct server *avoid)
{
unsigned int hash = 0;
struct proxy *px = s->be;
unsigned long len;
int ret;
struct sample smp;
BUG/MAJOR: fix regression on content-based hashing and http-send-name-header The recent split between the buffers and HTTP messages in 1.5-dev9 caused a major trouble : in the past, we used to keep a pointer to HTTP data in the buffer struct itself, which was the cause of most of the pain we had to deal with buffers. Now the two are split but we lost the information about the beginning of the HTTP message once it's being forwarded. While it seems normal, it happens that several parts of the code currently rely on this ability to inspect a buffer containing old contents : - balance uri - balance url_param - balance url_param check_post - balance hdr() - balance rdp-cookie() - http-send-name-header All these happen after the data are scheduled for being forwarded, which also causes a server to be selected. So for a long time we've been relying on supposedly sent data that we still had a pointer to. Now that we don't have such a pointer anymore, we only have one possibility : when we need to inspect such data, we have to rewind the buffer so that ->p points to where it previously was. We're lucky, no data can leave the buffer before it's being connecting outside, and since no inspection can begin until it's empty, we know that the skipped data are exactly ->o. So we rewind the buffer by ->o to get headers and advance it back by the same amount. Proceeding this way is particularly important when dealing with chunked- encoded requests, because the ->som and ->sov fields may be reused by the chunk parser before the connection attempt is made, so we cannot rely on them. Also, we need to be able to come back after retries and redispatches, which might change the size of the request if http-send-name-header is set. All of this is accounted for by the output queue so in the end it does not look like a bad solution. No backport is needed.
2012-05-18 16:12:14 -04:00
int rewind;
/* tot_weight appears to mean srv_count */
if (px->lbprm.tot_weight == 0)
return NULL;
memset(&smp, 0, sizeof(smp));
rewind = co_data(&s->req);
c_rew(&s->req, rewind);
BUG/MAJOR: fix regression on content-based hashing and http-send-name-header The recent split between the buffers and HTTP messages in 1.5-dev9 caused a major trouble : in the past, we used to keep a pointer to HTTP data in the buffer struct itself, which was the cause of most of the pain we had to deal with buffers. Now the two are split but we lost the information about the beginning of the HTTP message once it's being forwarded. While it seems normal, it happens that several parts of the code currently rely on this ability to inspect a buffer containing old contents : - balance uri - balance url_param - balance url_param check_post - balance hdr() - balance rdp-cookie() - http-send-name-header All these happen after the data are scheduled for being forwarded, which also causes a server to be selected. So for a long time we've been relying on supposedly sent data that we still had a pointer to. Now that we don't have such a pointer anymore, we only have one possibility : when we need to inspect such data, we have to rewind the buffer so that ->p points to where it previously was. We're lucky, no data can leave the buffer before it's being connecting outside, and since no inspection can begin until it's empty, we know that the skipped data are exactly ->o. So we rewind the buffer by ->o to get headers and advance it back by the same amount. Proceeding this way is particularly important when dealing with chunked- encoded requests, because the ->som and ->sov fields may be reused by the chunk parser before the connection attempt is made, so we cannot rely on them. Also, we need to be able to come back after retries and redispatches, which might change the size of the request if http-send-name-header is set. All of this is accounted for by the output queue so in the end it does not look like a bad solution. No backport is needed.
2012-05-18 16:12:14 -04:00
ret = fetch_rdp_cookie_name(s, &smp, px->lbprm.arg_str, px->lbprm.arg_len);
len = smp.data.u.str.data;
c_adv(&s->req, rewind);
BUG/MAJOR: fix regression on content-based hashing and http-send-name-header The recent split between the buffers and HTTP messages in 1.5-dev9 caused a major trouble : in the past, we used to keep a pointer to HTTP data in the buffer struct itself, which was the cause of most of the pain we had to deal with buffers. Now the two are split but we lost the information about the beginning of the HTTP message once it's being forwarded. While it seems normal, it happens that several parts of the code currently rely on this ability to inspect a buffer containing old contents : - balance uri - balance url_param - balance url_param check_post - balance hdr() - balance rdp-cookie() - http-send-name-header All these happen after the data are scheduled for being forwarded, which also causes a server to be selected. So for a long time we've been relying on supposedly sent data that we still had a pointer to. Now that we don't have such a pointer anymore, we only have one possibility : when we need to inspect such data, we have to rewind the buffer so that ->p points to where it previously was. We're lucky, no data can leave the buffer before it's being connecting outside, and since no inspection can begin until it's empty, we know that the skipped data are exactly ->o. So we rewind the buffer by ->o to get headers and advance it back by the same amount. Proceeding this way is particularly important when dealing with chunked- encoded requests, because the ->som and ->sov fields may be reused by the chunk parser before the connection attempt is made, so we cannot rely on them. Also, we need to be able to come back after retries and redispatches, which might change the size of the request if http-send-name-header is set. All of this is accounted for by the output queue so in the end it does not look like a bad solution. No backport is needed.
2012-05-18 16:12:14 -04:00
if (ret == 0 || (smp.flags & SMP_F_MAY_CHANGE) || len == 0)
return NULL;
/* note: we won't hash if there's only one server left */
if (px->lbprm.tot_used == 1)
goto hash_done;
/* Found the param_name in the headers.
* we will compute the hash based on this value ctx.val.
*/
hash = gen_hash(px, smp.data.u.str.area, len);
hash_done:
if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
return chash_get_server_hash(px, hash, avoid);
else
return map_get_server_hash(px, hash);
}
/* sample expression HASH. Returns NULL if the sample is not found or if there
* are no server, relying on the caller to fall back to round robin instead.
*/
/* sample expression HASH. Returns NULL if the sample is not found or if there
 * are no server, relying on the caller to fall back to round robin instead.
 */
struct server *get_server_expr(struct stream *s, const struct server *avoid)
{
	struct proxy *px = s->be;
	unsigned int hash = 0;
	struct sample *smp;

	if (px->lbprm.tot_weight == 0)
		return NULL;

	/* a single usable server does not need any hashing */
	if (px->lbprm.tot_used != 1) {
		smp = sample_fetch_as_type(px, s->sess, s, SMP_OPT_DIR_REQ | SMP_OPT_FINAL, px->lbprm.expr, SMP_T_BIN);
		if (!smp)
			return NULL;

		/* We have the desired data. Let's hash it according to the
		 * configured options and algorithm.
		 */
		hash = gen_hash(px, smp->data.u.str.area, smp->data.u.str.data);
	}

	if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
		return chash_get_server_hash(px, hash, avoid);
	else
		return map_get_server_hash(px, hash);
}
/* random value */
/* random value: performs <arg_opt1> random draws over the consistent-hash
 * ring and keeps the least loaded of the drawn servers ("power of N random
 * choices"). Returns NULL if no server is usable or the chosen one is full.
 */
struct server *get_server_rnd(struct stream *s, const struct server *avoid)
{
	struct proxy *px = s->be;
	struct server *best = NULL;
	int draws = px->lbprm.arg_opt1; /* number of draws */

	/* tot_weight appears to mean srv_count */
	if (px->lbprm.tot_weight == 0)
		return NULL;

	do {
		struct server *pick;

		pick = chash_get_server_hash(px, statistical_prng(), avoid);
		if (!pick) {
			best = NULL;
			break;
		}

		/* keep whichever of the previous best and the new pick serves
		 * the fewest requests relative to its effective weight.
		 */
		if (!best || best == pick ||
		    pick->served * best->cur_eweight <= best->served * pick->cur_eweight)
			best = pick;
	} while (--draws > 0);

	/* if the selected server is full, pretend we have none so that we reach
	 * the backend's queue instead.
	 */
	if (best &&
	    (best->queue.length || (best->maxconn && best->served >= srv_dynamic_maxconn(best))))
		best = NULL;

	return best;
}
/*
 * This function applies the load-balancing algorithm to the stream, as
 * defined by the backend it is assigned to. The stream is then marked as
 * 'assigned'.
 *
 * This function MAY NOT be called with SF_ASSIGNED already set. If the stream
 * had a server previously assigned, it is rebalanced, trying to avoid the same
 * server, which should still be present in target_srv(&s->target) before the call.
 * The function tries to keep the original connection slot if it reconnects to
 * the same server, otherwise it releases it and tries to offer it.
 *
 * It is illegal to call this function with a stream in a queue.
 *
 * It may return :
 *   SRV_STATUS_OK       if everything is OK. ->srv and ->target are assigned.
 *   SRV_STATUS_NOSRV    if no server is available. Stream is not ASSIGNED
 *   SRV_STATUS_FULL     if all servers are saturated. Stream is not ASSIGNED
 *   SRV_STATUS_INTERNAL for other unrecoverable errors.
 *
 * Upon successful return, the stream flag SF_ASSIGNED is set to indicate that
 * it does not need to be called anymore. This means that target_srv(&s->target)
 * can be trusted in balance and direct modes.
 *
 */
int assign_server(struct stream *s)
{
struct connection *conn = NULL;
struct server *conn_slot;
struct server *srv = NULL, *prev_srv;
int err;
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3 When haproxy decides that session needs to be redispatched it chose a server, but there is no guarantee for it to be a different one. So, it often happens that selected server is exactly the same that it was previously, so a client ends up with a 503 error anyway, especially when one sever has much bigger weight than others. Changes from the previous version: - drop stupid and unnecessary SN_DIRECT changes - assign_server(): use srvtoavoid to keep the old server and clear s->srv so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again) and get_server_rr_with_conns has chances to work (previously we were passing a NULL here) - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns incrementing as t->srv was guaranteed to be NULL - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code. - fix http_flush_cookie_flags() and move it to assign_server_and_queue() directly. The code here was supposed to set CK_DOWN and clear CK_VALID, but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags ^= (TX_CK_VALID | TX_CK_DOWN); was really a: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags &= TX_CK_VALID Now haproxy logs "--DI" after redispatching connection. - defer srv->redispatches++ and s->be->redispatches++ so there are called only if a conenction was redispatched, not only supposed to. 
- don't increment lbconn if redispatcher selected the same sarver - don't count unsuccessfully redispatched connections as redispatched connections - don't count redispatched connections as errors, so: - the number of connections effectively served by a server is: srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches and SUM(servers->failed_conns) == be->failed_conns - requires the "Don't increment server connections too much + fix retries" patch - needs little more testing and probably some discussion so reverting to the RFC state Tests #1: retries 4 redispatch i) 1 server(s): b (wght=1, down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request failed ii) server(s): b (wght=1, down), u (wght=1, down) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0 -> request FAILED iii) 2 server(s): b (wght=1, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK iv) 2 server(s): b (wght=100, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK v) 1 server(s): b (down for first 4 SYNS) b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0 -> request OK Tests #2: retries 4 i) 1 server(s): b (down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request FAILED
2008-02-21 21:50:19 -05:00
err = SRV_STATUS_INTERNAL;
if (unlikely(s->pend_pos || s->flags & SF_ASSIGNED))
goto out_err;
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3 When haproxy decides that session needs to be redispatched it chose a server, but there is no guarantee for it to be a different one. So, it often happens that selected server is exactly the same that it was previously, so a client ends up with a 503 error anyway, especially when one sever has much bigger weight than others. Changes from the previous version: - drop stupid and unnecessary SN_DIRECT changes - assign_server(): use srvtoavoid to keep the old server and clear s->srv so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again) and get_server_rr_with_conns has chances to work (previously we were passing a NULL here) - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns incrementing as t->srv was guaranteed to be NULL - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code. - fix http_flush_cookie_flags() and move it to assign_server_and_queue() directly. The code here was supposed to set CK_DOWN and clear CK_VALID, but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags ^= (TX_CK_VALID | TX_CK_DOWN); was really a: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags &= TX_CK_VALID Now haproxy logs "--DI" after redispatching connection. - defer srv->redispatches++ and s->be->redispatches++ so there are called only if a conenction was redispatched, not only supposed to. 
- don't increment lbconn if redispatcher selected the same sarver - don't count unsuccessfully redispatched connections as redispatched connections - don't count redispatched connections as errors, so: - the number of connections effectively served by a server is: srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches and SUM(servers->failed_conns) == be->failed_conns - requires the "Don't increment server connections too much + fix retries" patch - needs little more testing and probably some discussion so reverting to the RFC state Tests #1: retries 4 redispatch i) 1 server(s): b (wght=1, down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request failed ii) server(s): b (wght=1, down), u (wght=1, down) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0 -> request FAILED iii) 2 server(s): b (wght=1, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK iv) 2 server(s): b (wght=100, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK v) 1 server(s): b (down for first 4 SYNS) b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0 -> request OK Tests #2: retries 4 i) 1 server(s): b (down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request FAILED
2008-02-21 21:50:19 -05:00
prev_srv = objt_server(s->target);
conn_slot = s->srv_conn;
/* We have to release any connection slot before applying any LB algo,
* otherwise we may erroneously end up with no available slot.
*/
if (conn_slot)
sess_change_server(s, NULL);
/* We will now try to find the good server and store it into <objt_server(s->target)>.
* Note that <objt_server(s->target)> may be NULL in case of dispatch or proxy mode,
* as well as if no server is available (check error code).
*/
srv = NULL;
s->target = NULL;
if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI &&
((s->sess->flags & SESS_FL_PREFER_LAST) ||
(s->be->options & PR_O_PREF_LAST))) {
struct sess_srv_list *srv_list;
list_for_each_entry(srv_list, &s->sess->srv_list, srv_list) {
struct server *tmpsrv = objt_server(srv_list->target);
if (tmpsrv && tmpsrv->proxy == s->be &&
((s->sess->flags & SESS_FL_PREFER_LAST) ||
(!s->be->max_ka_queue ||
server_has_room(tmpsrv) || (
tmpsrv->queue.length + 1 < s->be->max_ka_queue))) &&
srv_currently_usable(tmpsrv)) {
list_for_each_entry(conn, &srv_list->conn_list, session_list) {
if (!(conn->flags & CO_FL_WAIT_XPRT)) {
srv = tmpsrv;
s->target = &srv->obj_type;
if (conn->flags & CO_FL_SESS_IDLE) {
conn->flags &= ~CO_FL_SESS_IDLE;
s->sess->idle_conns--;
}
goto out_ok;
}
}
}
}
}
if (s->be->lbprm.algo & BE_LB_KIND) {
/* we must check if we have at least one server available */
if (!s->be->lbprm.tot_weight) {
err = SRV_STATUS_NOSRV;
goto out;
}
/* if there's some queue on the backend, with certain algos we
* know it's because all servers are full.
*/
if (s->be->queue.length && s->be->queue.length != s->be->beconn &&
(((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_FAS)|| // first
((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_RR) || // roundrobin
((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_SRR))) { // static-rr
err = SRV_STATUS_FULL;
goto out;
}
/* First check whether we need to fetch some data or simply call
* the LB lookup function. Only the hashing functions will need
* some input data in fact, and will support multiple algorithms.
*/
switch (s->be->lbprm.algo & BE_LB_LKUP) {
case BE_LB_LKUP_RRTREE:
srv = fwrr_get_next_server(s->be, prev_srv);
break;
case BE_LB_LKUP_FSTREE:
srv = fas_get_next_server(s->be, prev_srv);
break;
case BE_LB_LKUP_LCTREE:
srv = fwlc_get_next_server(s->be, prev_srv);
break;
case BE_LB_LKUP_CHTREE:
case BE_LB_LKUP_MAP:
if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR) {
/* static-rr (map) or random (chash) */
if ((s->be->lbprm.algo & BE_LB_PARM) == BE_LB_RR_RANDOM)
srv = get_server_rnd(s, prev_srv);
else
srv = map_get_server_rr(s->be, prev_srv);
break;
}
else if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI) {
/* unknown balancing algorithm */
err = SRV_STATUS_INTERNAL;
goto out;
}
switch (s->be->lbprm.algo & BE_LB_PARM) {
const struct sockaddr_storage *src;
case BE_LB_HASH_SRC:
src = sc_src(s->scf);
if (src && src->ss_family == AF_INET) {
srv = get_server_sh(s->be,
(void *)&((struct sockaddr_in *)src)->sin_addr,
4, prev_srv);
}
else if (src && src->ss_family == AF_INET6) {
srv = get_server_sh(s->be,
(void *)&((struct sockaddr_in6 *)src)->sin6_addr,
16, prev_srv);
}
break;
case BE_LB_HASH_URI:
/* URI hashing */
if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
struct ist uri;
uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
if (s->be->lbprm.arg_opt1 & 2) {
struct http_uri_parser parser =
http_uri_parser_init(uri);
uri = http_parse_path(&parser);
if (!isttest(uri))
uri = ist("");
}
srv = get_server_uh(s->be, uri.ptr, uri.len, prev_srv);
}
break;
case BE_LB_HASH_PRM:
/* URL Parameter hashing */
if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
struct ist uri;
uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
srv = get_server_ph(s->be, uri.ptr, uri.len, prev_srv);
if (!srv && s->txn->meth == HTTP_METH_POST)
srv = get_server_ph_post(s, prev_srv);
}
break;
case BE_LB_HASH_HDR:
/* Header Parameter hashing */
if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY)
srv = get_server_hh(s, prev_srv);
break;
case BE_LB_HASH_RDP:
/* RDP Cookie hashing */
srv = get_server_rch(s, prev_srv);
break;
case BE_LB_HASH_SMP:
/* sample expression hashing */
srv = get_server_expr(s, prev_srv);
break;
default:
/* unknown balancing algorithm */
err = SRV_STATUS_INTERNAL;
goto out;
}
/* If the hashing parameter was not found, let's fall
* back to round robin on the map.
*/
if (!srv) {
if ((s->be->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
srv = chash_get_next_server(s->be, prev_srv);
else
srv = map_get_server_rr(s->be, prev_srv);
}
/* end of map-based LB */
break;
default:
/* unknown balancing algorithm */
err = SRV_STATUS_INTERNAL;
goto out;
}
if (!srv) {
err = SRV_STATUS_FULL;
goto out;
}
else if (srv != prev_srv) {
_HA_ATOMIC_INC(&s->be->be_counters.cum_lbconn);
_HA_ATOMIC_INC(&srv->counters.cum_lbconn);
}
s->target = &srv->obj_type;
}
else if (s->be->options & (PR_O_DISPATCH | PR_O_TRANSP)) {
s->target = &s->be->obj_type;
}
else {
err = SRV_STATUS_NOSRV;
goto out;
}
out_ok:
s->flags |= SF_ASSIGNED;
err = SRV_STATUS_OK;
out:
/* Either we take back our connection slot, or we offer it to someone
* else if we don't need it anymore.
*/
if (conn_slot) {
if (conn_slot == srv) {
sess_change_server(s, srv);
} else {
if (may_dequeue_tasks(conn_slot, s->be))
process_srv_queue(conn_slot);
}
}
out_err:
return err;
}
/* Allocate an address for the destination endpoint
* The address is taken from the currently assigned server, or from the
* dispatch or transparent address.
*
* Returns SRV_STATUS_OK on success. Does nothing if the address was
* already set.
* On error, no address is allocated and SRV_STATUS_INTERNAL is returned.
*/
static int alloc_dst_address(struct sockaddr_storage **ss,
struct server *srv, struct stream *s)
{
const struct sockaddr_storage *dst;
if (*ss)
return SRV_STATUS_OK;
if ((s->flags & SF_DIRECT) || (s->be->lbprm.algo & BE_LB_KIND)) {
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* A server is necessarily known for this stream */
if (!(s->flags & SF_ASSIGNED))
return SRV_STATUS_INTERNAL;
if (!sockaddr_alloc(ss, NULL, 0))
return SRV_STATUS_INTERNAL;
**ss = srv->addr;
set_host_port(*ss, srv->svc_port);
if (!is_addr(*ss)) {
/* if the server has no address, we use the same address
* the client asked, which is handy for remapping ports
* locally on multiple addresses at once. Nothing is done
* for AF_UNIX addresses.
*/
dst = sc_dst(s->scf);
if (dst && dst->ss_family == AF_INET) {
((struct sockaddr_in *)*ss)->sin_family = AF_INET;
((struct sockaddr_in *)*ss)->sin_addr =
((struct sockaddr_in *)dst)->sin_addr;
} else if (dst && dst->ss_family == AF_INET6) {
((struct sockaddr_in6 *)*ss)->sin6_family = AF_INET6;
((struct sockaddr_in6 *)*ss)->sin6_addr =
((struct sockaddr_in6 *)dst)->sin6_addr;
}
}
/* if this server remaps proxied ports, we'll use
* the port the client connected to with an offset. */
if ((srv->flags & SRV_F_MAPPORTS)) {
int base_port;
dst = sc_dst(s->scf);
if (dst) {
/* First, retrieve the port from the incoming connection */
base_port = get_host_port(dst);
/* Second, assign the outgoing connection's port */
base_port += get_host_port(*ss);
set_host_port(*ss, base_port);
}
}
}
else if (s->be->options & PR_O_DISPATCH) {
if (!sockaddr_alloc(ss, NULL, 0))
return SRV_STATUS_INTERNAL;
/* connect to the defined dispatch addr */
**ss = s->be->dispatch_addr;
}
else if ((s->be->options & PR_O_TRANSP)) {
if (!sockaddr_alloc(ss, NULL, 0))
return SRV_STATUS_INTERNAL;
/* in transparent mode, use the original dest addr if no dispatch specified */
dst = sc_dst(s->scf);
if (dst && (dst->ss_family == AF_INET || dst->ss_family == AF_INET6))
**ss = *dst;
}
else {
/* no server and no LB algorithm ! */
return SRV_STATUS_INTERNAL;
}
return SRV_STATUS_OK;
}
/* This function assigns a server to stream <s> if required, and can add the
 * connection to either the assigned server's queue or to the proxy's queue.
 * If ->srv_conn is set, the stream is first released from the server.
 * It may also be called with SF_DIRECT and/or SF_ASSIGNED though. It will
 * be called before any connection and after any retry or redispatch occurs.
 *
 * It is not allowed to call this function with a stream in a queue.
 *
 * Returns :
 *
 *   SRV_STATUS_OK       if everything is OK.
 *   SRV_STATUS_NOSRV    if no server is available. objt_server(s->target) = NULL.
 *   SRV_STATUS_QUEUED   if the connection has been queued.
 *   SRV_STATUS_FULL     if the server(s) is/are saturated and the
 *                       connection could not be queued at the server's,
 *                       which may be NULL if we queue on the backend.
 *   SRV_STATUS_INTERNAL for other unrecoverable errors.
 *
 */
int assign_server_and_queue(struct stream *s)
{
struct pendconn *p;
struct server *srv;
int err;
if (s->pend_pos)
return SRV_STATUS_INTERNAL;
err = SRV_STATUS_OK;
if (!(s->flags & SF_ASSIGNED)) {
struct server *prev_srv = objt_server(s->target);
err = assign_server(s);
if (prev_srv) {
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* This stream was previously assigned to a server. We have to
* update the stream's and the server's stats :
* - if the server changed :
* - set TX_CK_DOWN if txn.flags was TX_CK_VALID
* - set SF_REDISP if it was successfully redispatched
* - increment srv->redispatches and be->redispatches
* - if the server remained the same : update retries.
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3 When haproxy decides that session needs to be redispatched it chose a server, but there is no guarantee for it to be a different one. So, it often happens that selected server is exactly the same that it was previously, so a client ends up with a 503 error anyway, especially when one sever has much bigger weight than others. Changes from the previous version: - drop stupid and unnecessary SN_DIRECT changes - assign_server(): use srvtoavoid to keep the old server and clear s->srv so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again) and get_server_rr_with_conns has chances to work (previously we were passing a NULL here) - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns incrementing as t->srv was guaranteed to be NULL - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code. - fix http_flush_cookie_flags() and move it to assign_server_and_queue() directly. The code here was supposed to set CK_DOWN and clear CK_VALID, but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags ^= (TX_CK_VALID | TX_CK_DOWN); was really a: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags &= TX_CK_VALID Now haproxy logs "--DI" after redispatching connection. - defer srv->redispatches++ and s->be->redispatches++ so there are called only if a conenction was redispatched, not only supposed to. 
- don't increment lbconn if redispatcher selected the same sarver - don't count unsuccessfully redispatched connections as redispatched connections - don't count redispatched connections as errors, so: - the number of connections effectively served by a server is: srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches and SUM(servers->failed_conns) == be->failed_conns - requires the "Don't increment server connections too much + fix retries" patch - needs little more testing and probably some discussion so reverting to the RFC state Tests #1: retries 4 redispatch i) 1 server(s): b (wght=1, down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request failed ii) server(s): b (wght=1, down), u (wght=1, down) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0 -> request FAILED iii) 2 server(s): b (wght=1, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK iv) 2 server(s): b (wght=100, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK v) 1 server(s): b (down for first 4 SYNS) b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0 -> request OK Tests #2: retries 4 i) 1 server(s): b (down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request FAILED
2008-02-21 21:50:19 -05:00
*/
if (prev_srv != objt_server(s->target)) {
if (s->txn && (s->txn->flags & TX_CK_MASK) == TX_CK_VALID) {
s->txn->flags &= ~TX_CK_MASK;
s->txn->flags |= TX_CK_DOWN;
}
s->flags |= SF_REDISP;
_HA_ATOMIC_INC(&prev_srv->counters.redispatches);
_HA_ATOMIC_INC(&s->be->be_counters.redispatches);
} else {
_HA_ATOMIC_INC(&prev_srv->counters.retries);
_HA_ATOMIC_INC(&s->be->be_counters.retries);
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3 When haproxy decides that session needs to be redispatched it chose a server, but there is no guarantee for it to be a different one. So, it often happens that selected server is exactly the same that it was previously, so a client ends up with a 503 error anyway, especially when one sever has much bigger weight than others. Changes from the previous version: - drop stupid and unnecessary SN_DIRECT changes - assign_server(): use srvtoavoid to keep the old server and clear s->srv so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again) and get_server_rr_with_conns has chances to work (previously we were passing a NULL here) - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns incrementing as t->srv was guaranteed to be NULL - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code. - fix http_flush_cookie_flags() and move it to assign_server_and_queue() directly. The code here was supposed to set CK_DOWN and clear CK_VALID, but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags ^= (TX_CK_VALID | TX_CK_DOWN); was really a: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags &= TX_CK_VALID Now haproxy logs "--DI" after redispatching connection. - defer srv->redispatches++ and s->be->redispatches++ so there are called only if a conenction was redispatched, not only supposed to. 
- don't increment lbconn if redispatcher selected the same sarver - don't count unsuccessfully redispatched connections as redispatched connections - don't count redispatched connections as errors, so: - the number of connections effectively served by a server is: srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches and SUM(servers->failed_conns) == be->failed_conns - requires the "Don't increment server connections too much + fix retries" patch - needs little more testing and probably some discussion so reverting to the RFC state Tests #1: retries 4 redispatch i) 1 server(s): b (wght=1, down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request failed ii) server(s): b (wght=1, down), u (wght=1, down) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0 -> request FAILED iii) 2 server(s): b (wght=1, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK iv) 2 server(s): b (wght=100, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK v) 1 server(s): b (down for first 4 SYNS) b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0 -> request OK Tests #2: retries 4 i) 1 server(s): b (down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request FAILED
2008-02-21 21:50:19 -05:00
}
}
}
switch (err) {
case SRV_STATUS_OK:
/* we have SF_ASSIGNED set */
srv = objt_server(s->target);
if (!srv)
return SRV_STATUS_OK; /* dispatch or proxy mode */
/* If we already have a connection slot, no need to check any queue */
if (s->srv_conn == srv)
return SRV_STATUS_OK;
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* OK, this stream already has an assigned server, but no
* connection slot yet. Either it is a redispatch, or it was
* assigned from persistence information (direct mode).
*/
if ((s->flags & SF_REDIRECTABLE) && srv->rdr_len) {
/* server scheduled for redirection, and already assigned. We
* don't want to go further nor check the queue.
*/
sess_change_server(s, srv); /* not really needed in fact */
return SRV_STATUS_OK;
}
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* We might have to queue this stream if the assigned server is full.
* We know we have to queue it into the server's queue, so if a maxqueue
* is set on the server, we must also check that the server's queue is
* not full, in which case we have to return FULL.
*/
if (srv->maxconn &&
(srv->queue.length || srv->served >= srv_dynamic_maxconn(srv))) {
if (srv->maxqueue > 0 && srv->queue.length >= srv->maxqueue)
return SRV_STATUS_FULL;
p = pendconn_add(s);
if (p)
return SRV_STATUS_QUEUED;
else
return SRV_STATUS_INTERNAL;
}
/* OK, we can use this server. Let's reserve our place */
sess_change_server(s, srv);
return SRV_STATUS_OK;
case SRV_STATUS_FULL:
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* queue this stream into the proxy's queue */
p = pendconn_add(s);
if (p)
return SRV_STATUS_QUEUED;
else
return SRV_STATUS_INTERNAL;
case SRV_STATUS_NOSRV:
return err;
case SRV_STATUS_INTERNAL:
return err;
default:
return SRV_STATUS_INTERNAL;
}
}
/* Allocate an address if an explicit source address must be used for a backend
 * connection.
 *
 * Two parameters are taken into account to check if specific source address is
 * configured. The first one is <srv> which is the server instance to connect
 * to. It may be NULL when dispatching is used. The second one <be> is the
 * backend instance which contains the target server or dispatch.
 *
 * A stream instance <s> can be used to set the stream owner of the backend
 * connection. It is a required parameter if the source address is a dynamic
 * parameter.
 *
 * Returns SRV_STATUS_OK if either no specific source address specified or its
 * allocation is done correctly. On error returns SRV_STATUS_INTERNAL.
 */
int alloc_bind_address(struct sockaddr_storage **ss,
                       struct server *srv, struct proxy *be,
                       struct stream *s)
{
#if defined(CONFIG_HAP_TRANSPARENT)
	const struct sockaddr_storage *cli_addr;
	struct conn_src *src_conf = NULL;
	struct sockaddr_in *sin4;
	char *hdr_val;
	size_t hdr_len;
#endif

	/* the caller must not hand us an already allocated address */
	BUG_ON(*ss);

#if defined(CONFIG_HAP_TRANSPARENT)
	/* the server-level "source" setting takes precedence over the backend's */
	if (srv && srv->conn_src.opts & CO_SRC_BIND)
		src_conf = &srv->conn_src;
	else if (be->conn_src.opts & CO_SRC_BIND)
		src_conf = &be->conn_src;

	/* no transparent mode configured: nothing to allocate, report OK */
	if (!src_conf)
		return SRV_STATUS_OK;

	switch (src_conf->opts & CO_SRC_TPROXY_MASK) {
	case CO_SRC_TPROXY_ADDR:
		/* fixed source address taken from the configuration */
		if (!sockaddr_alloc(ss, NULL, 0))
			return SRV_STATUS_INTERNAL;
		**ss = src_conf->tproxy_addr;
		break;

	case CO_SRC_TPROXY_CLI:
	case CO_SRC_TPROXY_CIP:
		BUG_ON(!s); /* Dynamic source setting requires a stream instance. */
		/* reuse the client's own source address */
		/* FIXME: what can we do if the client connects in IPv6 or unix socket ? */
		cli_addr = sc_src(s->scf);
		if (!cli_addr || !sockaddr_alloc(ss, NULL, 0))
			return SRV_STATUS_INTERNAL;
		**ss = *cli_addr;
		break;

	case CO_SRC_TPROXY_DYN:
		BUG_ON(!s); /* Dynamic source setting requires a stream instance. */
		if (!src_conf->bind_hdr_occ || !IS_HTX_STRM(s))
			return SRV_STATUS_INTERNAL;

		if (!sockaddr_alloc(ss, NULL, 0))
			return SRV_STATUS_INTERNAL;

		/* bind to the IPv4 address found in the designated request header */
		sin4 = (struct sockaddr_in *)*ss;
		sin4->sin_family = AF_INET;
		sin4->sin_port = 0;
		sin4->sin_addr.s_addr = 0;
		if (!http_get_htx_hdr(htxbuf(&s->req.buf),
		                      ist2(src_conf->bind_hdr_name, src_conf->bind_hdr_len),
		                      src_conf->bind_hdr_occ, NULL, &hdr_val, &hdr_len)) {
			/* header not found: release the address before failing */
			sockaddr_free(ss);
			return SRV_STATUS_INTERNAL;
		}
		sin4->sin_addr.s_addr = htonl(inetaddr_host_lim(hdr_val, hdr_val + hdr_len));
		break;

	default:
		; /* other tproxy modes need no pre-allocated address */
	}
#endif

	return SRV_STATUS_OK;
}
/* Attempt to get a backend connection from the specified mt_list array
 * (safe or idle connections). The <is_safe> argument means what type of
 * connection the caller wants.
 */
struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
{
struct connection *conn = NULL;
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
int i; // thread number
int found = 0;
int stop;
/* We need to lock even if this is our own list, because another
* thread may be trying to migrate that connection, and we don't want
* to end up with two threads using the same connection.
*/
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
i = tid;
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
if (conn)
conn_delete_from_tree(conn);
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
/* If we failed to pick a connection from the idle list, let's try again with
* the safe list.
*/
if (!conn && !is_safe && srv->curr_safe_nb > 0) {
conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
if (conn) {
conn_delete_from_tree(conn);
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
is_safe = 1;
}
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
/* If we found a connection in our own list, and we don't have to
* steal one from another thread, then we're done.
*/
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
if (conn)
goto done;
/* pool sharing globally disabled ? */
if (!(global.tune.options & GTUNE_IDLE_POOL_SHARED))
goto done;
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
/* Are we allowed to pick from another thread ? We'll still try
* it if we're running low on FDs as we don't want to create
* extra conns in this case, otherwise we can give up if we have
* too few idle conns and the server protocol supports establishing
* connections (i.e. not a reverse-http server for example).
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
*/
if (srv->curr_idle_conns < srv->low_idle_conns &&
ha_used_fds < global.tune.pool_low_count) {
const struct protocol *srv_proto = protocol_lookup(srv->addr.ss_family, PROTO_TYPE_STREAM, 0);
if (srv_proto && srv_proto->connect)
goto done;
}
/* Lookup all other threads for an idle connection, starting from last
* unvisited thread, but always staying in the same group.
*/
stop = srv->per_tgrp[tgid - 1].next_takeover;
if (stop >= tg->count)
stop %= tg->count;
stop += tg->base;
i = stop;
do {
if (!srv->curr_idle_thr[i] || i == tid)
continue;
MEDIUM: backend: use a trylock when trying to grab an idle connection In conn_backend_get() we can cause some extreme contention due to the idle_conns_lock. Indeed, even though it's per-thread, it still causes high contention when running with many threads. The reason is that all threads which do not have any idle connections are quickly skipped, till the point where there are still some, so the first reaching that point will grab the lock and the other ones wait behind. From this point, all threads are synchronized waiting on the same lock, and will follow the leader in small jumps, all hindering each other. Here instead of doing this we're using a trylock. This way when a thread is already checking a list, other ones will continue to next thread. In the worst case, a high contention will lead to a few new connections to be set up, but this may actually be what is required to avoid contention in the first place. With this change, the contention has mostly disappeared on this lock (it's still present in muxes and transport layers due to the takeover). Surprisingly, checking for emptiness of the tree root before taking the lock didn't address any contention. A few improvements are still possible and desirable here. The first one would be to avoid seeing all threads jump to the next one. We could have each thread use a different prime number as the increment so as to spread them across the entire table instead of keeping them synchronized. The second one is that the lock in the muck layers shouldn't be needed to check for the tasklet's context availability.
2021-03-01 01:22:17 -05:00
if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
continue;
conn = srv_lookup_conn(is_safe ? &srv->per_thr[i].safe_conns : &srv->per_thr[i].idle_conns, hash);
while (conn) {
if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
conn_delete_from_tree(conn);
_HA_ATOMIC_INC(&activity[tid].fd_takeover);
found = 1;
break;
}
conn = srv_lookup_conn_next(conn);
}
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
if (!found && !is_safe && srv->curr_safe_nb > 0) {
conn = srv_lookup_conn(&srv->per_thr[i].safe_conns, hash);
while (conn) {
if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
conn_delete_from_tree(conn);
_HA_ATOMIC_INC(&activity[tid].fd_takeover);
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
found = 1;
is_safe = 1;
break;
}
conn = srv_lookup_conn_next(conn);
BUG/MEDIUM: backend: always search in the safe list after failing on the idle one There's a tricky behavior that was lost when the idle connections were made sharable between thread in commit 566df309c ("MEDIUM: connections: Attempt to get idle connections from other threads."), it is the ability to retry from the safe list when looking for any type of idle connection and not finding one in the idle list. It is already important when dealing with long-lived connections since they ultimately all become safe, but that case is already covered by the fact that safe conns not being used end up closing and are not looked up anymore since connect_server() sees there are none. But it's even more important when using server-side connections which periodically close, because the new connections may spend half of their time in safe state and the other half in the idle state, and failing to grab one such connection from the right list results in establishing a new connection. This patch makes sure that a failure to find an idle connection results in a new attempt at finding one from the safe list if available. In order to avoid locking twice, connections are attempted alternatively from the idle then safe list when picking from siblings. Tests have shown a ~2% performance increase by avoiding to lock twice. A typical test with 10000 connections over 16 threads with 210 servers having a 1 millisecond response time and closing every 5 requests shows a degrading performance starting at 120k req/s down to 60-90k and an average reuse rate of 44%. After the fix, the reuse rate raises to 79% and the performance becomes stable at 254k req/s. Similarly the previous test with full keep-alive has now increased from 96% reuse rate to 99% and from 352k to 375k req/s. No backport is needed as this is 2.2-only.
2020-07-01 09:04:38 -04:00
}
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
} while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop);
if (!found)
conn = NULL;
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
done:
if (conn) {
_HA_ATOMIC_STORE(&srv->per_tgrp[tgid - 1].next_takeover, (i + 1 == tg->base + tg->count) ? tg->base : i + 1);
srv_use_conn(srv, conn);
_HA_ATOMIC_DEC(&srv->curr_idle_conns);
_HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
_HA_ATOMIC_DEC(&srv->curr_idle_thr[i]);
conn->flags &= ~CO_FL_LIST_MASK;
__ha_barrier_atomic_store();
if ((s->be->options & PR_O_REUSE_MASK) == PR_O_REUSE_SAFE &&
conn->mux->flags & MX_FL_HOL_RISK) {
/* attach the connection to the session private list
*/
conn->owner = s->sess;
session_add_conn(s->sess, conn, conn->target);
}
else {
srv_add_to_avail_list(srv, conn);
}
}
return conn;
}
/* Establish or confirm the server-side connection <conn> for stream <s>.
 * Returns SF_ERR_NONE on success (or when establishment is in progress),
 * SF_ERR_INTERNAL if <conn> lacks a usable control layer, or the error
 * reported by the control layer's connect() callback. Updates the backend
 * stream connector's state to SC_ST_CON or SC_ST_RDY as appropriate.
 */
static int do_connect_server(struct stream *s, struct connection *conn)
{
	int connect_flags = 0;
	int ret = SF_ERR_NONE;

	/* a connection without a control layer able to connect is unusable */
	if (unlikely(!conn || !conn->ctrl || !conn->ctrl->connect))
		return SF_ERR_INTERNAL;

	/* NOTE(review): this tests pending output data on the response
	 * channel to decide CONNECT_HAS_DATA — confirm this is the intended
	 * channel (request vs response) for data heading to the server.
	 */
	if (co_data(&s->res))
		connect_flags |= CONNECT_HAS_DATA;

	/* presumably true only on the very first connection attempt
	 * (retry counter still at its configured value) — TODO confirm
	 */
	if (s->conn_retries == s->be->conn_retries)
		connect_flags |= CONNECT_CAN_USE_TFO;

	if (conn_ctrl_ready(conn) && conn_xprt_ready(conn)) {
		/* try to reuse the existing connection, it will be
		 * confirmed once we can send on it. Ask the mux whether
		 * the connection is really ready right now.
		 */
		s->scb->state = (conn->mux->ctl(conn, MUX_CTL_STATUS, NULL) & MUX_STATUS_READY)
			? SC_ST_RDY
			: SC_ST_CON;
	}
	else {
		ret = conn->ctrl->connect(conn, connect_flags);
		if (ret != SF_ERR_NONE)
			return ret;
		/* we're in the process of establishing a connection */
		s->scb->state = SC_ST_CON;
	}

	/* needs src ip/port for logging */
	if (s->flags & SF_SRC_ADDR)
		conn_get_src(conn);

	return ret;
}
/*
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
* This function initiates a connection to the server assigned to this stream
* (s->target, (s->scb)->addr.to). It will assign a server if none
* is assigned yet.
* It can return one of :
* - SF_ERR_NONE if everything's OK
* - SF_ERR_SRVTO if there are no more servers
* - SF_ERR_SRVCL if the connection was refused by the server
* - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
* - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
* - SF_ERR_INTERNAL for any other purely internal errors
* Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
* The server-facing stream connector is expected to hold a pre-allocated connection.
*/
int connect_server(struct stream *s)
{
struct connection *cli_conn = objt_conn(strm_orig(s));
struct connection *srv_conn = NULL;
struct server *srv;
int reuse_mode = s->be->options & PR_O_REUSE_MASK;
int reuse = 0;
int init_mux = 0;
int err;
#ifdef USE_OPENSSL
struct sample *sni_smp = NULL;
#endif
struct sockaddr_storage *bind_addr = NULL;
int proxy_line_ret;
int64_t hash = 0;
struct conn_hash_params hash_params;
/* in standard configuration, srv will be valid
* it can be NULL for dispatch mode or transparent backend */
srv = objt_server(s->target);
/* Override reuse-mode if reverse-connect is used. */
if (srv && srv->flags & SRV_F_RHTTP)
reuse_mode = PR_O_REUSE_ALWS;
err = alloc_dst_address(&s->scb->dst, srv, s);
if (err != SRV_STATUS_OK)
return SF_ERR_INTERNAL;
err = alloc_bind_address(&bind_addr, srv, s->be, s);
if (err != SRV_STATUS_OK)
return SF_ERR_INTERNAL;
#ifdef USE_OPENSSL
if (srv && srv->ssl_ctx.sni) {
sni_smp = sample_fetch_as_type(s->be, s->sess, s,
SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
srv->ssl_ctx.sni, SMP_T_STR);
}
#endif
/* do not reuse if mode is not http */
if (!IS_HTX_STRM(s)) {
DBG_TRACE_STATE("skip idle connections reuse: no htx", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
goto skip_reuse;
}
/* disable reuse if websocket stream and the protocol to use is not the
* same as the main protocol of the server.
*/
if (unlikely(s->flags & SF_WEBSOCKET) && srv) {
if (!srv_check_reuse_ws(srv)) {
DBG_TRACE_STATE("skip idle connections reuse: websocket stream", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
goto skip_reuse;
}
}
/* first, set unique connection parameters and then calculate hash */
memset(&hash_params, 0, sizeof(hash_params));
/* 1. target */
hash_params.target = s->target;
#ifdef USE_OPENSSL
/* 2. sni
* only test if the sample is not null as smp_make_safe (called before
* ssl_sock_set_servername) can only fails if this is not the case
*/
if (sni_smp) {
hash_params.sni_prehash =
conn_hash_prehash(sni_smp->data.u.str.area,
sni_smp->data.u.str.data);
}
#endif /* USE_OPENSSL */
/* 3. destination address */
if (srv && srv_is_transparent(srv))
hash_params.dst_addr = s->scb->dst;
/* 4. source address */
hash_params.src_addr = bind_addr;
/* 5. proxy protocol */
if (srv && srv->pp_opts) {
proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s);
if (proxy_line_ret) {
hash_params.proxy_prehash =
conn_hash_prehash(trash.area, proxy_line_ret);
}
}
hash = conn_calculate_hash(&hash_params);
/* first, search for a matching connection in the session's idle conns */
srv_conn = session_get_conn(s->sess, s->target, hash);
if (srv_conn) {
DBG_TRACE_STATE("reuse connection from session", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
reuse = 1;
}
if (srv && !reuse && reuse_mode != PR_O_REUSE_NEVR) {
/* Below we pick connections from the safe, idle or
* available (which are safe too) lists based
* on the strategy, the fact that this is a first or second
* (retryable) request, with the indicated priority (1 or 2) :
*
* SAFE AGGR ALWS
*
* +-----+-----+ +-----+-----+ +-----+-----+
* req| 1st | 2nd | req| 1st | 2nd | req| 1st | 2nd |
* ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
* safe| - | 2 | safe| 1 | 2 | safe| 1 | 2 |
* ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
* idle| - | 1 | idle| - | 1 | idle| 2 | 1 |
* ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
*
* Idle conns are necessarily looked up on the same thread so
* that there is no concurrency issues.
*/
if (!eb_is_empty(&srv->per_thr[tid].avail_conns)) {
srv_conn = srv_lookup_conn(&srv->per_thr[tid].avail_conns, hash);
if (srv_conn) {
/* connection cannot be in idle list if used as an avail idle conn. */
BUG_ON(LIST_INLIST(&srv_conn->idle_list));
DBG_TRACE_STATE("reuse connection from avail", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
reuse = 1;
}
}
/* if no available connections found, search for an idle/safe */
if (!srv_conn && srv->max_idle_conns && srv->curr_idle_conns > 0) {
const int not_first_req = s->txn && s->txn->flags & TX_NOT_FIRST;
const int idle = srv->curr_idle_nb > 0;
const int safe = srv->curr_safe_nb > 0;
MINOR: backend: always satisfy the first req reuse rule with l7 retries The "first req" rule consists in not delivering a connection's first request to a connection that's not known for being safe so that we don't deliver a broken page to a client if the server didn't intend to keep it alive. That's what's used by "http-reuse safe" particularly. But the reason this rule was created was precisely because haproxy was not able to re-emit the request to the server in case of connection breakage, which is precisely what l7 retries later brought. As such, there's no reason for enforcing this rule when l7 retries are properly enabled because such a blank page will trigger a retry and will not be delivered to the client. This patch simply checks that the l7 retries are enabled for the 3 cases that can be triggered on a dead or dying connection (failure, empty, and timeout), and if all 3 are enabled, then regular idle connections can be reused. This could almost be marked as a bug fix because a lot of users relying on l7 retries do not necessarily think about using http-reuse always due to the recommendation against it in the doc, while the protection that the safe mode offers is never used in that mode, and it forces the http client not to reuse existing persistent connections since it never sets the "not first" flag. It could also be decided that the protection is not used either when the origin is an applet, as in this case this is internal code that we can decide to let handle the retry by itself (all info are still present). But at least the httpclient will be happy with this alone. It would make sense to backport this at least to 2.6 in order to let the httpclient reuse connections, maybe to older releases if some users report low reuse counts.
2022-09-01 13:58:58 -04:00
const int retry_safe = (s->be->retry_type & (PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT)) ==
(PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT);
/* second column of the tables above,
* search for an idle then safe conn */
MINOR: backend: always satisfy the first req reuse rule with l7 retries The "first req" rule consists in not delivering a connection's first request to a connection that's not known for being safe so that we don't deliver a broken page to a client if the server didn't intend to keep it alive. That's what's used by "http-reuse safe" particularly. But the reason this rule was created was precisely because haproxy was not able to re-emit the request to the server in case of connection breakage, which is precisely what l7 retries later brought. As such, there's no reason for enforcing this rule when l7 retries are properly enabled because such a blank page will trigger a retry and will not be delivered to the client. This patch simply checks that the l7 retries are enabled for the 3 cases that can be triggered on a dead or dying connection (failure, empty, and timeout), and if all 3 are enabled, then regular idle connections can be reused. This could almost be marked as a bug fix because a lot of users relying on l7 retries do not necessarily think about using http-reuse always due to the recommendation against it in the doc, while the protection that the safe mode offers is never used in that mode, and it forces the http client not to reuse existing persistent connections since it never sets the "not first" flag. It could also be decided that the protection is not used either when the origin is an applet, as in this case this is internal code that we can decide to let handle the retry by itself (all info are still present). But at least the httpclient will be happy with this alone. It would make sense to backport this at least to 2.6 in order to let the httpclient reuse connections, maybe to older releases if some users report low reuse counts.
2022-09-01 13:58:58 -04:00
if (not_first_req || retry_safe) {
if (idle || safe)
srv_conn = conn_backend_get(s, srv, 0, hash);
}
/* first column of the tables above */
else if (reuse_mode >= PR_O_REUSE_AGGR) {
/* search for a safe conn */
if (safe)
srv_conn = conn_backend_get(s, srv, 1, hash);
/* search for an idle conn if no safe conn found
* on always reuse mode */
if (!srv_conn &&
reuse_mode == PR_O_REUSE_ALWS && idle) {
/* TODO conn_backend_get should not check the
* safe list is this case */
srv_conn = conn_backend_get(s, srv, 0, hash);
}
}
if (srv_conn) {
DBG_TRACE_STATE("reuse connection from idle/safe", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
reuse = 1;
}
}
}
/* here reuse might have been set above, indicating srv_conn finally
* is OK.
*/
if (ha_used_fds > global.tune.pool_high_count && srv) {
struct connection *tokill_conn = NULL;
/* We can't reuse a connection, and e have more FDs than deemd
* acceptable, attempt to kill an idling connection
*/
/* First, try from our own idle list */
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) {
BUG/MAJOR: backend: fix idle conn crash under low FD Since the following commit, idle conns are stored in a list as secondary storage to retrieve them in usage order : 5afcb686b93c3811bd859a331efd6a8341a61218 MAJOR: connection: purge idle conn by last usage The list usage has been extended wherever connections lookup are done both on idle and safe trees. This reduced the code size by replacing a two tree loops by a single list loop. LIST_ELEM() is used in this context to retrieve the first idle list element from the server list head. However, macro usage was wrong due to an extra '&' operator which returns an invalid connection reference. This will most of the time caused a crash on conn_delete_from_tree() or affiliated functions. This bug only occurs if the FD pool is exhausted and some idle connections are selected to be killed. It can be reproduced using the following config and h2load command : $ h2load -t 8 -c 800 -m 10 -n 800 "http://127.0.0.1:21080/?s=10k" global maxconn 100 defaults mode http timeout connect 20s timeout client 20s timeout server 20s listen li bind :21080 proto h2 server nginx 127.99.0.1:30080 proto h1 This bug has been introduced by the above commit. Thus no need to backport this fix. Note that LIST_ELEM() macro usage was slightly adjusted also in srv_migrate_conns_to_remove(). The function used toremove_list instead of idle_list connection list element. This is not a bug as they are stored in the same union. However, the new code is clearer as it intends to move connection from the idle_list only into the toremove_list mt-list.
2023-10-24 12:31:55 -04:00
tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list);
conn_delete_from_tree(tokill_conn);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
/* Release the idle lock before calling mux->destroy.
* It will in turn call srv_release_conn through
* conn_free which also uses it.
*/
tokill_conn->mux->destroy(tokill_conn->ctx);
}
else {
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
/* If not, iterate over other thread's idling pool, and try to grab one */
if (!tokill_conn) {
int i;
for (i = tid; (i = ((i + 1 == global.nbthread) ? 0 : i + 1)) != tid;) {
// just silence stupid gcc which reports an absurd
// out-of-bounds warning for <i> which is always
// exactly zero without threads, but it seems to
// see it possibly larger.
ALREADY_CHECKED(i);
if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
continue;
if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) {
BUG/MAJOR: backend: fix idle conn crash under low FD Since the following commit, idle conns are stored in a list as secondary storage to retrieve them in usage order : 5afcb686b93c3811bd859a331efd6a8341a61218 MAJOR: connection: purge idle conn by last usage The list usage has been extended wherever connections lookup are done both on idle and safe trees. This reduced the code size by replacing a two tree loops by a single list loop. LIST_ELEM() is used in this context to retrieve the first idle list element from the server list head. However, macro usage was wrong due to an extra '&' operator which returns an invalid connection reference. This will most of the time caused a crash on conn_delete_from_tree() or affiliated functions. This bug only occurs if the FD pool is exhausted and some idle connections are selected to be killed. It can be reproduced using the following config and h2load command : $ h2load -t 8 -c 800 -m 10 -n 800 "http://127.0.0.1:21080/?s=10k" global maxconn 100 defaults mode http timeout connect 20s timeout client 20s timeout server 20s listen li bind :21080 proto h2 server nginx 127.99.0.1:30080 proto h1 This bug has been introduced by the above commit. Thus no need to backport this fix. Note that LIST_ELEM() macro usage was slightly adjusted also in srv_migrate_conns_to_remove(). The function used toremove_list instead of idle_list connection list element. This is not a bug as they are stored in the same union. However, the new code is clearer as it intends to move connection from the idle_list only into the toremove_list mt-list.
2023-10-24 12:31:55 -04:00
tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list);
conn_delete_from_tree(tokill_conn);
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
if (tokill_conn) {
/* We got one, put it into the concerned thread's to kill list, and wake it's kill task */
MT_LIST_APPEND(&idle_conns[i].toremove_conns,
&tokill_conn->toremove_list);
task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
break;
}
}
}
}
if (reuse) {
if (srv_conn->mux) {
int avail = srv_conn->mux->avail_streams(srv_conn);
if (avail <= 1) {
/* No more streams available, remove it from the list */
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(srv_conn);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
if (avail >= 1) {
if (srv_conn->mux->attach(srv_conn, s->scb->sedesc, s->sess) == -1) {
srv_conn = NULL;
if (sc_reset_endp(s->scb) < 0)
return SF_ERR_INTERNAL;
sc_ep_clr(s->scb, ~SE_FL_DETACHED);
}
}
else
srv_conn = NULL;
}
/* otherwise srv_conn is left intact */
}
else
srv_conn = NULL;
skip_reuse:
/* no reuse or failed to reuse the connection above, pick a new one */
if (!srv_conn) {
if (srv && (srv->flags & SRV_F_RHTTP)) {
DBG_TRACE_USER("cannot open a new connection for reverse server", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
s->conn_err_type = STRM_ET_CONN_ERR;
return SF_ERR_INTERNAL;
}
srv_conn = conn_new(s->target);
if (srv_conn) {
DBG_TRACE_STATE("alloc new be connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
srv_conn->owner = s->sess;
/* connection will be attached to the session if
* http-reuse mode is never or it is not targeted to a
* server */
if (reuse_mode == PR_O_REUSE_NEVR || !srv)
conn_set_private(srv_conn);
/* assign bind_addr to srv_conn */
srv_conn->src = bind_addr;
bind_addr = NULL;
srv_conn->hash_node->node.key = hash;
}
}
/* if bind_addr is non NULL free it */
sockaddr_free(&bind_addr);
/* srv_conn is still NULL only on allocation failure */
if (!srv_conn)
return SF_ERR_RESOURCE;
/* copy the target address into the connection */
*srv_conn->dst = *s->scb->dst;
/* Copy network namespace from client connection */
srv_conn->proxy_netns = cli_conn ? cli_conn->proxy_netns : NULL;
if (!srv_conn->xprt) {
/* set the correct protocol on the output stream connector */
if (srv) {
if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) {
conn_free(srv_conn);
return SF_ERR_INTERNAL;
}
} else if (obj_type(s->target) == OBJ_TYPE_PROXY) {
int ret;
/* proxies exclusively run on raw_sock right now */
ret = conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), xprt_get(XPRT_RAW));
if (ret < 0 || !(srv_conn->ctrl)) {
conn_free(srv_conn);
return SF_ERR_INTERNAL;
}
}
else {
conn_free(srv_conn);
return SF_ERR_INTERNAL; /* how did we get there ? */
}
if (sc_attach_mux(s->scb, NULL, srv_conn) < 0) {
conn_free(srv_conn);
return SF_ERR_INTERNAL; /* how did we get there ? */
}
srv_conn->ctx = s->scb;
#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
if (!srv ||
(srv->use_ssl != 1 || (!(srv->ssl_ctx.alpn_str) && !(srv->ssl_ctx.npn_str)) ||
srv->mux_proto || !IS_HTX_STRM(s)))
#endif
init_mux = 1;
/* process the case where the server requires the PROXY protocol to be sent */
srv_conn->send_proxy_ofs = 0;
if (srv && srv->pp_opts) {
srv_conn->flags |= CO_FL_SEND_PROXY;
srv_conn->send_proxy_ofs = 1; /* must compute size */
}
if (srv && (srv->flags & SRV_F_SOCKS4_PROXY)) {
srv_conn->send_proxy_ofs = 1;
srv_conn->flags |= CO_FL_SOCKS4;
}
#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
/* if websocket stream, try to update connection ALPN. */
if (unlikely(s->flags & SF_WEBSOCKET) &&
srv && srv->use_ssl && srv->ssl_ctx.alpn_str) {
char *alpn = "";
int force = 0;
switch (srv->ws) {
case SRV_WS_AUTO:
alpn = "\x08http/1.1";
force = 0;
break;
case SRV_WS_H1:
alpn = "\x08http/1.1";
force = 1;
break;
case SRV_WS_H2:
alpn = "\x02h2";
force = 1;
break;
}
if (!conn_update_alpn(srv_conn, ist(alpn), force))
DBG_TRACE_STATE("update alpn for websocket", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
}
#endif
}
else {
BUG/MINOR: backend: restore the SF_SRV_REUSED flag original purpose The SF_SRV_REUSED flag was set if a stream reused a backend connection. One of its purpose is to count the total reuse on the backend in opposition to newly instantiated connection. However, the flag was diverted from its original purpose since the following commit : e8f5f5d8b228d71333fb60229dc908505baf9222 BUG/MEDIUM: servers: Only set SF_SRV_REUSED if the connection if fully ready. With this change, the flag is not set anymore if the mux is not ready when a connection is picked for reuse. This can happen for multiplexed connections which are inserted in the available list as soon as created in http-reuse always mode. The goal of this change is to not retry immediately this request in case on an error on the same server if the reused connection is not fully ready. This change is justified for the retry timeout handling but it breaks other places which still uses the flag for its original purpose. Mainly, in this case the wrong 'connect' backend counter is incremented instead of the 'reuse' one. The flag is also used in http_return_srv_error and may have an impact if a http server error is replied for this stream. To fix this problem, the original purpose of the flag is restored by setting it unconditionaly when a connection is reused. Additionally, a new flag SF_SRV_REUSED_ANTICIPATED is created. This flag is set when the connection is reused but the mux is not ready yet. For the timeout handling on error, the request is retried immediately only if the stream reused a connection without this newly anticipated flag. This must be backported up to 2.1.
2021-06-17 09:14:49 -04:00
s->flags |= SF_SRV_REUSED;
/* Currently there seems to be no known cases of xprt ready
* without the mux installed here.
*/
BUG_ON(!srv_conn->mux);
if (!(srv_conn->mux->ctl(srv_conn, MUX_CTL_STATUS, NULL) & MUX_STATUS_READY))
BUG/MINOR: backend: restore the SF_SRV_REUSED flag original purpose The SF_SRV_REUSED flag was set if a stream reused a backend connection. One of its purpose is to count the total reuse on the backend in opposition to newly instantiated connection. However, the flag was diverted from its original purpose since the following commit : e8f5f5d8b228d71333fb60229dc908505baf9222 BUG/MEDIUM: servers: Only set SF_SRV_REUSED if the connection if fully ready. With this change, the flag is not set anymore if the mux is not ready when a connection is picked for reuse. This can happen for multiplexed connections which are inserted in the available list as soon as created in http-reuse always mode. The goal of this change is to not retry immediately this request in case on an error on the same server if the reused connection is not fully ready. This change is justified for the retry timeout handling but it breaks other places which still uses the flag for its original purpose. Mainly, in this case the wrong 'connect' backend counter is incremented instead of the 'reuse' one. The flag is also used in http_return_srv_error and may have an impact if a http server error is replied for this stream. To fix this problem, the original purpose of the flag is restored by setting it unconditionaly when a connection is reused. Additionally, a new flag SF_SRV_REUSED_ANTICIPATED is created. This flag is set when the connection is reused but the mux is not ready yet. For the timeout handling on error, the request is retried immediately only if the stream reused a connection without this newly anticipated flag. This must be backported up to 2.1.
2021-06-17 09:14:49 -04:00
s->flags |= SF_SRV_REUSED_ANTICIPATED;
}
/* flag for logging source ip/port */
if (strm_fe(s)->options2 & PR_O2_SRC_ADDR)
s->flags |= SF_SRC_ADDR;
/* disable lingering */
if (s->be->options & PR_O_TCP_NOLING)
s->scb->flags |= SC_FL_NOLINGER;
if (s->flags & SF_SRV_REUSED) {
_HA_ATOMIC_INC(&s->be->be_counters.reuse);
if (srv)
_HA_ATOMIC_INC(&srv->counters.reuse);
} else {
_HA_ATOMIC_INC(&s->be->be_counters.connect);
if (srv)
_HA_ATOMIC_INC(&srv->counters.connect);
}
err = do_connect_server(s, srv_conn);
if (err != SF_ERR_NONE)
return err;
#ifdef USE_OPENSSL
if (!(s->flags & SF_SRV_REUSED)) {
if (smp_make_safe(sni_smp))
ssl_sock_set_servername(srv_conn, sni_smp->data.u.str.area);
}
#endif /* USE_OPENSSL */
/* The CO_FL_SEND_PROXY flag may have been set by the connect method,
* if so, add our handshake pseudo-XPRT now.
*/
if ((srv_conn->flags & CO_FL_HANDSHAKE)) {
if (xprt_add_hs(srv_conn) < 0) {
conn_full_close(srv_conn);
return SF_ERR_INTERNAL;
}
}
conn_xprt_start(srv_conn);
/* We have to defer the mux initialization until after si_connect()
* has been called, as we need the xprt to have been properly
* initialized, or any attempt to recv during the mux init may
* fail, and flag the connection as CO_FL_ERROR.
*/
if (init_mux) {
const struct mux_ops *alt_mux =
likely(!(s->flags & SF_WEBSOCKET)) ? NULL : srv_get_ws_proto(srv);
if (conn_install_mux_be(srv_conn, s->scb, s->sess, alt_mux) < 0) {
conn_full_close(srv_conn);
return SF_ERR_INTERNAL;
}
if (IS_HTX_STRM(s)) {
/* If we're doing http-reuse always, and the connection
* is not private with available streams (an http2
* connection), add it to the available list, so that
* others can use it right away. If the connection is
* private or we're doing http-reuse safe and the mux
* protocol supports multiplexing, add it in the
* session server list.
*/
if (srv && reuse_mode == PR_O_REUSE_ALWS &&
!(srv_conn->flags & CO_FL_PRIVATE) &&
srv_conn->mux->avail_streams(srv_conn) > 0) {
srv_add_to_avail_list(srv, srv_conn);
}
else if (srv_conn->flags & CO_FL_PRIVATE ||
(reuse_mode == PR_O_REUSE_SAFE &&
srv_conn->mux->flags & MX_FL_HOL_RISK)) {
/* If it fail now, the same will be done in mux->detach() callback */
session_add_conn(s->sess, srv_conn, srv_conn->target);
}
}
}
#if defined(USE_OPENSSL) && (defined(OPENSSL_IS_BORINGSSL) || (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L))
if (!reuse && cli_conn && srv && srv_conn->mux &&
(srv->ssl_ctx.options & SRV_SSL_O_EARLY_DATA) &&
/* Only attempt to use early data if either the client sent
* early data, so that we know it can handle a 425, or if
* we are allowed to retry requests on early data failure, and
* it's our first try
*/
((cli_conn->flags & CO_FL_EARLY_DATA) ||
((s->be->retry_type & PR_RE_EARLY_ERROR) && !s->conn_retries)) &&
co_data(sc_oc(s->scb)) &&
srv_conn->flags & CO_FL_SSL_WAIT_HS)
srv_conn->flags &= ~(CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN);
#endif
/* set connect timeout */
s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.connect);
if (srv) {
int count;
s->flags |= SF_CURR_SESS;
count = _HA_ATOMIC_ADD_FETCH(&srv->cur_sess, 1);
HA_ATOMIC_UPDATE_MAX(&srv->counters.cur_sess_max, count);
if (s->be->lbprm.server_take_conn)
s->be->lbprm.server_take_conn(srv);
}
/* Now handle synchronously connected sockets. We know the stream connector
* is at least in state SC_ST_CON. These ones typically are UNIX
	 * is at least in state SC_ST_CON. These ones typically are UNIX
	 * sockets, socket pairs, and occasionally TCP connections on the
* loopback on a heavily loaded system.
*/
if (srv_conn->flags & CO_FL_ERROR)
s->scb->flags |= SC_FL_ERROR;
/* If we had early data, and the handshake ended, then
* we can remove the flag, and attempt to wake the task up,
* in the event there's an analyser waiting for the end of
* the handshake.
*/
if (!(srv_conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)))
sc_ep_clr(s->scb, SE_FL_WAIT_FOR_HS);
if (!sc_state_in(s->scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
(srv_conn->flags & CO_FL_WAIT_XPRT) == 0) {
s->conn_exp = TICK_ETERNITY;
sc_oc(s->scb)->flags |= CF_WRITE_EVENT;
if (s->scb->state == SC_ST_CON)
s->scb->state = SC_ST_RDY;
}
/* Report EOI on the channel if it was reached from the mux point of
* view.
*
* Note: This test is only required because si_cs_process is also the SI
* wake callback. Otherwise si_cs_recv()/si_cs_send() already take
* care of it.
*/
if (sc_ep_test(s->scb, SE_FL_EOI) && !(s->scb->flags & SC_FL_EOI)) {
s->scb->flags |= SC_FL_EOI;
sc_ic(s->scb)->flags |= CF_READ_EVENT;
}
/* catch all sync connect while the mux is not already installed */
if (!srv_conn->mux && !(srv_conn->flags & CO_FL_WAIT_XPRT)) {
if (conn_create_mux(srv_conn) < 0) {
conn_full_close(srv_conn);
return SF_ERR_INTERNAL;
}
}
return SF_ERR_NONE; /* connection is OK */
}
/* This function performs the "redispatch" part of a connection attempt. It
* will assign a server if required, queue the connection if required, and
* handle errors that might arise at this level. It can change the server
* state. It will return 1 if it encounters an error, switches the server
* state, or has to queue a connection. Otherwise, it will return 0 indicating
* that the connection is ready to use.
*/
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
int srv_redispatch_connect(struct stream *s)
{
struct server *srv;
int conn_err;
/* We know that we don't have any connection pending, so we will
* try to get a new one, and wait in this state if it's queued
*/
redispatch:
conn_err = assign_server_and_queue(s);
srv = objt_server(s->target);
switch (conn_err) {
case SRV_STATUS_OK:
break;
case SRV_STATUS_FULL:
/* The server has reached its maxqueue limit. Either PR_O_REDISP is set
* and we can redispatch to another server, or it is not and we return
* 503. This only makes sense in DIRECT mode however, because normal LB
* algorithms would never select such a server, and hash algorithms
* would bring us on the same server again. Note that s->target is set
* in this case.
*/
if (((s->flags & (SF_DIRECT|SF_FORCE_PRST)) == SF_DIRECT) &&
(s->be->options & PR_O_REDISP)) {
s->flags &= ~(SF_DIRECT | SF_ASSIGNED);
sockaddr_free(&s->scb->dst);
goto redispatch;
}
if (!s->conn_err_type) {
s->conn_err_type = STRM_ET_QUEUE_ERR;
[MAJOR] rework of the server FSM srv_state has been removed from HTTP state machines, and states have been split in either TCP states or analyzers. For instance, the TARPIT state has just become a simple analyzer. New flags have been added to the struct buffer to compensate this. The high-level stream processors sometimes need to force a disconnection without touching a file-descriptor (eg: report an error). But if they touched BF_SHUTW or BF_SHUTR, the file descriptor would not be closed. Thus, the two SHUT?_NOW flags have been added so that an application can request a forced close which the stream interface will be forced to obey. During this change, a new BF_HIJACK flag was added. It will be used for data generation, eg during a stats dump. It prevents the producer on a buffer from sending data into it. BF_SHUTR_NOW /* the producer must shut down for reads ASAP */ BF_SHUTW_NOW /* the consumer must shut down for writes ASAP */ BF_HIJACK /* the producer is temporarily replaced */ BF_SHUTW_NOW has precedence over BF_HIJACK. BF_HIJACK has precedence over BF_MAY_FORWARD (so that it does not need it). New functions buffer_shutr_now(), buffer_shutw_now(), buffer_abort() are provided to manipulate BF_SHUT* flags. A new type "stream_interface" has been added to describe both sides of a buffer. A stream interface has states and error reporting. The session now has two stream interfaces (one per side). Each buffer has stream_interface pointers to both consumer and producer sides. The server-side file descriptor has moved to its stream interface, so that even the buffer has access to it. process_srv() has been split into three parts : - tcp_get_connection() obtains a connection to the server - tcp_connection_failed() tests if a previously attempted connection has succeeded or not. - process_srv_data() only manages the data phase, and in this sense should be roughly equivalent to process_cli. 
Little code has been removed, and a lot of old code has been left in comments for now.
2008-10-19 01:30:41 -04:00
}
_HA_ATOMIC_INC(&srv->counters.failed_conns);
_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
return 1;
case SRV_STATUS_NOSRV:
/* note: it is guaranteed that srv == NULL here */
if (!s->conn_err_type) {
s->conn_err_type = STRM_ET_CONN_ERR;
[MAJOR] rework of the server FSM srv_state has been removed from HTTP state machines, and states have been split in either TCP states or analyzers. For instance, the TARPIT state has just become a simple analyzer. New flags have been added to the struct buffer to compensate this. The high-level stream processors sometimes need to force a disconnection without touching a file-descriptor (eg: report an error). But if they touched BF_SHUTW or BF_SHUTR, the file descriptor would not be closed. Thus, the two SHUT?_NOW flags have been added so that an application can request a forced close which the stream interface will be forced to obey. During this change, a new BF_HIJACK flag was added. It will be used for data generation, eg during a stats dump. It prevents the producer on a buffer from sending data into it. BF_SHUTR_NOW /* the producer must shut down for reads ASAP */ BF_SHUTW_NOW /* the consumer must shut down for writes ASAP */ BF_HIJACK /* the producer is temporarily replaced */ BF_SHUTW_NOW has precedence over BF_HIJACK. BF_HIJACK has precedence over BF_MAY_FORWARD (so that it does not need it). New functions buffer_shutr_now(), buffer_shutw_now(), buffer_abort() are provided to manipulate BF_SHUT* flags. A new type "stream_interface" has been added to describe both sides of a buffer. A stream interface has states and error reporting. The session now has two stream interfaces (one per side). Each buffer has stream_interface pointers to both consumer and producer sides. The server-side file descriptor has moved to its stream interface, so that even the buffer has access to it. process_srv() has been split into three parts : - tcp_get_connection() obtains a connection to the server - tcp_connection_failed() tests if a previously attempted connection has succeeded or not. - process_srv_data() only manages the data phase, and in this sense should be roughly equivalent to process_cli. 
Little code has been removed, and a lot of old code has been left in comments for now.
2008-10-19 01:30:41 -04:00
}
[MEDIUM]: Prevent redispatcher from selecting the same server, version #3 When haproxy decides that session needs to be redispatched it chose a server, but there is no guarantee for it to be a different one. So, it often happens that selected server is exactly the same that it was previously, so a client ends up with a 503 error anyway, especially when one sever has much bigger weight than others. Changes from the previous version: - drop stupid and unnecessary SN_DIRECT changes - assign_server(): use srvtoavoid to keep the old server and clear s->srv so SRV_STATUS_NOSRV guarantees that t->srv == NULL (again) and get_server_rr_with_conns has chances to work (previously we were passing a NULL here) - srv_redispatch_connect(): remove t->srv->cum_sess and t->srv->failed_conns incrementing as t->srv was guaranteed to be NULL - add avoididx to get_server_rr_with_conns. I hope I correctly understand this code. - fix http_flush_cookie_flags() and move it to assign_server_and_queue() directly. The code here was supposed to set CK_DOWN and clear CK_VALID, but: (TX_CK_VALID | TX_CK_DOWN) == TX_CK_VALID == TX_CK_MASK so: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags ^= (TX_CK_VALID | TX_CK_DOWN); was really a: if ((txn->flags & TX_CK_MASK) == TX_CK_VALID) txn->flags &= TX_CK_VALID Now haproxy logs "--DI" after redispatching connection. - defer srv->redispatches++ and s->be->redispatches++ so there are called only if a conenction was redispatched, not only supposed to. 
- don't increment lbconn if redispatcher selected the same sarver - don't count unsuccessfully redispatched connections as redispatched connections - don't count redispatched connections as errors, so: - the number of connections effectively served by a server is: srv->cum_sess - srv->failed_conns - srv->retries - srv->redispatches and SUM(servers->failed_conns) == be->failed_conns - requires the "Don't increment server connections too much + fix retries" patch - needs little more testing and probably some discussion so reverting to the RFC state Tests #1: retries 4 redispatch i) 1 server(s): b (wght=1, down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request failed ii) server(s): b (wght=1, down), u (wght=1, down) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=1, retr=0, redis=0 -> request FAILED iii) 2 server(s): b (wght=1, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK iv) 2 server(s): b (wght=100, down), u (wght=1, up) b) sessions=4, lbtot=1, err_conn=0, retr=3, redis=1 u) sessions=1, lbtot=1, err_conn=0, retr=0, redis=0 -> request OK v) 1 server(s): b (down for first 4 SYNS) b) sessions=5, lbtot=1, err_conn=0, retr=4, redis=0 -> request OK Tests #2: retries 4 i) 1 server(s): b (down) b) sessions=5, lbtot=1, err_conn=1, retr=4, redis=0 -> request FAILED
2008-02-21 21:50:19 -05:00
_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
return 1;
case SRV_STATUS_QUEUED:
s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.queue);
s->scb->state = SC_ST_QUE;
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* do nothing else and do not wake any other stream up */
return 1;
case SRV_STATUS_INTERNAL:
default:
if (!s->conn_err_type) {
s->conn_err_type = STRM_ET_CONN_OTHER;
[MAJOR] rework of the server FSM srv_state has been removed from HTTP state machines, and states have been split in either TCP states or analyzers. For instance, the TARPIT state has just become a simple analyzer. New flags have been added to the struct buffer to compensate this. The high-level stream processors sometimes need to force a disconnection without touching a file-descriptor (eg: report an error). But if they touched BF_SHUTW or BF_SHUTR, the file descriptor would not be closed. Thus, the two SHUT?_NOW flags have been added so that an application can request a forced close which the stream interface will be forced to obey. During this change, a new BF_HIJACK flag was added. It will be used for data generation, eg during a stats dump. It prevents the producer on a buffer from sending data into it. BF_SHUTR_NOW /* the producer must shut down for reads ASAP */ BF_SHUTW_NOW /* the consumer must shut down for writes ASAP */ BF_HIJACK /* the producer is temporarily replaced */ BF_SHUTW_NOW has precedence over BF_HIJACK. BF_HIJACK has precedence over BF_MAY_FORWARD (so that it does not need it). New functions buffer_shutr_now(), buffer_shutw_now(), buffer_abort() are provided to manipulate BF_SHUT* flags. A new type "stream_interface" has been added to describe both sides of a buffer. A stream interface has states and error reporting. The session now has two stream interfaces (one per side). Each buffer has stream_interface pointers to both consumer and producer sides. The server-side file descriptor has moved to its stream interface, so that even the buffer has access to it. process_srv() has been split into three parts : - tcp_get_connection() obtains a connection to the server - tcp_connection_failed() tests if a previously attempted connection has succeeded or not. - process_srv_data() only manages the data phase, and in this sense should be roughly equivalent to process_cli. 
Little code has been removed, and a lot of old code has been left in comments for now.
2008-10-19 01:30:41 -04:00
}
if (srv)
srv_inc_sess_ctr(srv);
if (srv)
srv_set_sess_last(srv);
if (srv)
_HA_ATOMIC_INC(&srv->counters.failed_conns);
_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* release other streams waiting for this server */
if (may_dequeue_tasks(srv, s->be))
process_srv_queue(srv);
return 1;
}
/* if we get here, it's because we got SRV_STATUS_OK, which also
* means that the connection has not been queued.
*/
return 0;
}
/* Reports whether the pending connection request of stream <s> may be
 * aborted. This is the case when the client side reported an error, or
 * when a shutdown was requested or performed on the backend stream
 * connector while the request channel <req> holds no outgoing data (or
 * the backend runs with abortonclose). Returns non-zero if abortable.
 */
static int back_may_abort_req(struct channel *req, struct stream *s)
{
	if (s->scf->flags & SC_FL_ERROR)
		return 1;

	if (!(s->scb->flags & (SC_FL_SHUT_WANTED|SC_FL_SHUT_DONE)))
		return 0;

	/* empty request and client aborted, or abort-on-close is set */
	return !co_data(req) || (s->be->options & PR_O_ABRT_CLOSE);
}
/* Update back stream connector status for input states SC_ST_ASS, SC_ST_QUE,
 * SC_ST_TAR. Other input states are simply ignored.
 * Possible output states are SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ, SC_ST_CON
 * and SC_ST_EST. Flags must have previously been updated for timeouts and other
 * conditions.
 */
void back_try_conn_req(struct stream *s)
{
	struct server *srv = objt_server(s->target);
	struct stconn *sc = s->scb;
	struct channel *req = &s->req;

	DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);

	if (sc->state == SC_ST_ASS) {
		/* Server assigned to connection request, we have to try to connect now */
		int conn_err;

		/* Before we try to initiate the connection, see if the
		 * request may be aborted instead.
		 */
		if (back_may_abort_req(req, s)) {
			s->conn_err_type |= STRM_ET_CONN_ABRT;
			DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
			goto abort_connection;
		}

		conn_err = connect_server(s);
		srv = objt_server(s->target);

		if (conn_err == SF_ERR_NONE) {
			/* state = SC_ST_CON or SC_ST_EST now */
			if (srv)
				srv_inc_sess_ctr(srv);
			if (srv)
				srv_set_sess_last(srv);
			DBG_TRACE_STATE("connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
			goto end;
		}

		/* We have received a synchronous error. We might have to
		 * abort, retry immediately or redispatch.
		 */
		if (conn_err == SF_ERR_INTERNAL) {
			if (!s->conn_err_type) {
				s->conn_err_type = STRM_ET_CONN_OTHER;
			}

			if (srv)
				srv_inc_sess_ctr(srv);
			if (srv)
				srv_set_sess_last(srv);
			if (srv)
				_HA_ATOMIC_INC(&srv->counters.failed_conns);
			_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);

			/* release other streams waiting for this server */
			sess_change_server(s, NULL);
			if (may_dequeue_tasks(srv, s->be))
				process_srv_queue(srv);

			/* Failed and not retryable. */
			sc_abort(sc);
			sc_shutdown(sc);
			sc->flags |= SC_FL_ERROR;

			s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);

			/* we may need to know the position in the queue for logging */
			pendconn_cond_unlink(s->pend_pos);

			/* no stream was ever accounted for this server */
			sc->state = SC_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, sc);
			DBG_TRACE_STATE("internal error during connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
			goto end;
		}

		/* We are facing a retryable error, but we don't want to run a
		 * turn-around now, as the problem is likely a source port
		 * allocation problem, so we want to retry now.
		 */
		sc->state = SC_ST_CER;
		sc->flags &= ~SC_FL_ERROR;
		back_handle_st_cer(s);

		DBG_TRACE_STATE("connection error, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
		/* now sc->state is one of SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ */
	}
	else if (sc->state == SC_ST_QUE) {
		/* connection request was queued, check for any update */
		if (!pendconn_dequeue(s)) {
			/* The connection is not in the queue anymore. Either
			 * we have a server connection slot available and we
			 * go directly to the assigned state, or we need to
			 * load-balance first and go to the INI state.
			 */
			s->conn_exp = TICK_ETERNITY;
			if (unlikely(!(s->flags & SF_ASSIGNED)))
				sc->state = SC_ST_REQ;
			else {
				s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
				sc->state = SC_ST_ASS;
			}
			DBG_TRACE_STATE("dequeue connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
			goto end;
		}

		/* Connection request still in queue... */
		if (s->flags & SF_CONN_EXP) {
			/* ... and timeout expired */
			s->conn_exp = TICK_ETERNITY;
			s->flags &= ~SF_CONN_EXP;
			s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);

			/* we may need to know the position in the queue for logging */
			pendconn_cond_unlink(s->pend_pos);

			if (srv)
				_HA_ATOMIC_INC(&srv->counters.failed_conns);
			_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
			sc_abort(sc);
			sc_shutdown(sc);
			req->flags |= CF_WRITE_TIMEOUT;
			if (!s->conn_err_type)
				s->conn_err_type = STRM_ET_QUEUE_TO;
			sc->state = SC_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, sc);
			DBG_TRACE_STATE("connection request still queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
			goto end;
		}

		/* Connection remains in queue, check if we have to abort it */
		if (back_may_abort_req(req, s)) {
			s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);

			/* we may need to know the position in the queue for logging */
			pendconn_cond_unlink(s->pend_pos);

			s->conn_err_type |= STRM_ET_QUEUE_ABRT;
			DBG_TRACE_STATE("abort queued connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
			goto abort_connection;
		}

		/* Nothing changed */
	}
	else if (sc->state == SC_ST_TAR) {
		/* Connection request might be aborted */
		if (back_may_abort_req(req, s)) {
			s->conn_err_type |= STRM_ET_CONN_ABRT;
			DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
			goto abort_connection;
		}

		if (!(s->flags & SF_CONN_EXP))
			return;  /* still in turn-around */

		s->flags &= ~SF_CONN_EXP;
		s->conn_exp = TICK_ETERNITY;

		/* we keep trying on the same server as long as the stream is
		 * marked "assigned".
		 * FIXME: Should we force a redispatch attempt when the server is down ?
		 */
		if (s->flags & SF_ASSIGNED)
			sc->state = SC_ST_ASS;
		else
			sc->state = SC_ST_REQ;

		DBG_TRACE_STATE("retry connection now", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
	}

  end:
	DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
	return;

abort_connection:
	/* give up */
	s->conn_exp = TICK_ETERNITY;
	s->flags &= ~SF_CONN_EXP;
	sc_abort(sc);
	sc_shutdown(sc);
	sc->state = SC_ST_CLO;
	if (s->srv_error)
		s->srv_error(s, sc);
	DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
	return;
}
/* This function initiates a server connection request on a stream connector
* already in SC_ST_REQ state. Upon success, the state goes to SC_ST_ASS for
* a real connection to a server, indicating that a server has been assigned,
* or SC_ST_RDY for a successful connection to an applet. It may also return
* SC_ST_QUE, or SC_ST_CLO upon error.
*/
void back_handle_st_req(struct stream *s)
{
struct stconn *sc = s->scb;
if (sc->state != SC_ST_REQ)
return;
DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
struct appctx *appctx;
/* The target is an applet but the SC is in SC_ST_REQ. Thus it
* means no appctx are attached to the SC. Otherwise, it will be
* in SC_ST_RDY state. So, try to create the appctx now.
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
*/
BUG_ON(sc_appctx(sc));
appctx = sc_applet_create(sc, objt_applet(s->target));
if (!appctx) {
/* No more memory, let's immediately abort. Force the
* error code to ignore the ERR_LOCAL which is not a
* real error.
*/
s->flags &= ~(SF_ERR_MASK | SF_FINST_MASK);
sc_abort(sc);
sc_shutdown(sc);
sc->flags |= SC_FL_ERROR;
s->conn_err_type = STRM_ET_CONN_RES;
sc->state = SC_ST_CLO;
if (s->srv_error)
s->srv_error(s, sc);
DBG_TRACE_STATE("failed to register applet", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
goto end;
}
DBG_TRACE_STATE("applet registered", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
goto end;
}
/* Try to assign a server */
if (srv_redispatch_connect(s) != 0) {
/* We did not get a server. Either we queued the
* connection request, or we encountered an error.
*/
if (sc->state == SC_ST_QUE) {
DBG_TRACE_STATE("connection request queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
goto end;
}
/* we did not get any server, let's check the cause */
sc_abort(sc);
sc_shutdown(sc);
sc->flags |= SC_FL_ERROR;
if (!s->conn_err_type)
s->conn_err_type = STRM_ET_CONN_OTHER;
sc->state = SC_ST_CLO;
if (s->srv_error)
s->srv_error(s, sc);
DBG_TRACE_STATE("connection request failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
goto end;
}
/* The server is assigned */
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
sc->state = SC_ST_ASS;
be_set_sess_last(s->be);
DBG_TRACE_STATE("connection request assigned to a server", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
end:
DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
}
/* This function is called with (sc->state == SC_ST_CON) meaning that a
 * connection was attempted and that the file descriptor is already allocated.
 * We must check for timeout, error and abort. Possible output states are
 * SC_ST_CER (error), SC_ST_DIS (abort), and SC_ST_CON (no change). This only
 * works with connection-based streams. We know that there were no I/O event
 * when reaching this function. Timeouts and errors are *not* cleared.
 */
void back_handle_st_con(struct stream *s)
{
	struct stconn *sc = s->scb;
	struct channel *req = &s->req;
	int client_shut, empty_or_abrt;

	DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);

	/* the client might want to abort: either its side is fully shut,
	 * or it asked for a shutdown while nothing remains to be forwarded
	 * (or "option abortonclose" is set).
	 */
	client_shut = (s->scf->flags & SC_FL_SHUT_DONE);
	empty_or_abrt = (s->scb->flags & SC_FL_SHUT_WANTED) &&
	                (!co_data(req) || (s->be->options & PR_O_ABRT_CLOSE));

	if (client_shut || empty_or_abrt) {
		sc->flags |= SC_FL_NOLINGER;
		sc_shutdown(sc);
		s->conn_err_type |= STRM_ET_CONN_ABRT;
		if (s->srv_error)
			s->srv_error(s, sc);
		/* Note: state = SC_ST_DIS now */
		DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
		goto end;
	}

 done:
	/* retryable error ? */
	if ((s->flags & SF_CONN_EXP) || (sc->flags & SC_FL_ERROR)) {
		if (!s->conn_err_type)
			s->conn_err_type = (sc->flags & SC_FL_ERROR) ?
			                   STRM_ET_CONN_ERR : STRM_ET_CONN_TO;
		sc->state = SC_ST_CER;
		DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
	}

 end:
	DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
}
/* This function is called with (sc->state == SC_ST_CER) meaning that a
 * previous connection attempt has failed and that the file descriptor
 * has already been released. Possible causes include asynchronous error
 * notification and time out. Possible output states are SC_ST_CLO when
 * retries are exhausted, SC_ST_TAR when a delay is wanted before a new
 * connection attempt, SC_ST_ASS when it's wise to retry on the same server,
 * and SC_ST_REQ when an immediate redispatch is wanted. The buffers are
 * marked as in error state. Timeouts and errors are cleared before retrying.
 */
void back_handle_st_cer(struct stream *s)
{
	struct stconn *sc = s->scb;
	int must_tar = !!(sc->flags & SC_FL_ERROR);

	DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);

	s->conn_exp = TICK_ETERNITY;
	s->flags &= ~SF_CONN_EXP;

	/* we probably have to release last stream from the server */
	if (objt_server(s->target)) {
		struct connection *conn = sc_conn(sc);

		health_adjust(__objt_server(s->target), HANA_STATUS_L4_ERR);

		if (s->flags & SF_CURR_SESS) {
			s->flags &= ~SF_CURR_SESS;
			_HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
		}

		if ((sc->flags & SC_FL_ERROR) &&
		    conn && conn->err_code == CO_ER_SSL_MISMATCH_SNI) {
			/* We tried to connect to a server which is configured
			 * with "verify required" and which doesn't have the
			 * "verifyhost" directive. The server presented a wrong
			 * certificate (a certificate for an unexpected name),
			 * which implies that we have used SNI in the handshake,
			 * and that the server doesn't have the associated cert
			 * and presented a default one.
			 *
			 * This is a serious enough issue not to retry. It's
			 * especially important because this wrong name might
			 * either be the result of a configuration error, and
			 * retrying will only hammer the server, or is caused
			 * by the use of a wrong SNI value, most likely
			 * provided by the client and we don't want to let the
			 * client provoke retries.
			 */
			s->conn_retries = s->be->conn_retries;
			DBG_TRACE_DEVEL("Bad SSL cert, disable connection retries", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
		}
	}

	/* ensure that we have enough retries left */
	if (s->conn_retries >= s->be->conn_retries || !(s->be->retry_type & PR_RE_CONN_FAILED)) {
		if (!s->conn_err_type) {
			s->conn_err_type = STRM_ET_CONN_ERR;
		}

		if (objt_server(s->target))
			_HA_ATOMIC_INC(&objt_server(s->target)->counters.failed_conns);
		_HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
		sess_change_server(s, NULL);
		if (may_dequeue_tasks(objt_server(s->target), s->be))
			process_srv_queue(objt_server(s->target));

		/* shutw is enough to stop a connecting socket */
		sc_shutdown(sc);
		sc->flags |= SC_FL_ERROR;

		sc->state = SC_ST_CLO;
		if (s->srv_error)
			s->srv_error(s, sc);

		DBG_TRACE_STATE("connection failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
		goto end;
	}

	/* At this stage, we will trigger a connection retry (with or without
	 * redispatch). Thus we must reset the SI endpoint on the server side
	 * an close the attached connection. It is especially important to do it
	 * now if the retry is not immediately performed, to be sure to release
	 * resources as soon as possible and to not catch errors from the lower
	 * layers in an unexpected state (i.e < ST_CONN).
	 *
	 * Note: the stream connector will be switched to ST_REQ, ST_ASS or
	 * ST_TAR and SC_FL_ERROR and SF_CONN_EXP flags will be unset.
	 */
	if (sc_reset_endp(sc) < 0) {
		if (!s->conn_err_type)
			s->conn_err_type = STRM_ET_CONN_OTHER;

		if (objt_server(s->target))
			_HA_ATOMIC_INC(&objt_server(s->target)->counters.internal_errors);
		_HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
		sess_change_server(s, NULL);
		if (may_dequeue_tasks(objt_server(s->target), s->be))
			process_srv_queue(objt_server(s->target));

		/* shutw is enough to stop a connecting socket */
		sc_shutdown(sc);
		sc->flags |= SC_FL_ERROR;

		sc->state = SC_ST_CLO;
		if (s->srv_error)
			s->srv_error(s, sc);

		DBG_TRACE_STATE("error resetting endpoint", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
		goto end;
	}

	s->conn_retries++;
	stream_choose_redispatch(s);

	if (must_tar) {
		/* The error was an asynchronous connection error, and we will
		 * likely have to retry connecting to the same server, most
		 * likely leading to the same result. To avoid this, we wait
		 * MIN(one second, connect timeout) before retrying. We don't
		 * do it when the failure happened on a reused connection
		 * though.
		 */
		int delay = 1000;
		const int reused = (s->flags & SF_SRV_REUSED) &&
		                   !(s->flags & SF_SRV_REUSED_ANTICIPATED);

		if (s->be->timeout.connect && s->be->timeout.connect < delay)
			delay = s->be->timeout.connect;

		if (!s->conn_err_type)
			s->conn_err_type = STRM_ET_CONN_ERR;

		/* only wait when we're retrying on the same server */
		if ((sc->state == SC_ST_ASS ||
		     (s->be->srv_act <= 1)) && !reused) {
			sc->state = SC_ST_TAR;
			s->conn_exp = tick_add(now_ms, MS_TO_TICKS(delay));
		}
		DBG_TRACE_STATE("retry a new connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
	}

  end:
	DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
}
/* This function is called with (sc->state == SC_ST_RDY) meaning that a
* connection was attempted, that the file descriptor is already allocated,
* and that it has succeeded. We must still check for errors and aborts.
* Possible output states are SC_ST_EST (established), SC_ST_CER (error),
* and SC_ST_DIS (abort). This only works with connection-based streams.
* Timeouts and errors are *not* cleared.
*/
void back_handle_st_rdy(struct stream *s)
{
struct stconn *sc = s->scb;
struct channel *req = &s->req;
DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
/* Here the appctx must exists because the SC was set to
* SC_ST_RDY state when the appctx was created.
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
*/
BUG_ON(!sc_appctx(s->scb));
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
if (!s->logs.request_ts)
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
s->logs.request_ts = now_ns;
s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
BUG/MEDIUM: conn-stream: Set back CS to RDY state when the appctx is created When an appctx is created on the server side, we now set the corresponding conn-stream to ready state (CS_ST_RDY). When it happens, the backend conn-stream is in CS_ST_INI state. It is not consistant to let the conn-stream in this state because it means it is possible to have a target installed in CS_ST_INI state, while with a connection, the conn-stream is switch to CS_ST_RDY or CS_ST_EST state. It is especially anbiguous because we may be tempted to think there is no endpoint attached to the conn-stream before the CS_ST_CON state. And it is indeed the reason for a bug leading to a crash because a cs_detach_endp() is performed if an abort is detected on the backend conn-stream in CS_ST_INI state. With a mux or a appctx attached to the conn-stream, "->endp" field is set to NULL. It is unexpected. The API will be changed to be sure it is not possible. But it exposes a consistency issue with applets. So, the conn-stream must not stay in CS_ST_INI state when an appctx is attached. But there is no reason to set it in CS_ST_REQ. The conn-stream must be set to CS_ST_RDY to handle applets and connections in the same way. Note that if only the target is set but no appctx is created, the backend conn-stream is switched from CS_ST_INI to CS_ST_REQ state to be able to create the corresponding appctx. This part is unchanged. This patch depends on the commit "MINOR: backend: Don't allow to change backend applet". The ambiguity exists on previous versions. But the issue is 2.6-specific. Thus, no backport is needed.
2022-04-21 05:52:07 -04:00
be_set_sess_last(s->be);
}
/* We know the connection at least succeeded, though it could have
* since met an error for any other reason. At least it didn't time out
* even though the timeout might have been reported right after success.
* We need to take care of various situations here :
* - everything might be OK. We have to switch to established.
* - an I/O error might have been reported after a successful transfer,
* which is not retryable and needs to be logged correctly, and needs
* established as well
* - SC_ST_CON implies !CF_WROTE_DATA but not conversely as we could
* have validated a connection with incoming data (e.g. TCP with a
* banner protocol), or just a successful connect() probe.
* - the client might have requested a connection abort, this needs to
* be checked before we decide to retry anything.
*/
/* it's still possible to handle client aborts or connection retries
* before any data were sent.
*/
if (!(req->flags & CF_WROTE_DATA)) {
/* client abort ? */
if ((s->scf->flags & SC_FL_SHUT_DONE) ||
((s->scb->flags & SC_FL_SHUT_WANTED) &&
(!co_data(req) || (s->be->options & PR_O_ABRT_CLOSE)))) {
/* give up */
sc->flags |= SC_FL_NOLINGER;
sc_shutdown(sc);
s->conn_err_type |= STRM_ET_CONN_ABRT;
if (s->srv_error)
s->srv_error(s, sc);
DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
goto end;
}
/* retryable error ? */
if (sc->flags & SC_FL_ERROR) {
if (!s->conn_err_type)
s->conn_err_type = STRM_ET_CONN_ERR;
sc->state = SC_ST_CER;
DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
goto end;
}
}
/* data were sent and/or we had no error, back_establish() will
* now take over.
*/
DBG_TRACE_STATE("connection established", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
s->conn_err_type = STRM_ET_NONE;
sc->state = SC_ST_EST;
end:
DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
}
/* sends a log message when a backend goes down, and also sets last
* change date.
*/
void set_backend_down(struct proxy *be)
{
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
be->last_change = ns_to_sec(now_ns);
_HA_ATOMIC_INC(&be->down_trans);
if (!(global.mode & MODE_STARTING)) {
ha_alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
}
}
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
/* Apply RDP cookie persistence to the current stream. For this, the function
 * tries to extract an RDP cookie from the request buffer, and look for the
 * matching server in the list. If the server is found, it is assigned to the
 * stream. This always returns 1, and the analyser removes itself from the
 * list. Nothing is performed if a server was already assigned.
 */
int tcp_persist_rdp_cookie(struct stream *s, struct channel *req, int an_bit)
{
	struct proxy *px = s->be;
	struct sample smp;
	struct server *srv;
	uint32_t ip_key;
	uint16_t port_key;
	char *end;

	DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);

	/* nothing to do when a server was already picked */
	if (s->flags & SF_ASSIGNED)
		goto no_cookie;

	memset(&smp, 0, sizeof(smp));

	if (fetch_rdp_cookie_name(s, &smp, s->be->rdp_cookie_name, s->be->rdp_cookie_len) == 0 ||
	    (smp.flags & SMP_F_MAY_CHANGE) || smp.data.u.str.data == 0)
		goto no_cookie;

	/* Considering an rdp cookie detected using acl, str ended with <cr><lf> and should return.
	 * The cookie format is <ip> "." <port> where "ip" is the integer corresponding to the
	 * server's IP address in network order, and "port" is the integer corresponding to the
	 * server's port in network order.
	 */
	ip_key = strtoul(smp.data.u.str.area, &end, 10);
	if (*end != '.')
		goto no_cookie;
	end++;

	port_key = ntohs(strtoul(end, &end, 10));
	if (*end != '.')
		goto no_cookie;

	/* scan the server list for an IPv4 server matching both the address
	 * and the port encoded in the cookie.
	 */
	s->target = NULL;
	for (srv = px->srv; srv; srv = srv->next) {
		if (srv->addr.ss_family != AF_INET ||
		    port_key != srv->svc_port ||
		    ip_key != ((struct sockaddr_in *)&srv->addr)->sin_addr.s_addr)
			continue;

		if ((srv->cur_state != SRV_ST_STOPPED) || (px->options & PR_O_PERSIST)) {
			/* we found the server and it is usable */
			s->flags |= SF_DIRECT | SF_ASSIGNED;
			s->target = &srv->obj_type;
			break;
		}
	}

 no_cookie:
	req->analysers &= ~an_bit;
	req->analyse_exp = TICK_ETERNITY;
	DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
	return 1;
}
[MEDIUM] stats: report server and backend cumulated downtime Hello, This patch implements new statistics for SLA calculation by adding new field 'Dwntime' with total down time since restart (both HTTP/CSV) and extending status field (HTTP) or inserting a new one (CSV) with time showing how long each server/backend is in a current state. Additionaly, down transations are also calculated and displayed for backends, so it is possible to know how many times selected backend was down, generating "No server is available to handle this request." error. New information are presentetd in two different ways: - for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or "59m 59s" - for CSV: seconds I believe that seconds resolution is enough. As there are more columns in the status page I decided to shrink some names to make more space: - Weight -> Wght - Check -> Chk - Down -> Dwn Making described changes I also made some improvements and fixed some small bugs: - don't increment s->health above 's->rise + s->fall - 1'. Previously it was incremented an then (re)set to 's->rise + s->fall - 1'. - do not set server down if it is down already - do not set server up if it is up already - fix colspan in multiple places (mostly introduced by my previous patch) - add missing "status" header to CSV - fix order of retries/redispatches in server (CSV) - s/Tthen/Then/ - s/server/backend/ in DATA_ST_PX_BE (dumpstats.c) Changes from previous version: - deal with negative time intervales - don't relay on s->state (SRV_RUNNING) - little reworked human_time + compacted format (no spaces). If needed it can be used in the future for other purposes by optionally making "cnt" as an argument - leave set_server_down mostly unchanged - only little reworked "process_chk: 9" - additional fields in CSV are appended to the rigth - fix "SEC" macro - named arguments (human_time, be_downtime, srv_downtime) Hope it is OK. 
If there are only cosmetic changes needed please fill free to correct it, however if there are some bigger changes required I would like to discuss it first or at last to know what exactly was changed especially since I already put this patch into my production server. :) Thank you, Best regards, Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
int be_downtime(struct proxy *px) {
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
if (px->lbprm.tot_weight && px->last_change < ns_to_sec(now_ns)) // ignore negative time
[MEDIUM] stats: report server and backend cumulated downtime Hello, This patch implements new statistics for SLA calculation by adding new field 'Dwntime' with total down time since restart (both HTTP/CSV) and extending status field (HTTP) or inserting a new one (CSV) with time showing how long each server/backend is in a current state. Additionaly, down transations are also calculated and displayed for backends, so it is possible to know how many times selected backend was down, generating "No server is available to handle this request." error. New information are presentetd in two different ways: - for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or "59m 59s" - for CSV: seconds I believe that seconds resolution is enough. As there are more columns in the status page I decided to shrink some names to make more space: - Weight -> Wght - Check -> Chk - Down -> Dwn Making described changes I also made some improvements and fixed some small bugs: - don't increment s->health above 's->rise + s->fall - 1'. Previously it was incremented an then (re)set to 's->rise + s->fall - 1'. - do not set server down if it is down already - do not set server up if it is up already - fix colspan in multiple places (mostly introduced by my previous patch) - add missing "status" header to CSV - fix order of retries/redispatches in server (CSV) - s/Tthen/Then/ - s/server/backend/ in DATA_ST_PX_BE (dumpstats.c) Changes from previous version: - deal with negative time intervales - don't relay on s->state (SRV_RUNNING) - little reworked human_time + compacted format (no spaces). If needed it can be used in the future for other purposes by optionally making "cnt" as an argument - leave set_server_down mostly unchanged - only little reworked "process_chk: 9" - additional fields in CSV are appended to the rigth - fix "SEC" macro - named arguments (human_time, be_downtime, srv_downtime) Hope it is OK. 
If there are only cosmetic changes needed please fill free to correct it, however if there are some bigger changes required I would like to discuss it first or at last to know what exactly was changed especially since I already put this patch into my production server. :) Thank you, Best regards, Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
return px->down_time;
MEDIUM: clock: replace timeval "now" with integer "now_ns" This puts an end to the occasional confusion between the "now" date that is internal, monotonic and not synchronized with the system's date, and "date" which is the system's date and not necessarily monotonic. Variable "now" was removed and replaced with a 64-bit integer "now_ns" which is a counter of nanoseconds. It wraps every 585 years, so if all goes well (i.e. if humanity does not need haproxy anymore in 500 years), it will just never wrap. This implies that now_ns is never nul and that the zero value can reliably be used as "not set yet" for a timestamp if needed. This will also simplify date checks where it becomes possible again to do "date1<date2". All occurrences of "tv_to_ns(&now)" were simply replaced by "now_ns". Due to the intricacies between now, global_now and now_offset, all 3 had to be turned to nanoseconds at once. It's not a problem since all of them were solely used in 3 functions in clock.c, but they make the patch look bigger than it really is. The clock_update_local_date() and clock_update_global_date() functions are now much simpler as there's no need anymore to perform conversions nor to round the timeval up or down. The wrapping continues to happen by presetting the internal offset in the short future so that the 32-bit now_ms continues to wrap 20 seconds after boot. The start_time used to calculate uptime can still be turned to nanoseconds now. One interrogation concerns global_now_ms which is used only for the freq counters. It's unclear whether there's more value in using two variables that need to be synchronized sequentially like today or to just use global_now_ns divided by 1 million. Both approaches will work equally well on modern systems, the difference might come from smaller ones. Better not change anyhting for now. 
One benefit of the new approach is that we now have an internal date with a resolution of the nanosecond and the precision of the microsecond, which can be useful to extend some measurements given that timestamps also have this resolution.
2023-04-28 03:16:15 -04:00
return ns_to_sec(now_ns) - px->last_change + px->down_time;
[MEDIUM] stats: report server and backend cumulated downtime Hello, This patch implements new statistics for SLA calculation by adding new field 'Dwntime' with total down time since restart (both HTTP/CSV) and extending status field (HTTP) or inserting a new one (CSV) with time showing how long each server/backend is in a current state. Additionaly, down transations are also calculated and displayed for backends, so it is possible to know how many times selected backend was down, generating "No server is available to handle this request." error. New information are presentetd in two different ways: - for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or "59m 59s" - for CSV: seconds I believe that seconds resolution is enough. As there are more columns in the status page I decided to shrink some names to make more space: - Weight -> Wght - Check -> Chk - Down -> Dwn Making described changes I also made some improvements and fixed some small bugs: - don't increment s->health above 's->rise + s->fall - 1'. Previously it was incremented an then (re)set to 's->rise + s->fall - 1'. - do not set server down if it is down already - do not set server up if it is up already - fix colspan in multiple places (mostly introduced by my previous patch) - add missing "status" header to CSV - fix order of retries/redispatches in server (CSV) - s/Tthen/Then/ - s/server/backend/ in DATA_ST_PX_BE (dumpstats.c) Changes from previous version: - deal with negative time intervales - don't relay on s->state (SRV_RUNNING) - little reworked human_time + compacted format (no spaces). If needed it can be used in the future for other purposes by optionally making "cnt" as an argument - leave set_server_down mostly unchanged - only little reworked "process_chk: 9" - additional fields in CSV are appended to the rigth - fix "SEC" macro - named arguments (human_time, be_downtime, srv_downtime) Hope it is OK. 
If there are only cosmetic changes needed please fill free to correct it, however if there are some bigger changes required I would like to discuss it first or at last to know what exactly was changed especially since I already put this patch into my production server. :) Thank you, Best regards, Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
}
/*
* This function returns a string containing the balancing
* mode of the proxy in a format suitable for stats.
*/
const char *backend_lb_algo_str(int algo) {
if (algo == BE_LB_ALGO_RR)
return "roundrobin";
else if (algo == BE_LB_ALGO_SRR)
return "static-rr";
else if (algo == BE_LB_ALGO_FAS)
return "first";
else if (algo == BE_LB_ALGO_LC)
return "leastconn";
else if (algo == BE_LB_ALGO_SH)
return "source";
else if (algo == BE_LB_ALGO_UH)
return "uri";
else if (algo == BE_LB_ALGO_PH)
return "url_param";
else if (algo == BE_LB_ALGO_HH)
return "hdr";
else if (algo == BE_LB_ALGO_RCH)
return "rdp-cookie";
else if (algo == BE_LB_ALGO_SMP)
return "hash";
else if (algo == BE_LB_ALGO_NONE)
return "none";
else
return "unknown";
}
/* This function parses a "balance" statement in a backend section describing
* <curproxy>. It returns -1 if there is any error, otherwise zero. If it
* returns -1, it will write an error message into the <err> buffer which will
* automatically be allocated and must be passed as NULL. The trailing '\n'
* will not be written. The function must be called with <args> pointing to the
* first word after "balance".
*/
int backend_parse_balance(const char **args, char **err, struct proxy *curproxy)
{
if (!*(args[0])) {
/* if no option is set, use round-robin by default */
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_RR;
return 0;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[0], "roundrobin") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_RR;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "static-rr") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_SRR;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "first") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_FAS;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "leastconn") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_LC;
}
else if (!strncmp(args[0], "random", 6)) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_RND;
curproxy->lbprm.arg_opt1 = 2;
if (*(args[0] + 6) == '(' && *(args[0] + 7) != ')') { /* number of draws */
const char *beg;
char *end;
beg = args[0] + 7;
curproxy->lbprm.arg_opt1 = strtol(beg, &end, 0);
if (*end != ')') {
if (!*end)
memprintf(err, "random : missing closing parenthesis.");
else
memprintf(err, "random : unexpected character '%c' after argument.", *end);
return -1;
}
if (curproxy->lbprm.arg_opt1 < 1) {
memprintf(err, "random : number of draws must be at least 1.");
return -1;
}
}
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "source") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_SH;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "uri") == 0) {
int arg = 1;
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_UH;
curproxy->lbprm.arg_opt1 = 0; // "whole", "path-only"
curproxy->lbprm.arg_opt2 = 0; // "len"
curproxy->lbprm.arg_opt3 = 0; // "depth"
MINOR: balance uri: added 'whole' parameter to include query string in hash calculation This patch brings a new "whole" parameter to "balance uri" which makes the hash work over the whole uri, not just the part before the query string. Len and depth parameter are still honnored. The reason for this new feature is explained below. I have 3 backend servers, each accepting different form of HTTP queries: http://backend1.server.tld/service1.php?q=... http://backend1.server.tld/service2.php?q=... http://backend2.server.tld/index.php?query=...&subquery=... http://backend3.server.tld/image/49b8c0d9ff Each backend server returns a different response based on either: - the URI path (the left part of the URI before the question mark) - the query string (the right part of the URI after the question mark) - or the combination of both I wanted to set up a common caching cluster (using 6 Squid servers, each configured as reverse proxy for those 3 backends) and have HAProxy balance the queries among the Squid servers based on URL. I also wanted to achieve hight cache hit ration on each Squid server and send the same queries to the same Squid servers. Initially I was considering using the 'balance uri' algorithm, but that would not work as in case of backend2 all queries would go to only one Squid server. The 'balance url_param' would not work either as it would send the backend3 queries to only one Squid server. So I thought the simplest solution would be to use 'balance uri', but to calculate the hash based on the whole URI (URI path + query string), instead of just the URI path.
2012-05-19 05:19:54 -04:00
while (*args[arg]) {
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[arg], "len") == 0) {
if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
return -1;
}
curproxy->lbprm.arg_opt2 = atoi(args[arg+1]);
arg += 2;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[arg], "depth") == 0) {
if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
return -1;
}
/* hint: we store the position of the ending '/' (depth+1) so
* that we avoid a comparison while computing the hash.
*/
curproxy->lbprm.arg_opt3 = atoi(args[arg+1]) + 1;
arg += 2;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[arg], "whole") == 0) {
curproxy->lbprm.arg_opt1 |= 1;
MINOR: balance uri: added 'whole' parameter to include query string in hash calculation This patch brings a new "whole" parameter to "balance uri" which makes the hash work over the whole uri, not just the part before the query string. Len and depth parameter are still honnored. The reason for this new feature is explained below. I have 3 backend servers, each accepting different form of HTTP queries: http://backend1.server.tld/service1.php?q=... http://backend1.server.tld/service2.php?q=... http://backend2.server.tld/index.php?query=...&subquery=... http://backend3.server.tld/image/49b8c0d9ff Each backend server returns a different response based on either: - the URI path (the left part of the URI before the question mark) - the query string (the right part of the URI after the question mark) - or the combination of both I wanted to set up a common caching cluster (using 6 Squid servers, each configured as reverse proxy for those 3 backends) and have HAProxy balance the queries among the Squid servers based on URL. I also wanted to achieve hight cache hit ration on each Squid server and send the same queries to the same Squid servers. Initially I was considering using the 'balance uri' algorithm, but that would not work as in case of backend2 all queries would go to only one Squid server. The 'balance url_param' would not work either as it would send the backend3 queries to only one Squid server. So I thought the simplest solution would be to use 'balance uri', but to calculate the hash based on the whole URI (URI path + query string), instead of just the URI path.
2012-05-19 05:19:54 -04:00
arg += 1;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[arg], "path-only") == 0) {
curproxy->lbprm.arg_opt1 |= 2;
arg += 1;
}
else {
memprintf(err, "%s only accepts parameters 'len', 'depth', 'path-only', and 'whole' (got '%s').", args[0], args[arg]);
return -1;
}
}
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[0], "url_param") == 0) {
if (!*args[1]) {
memprintf(err, "%s requires an URL parameter name.", args[0]);
return -1;
}
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_PH;
free(curproxy->lbprm.arg_str);
curproxy->lbprm.arg_str = strdup(args[1]);
curproxy->lbprm.arg_len = strlen(args[1]);
if (*args[2]) {
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[2], "check_post") != 0) {
memprintf(err, "%s only accepts 'check_post' modifier (got '%s').", args[0], args[2]);
return -1;
}
}
}
else if (strcmp(args[0], "hash") == 0) {
if (!*args[1]) {
memprintf(err, "%s requires a sample expression.", args[0]);
return -1;
}
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_SMP;
ha_free(&curproxy->lbprm.arg_str);
curproxy->lbprm.arg_str = strdup(args[1]);
curproxy->lbprm.arg_len = strlen(args[1]);
if (*args[2]) {
memprintf(err, "%s takes no other argument (got '%s').", args[0], args[2]);
return -1;
}
}
else if (!strncmp(args[0], "hdr(", 4)) {
const char *beg, *end;
beg = args[0] + 4;
end = strchr(beg, ')');
if (!end || end == beg) {
memprintf(err, "hdr requires an http header field name.");
return -1;
}
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_HH;
free(curproxy->lbprm.arg_str);
curproxy->lbprm.arg_len = end - beg;
curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
curproxy->lbprm.arg_opt1 = 0;
if (*args[1]) {
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[1], "use_domain_only") != 0) {
memprintf(err, "%s only accepts 'use_domain_only' modifier (got '%s').", args[0], args[1]);
return -1;
}
curproxy->lbprm.arg_opt1 = 1;
}
}
else if (!strncmp(args[0], "rdp-cookie", 10)) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_RCH;
if ( *(args[0] + 10 ) == '(' ) { /* cookie name */
const char *beg, *end;
beg = args[0] + 11;
end = strchr(beg, ')');
if (!end || end == beg) {
memprintf(err, "rdp-cookie : missing cookie name.");
return -1;
}
free(curproxy->lbprm.arg_str);
curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
curproxy->lbprm.arg_len = end - beg;
}
else if ( *(args[0] + 10 ) == '\0' ) { /* default cookie name 'mstshash' */
free(curproxy->lbprm.arg_str);
curproxy->lbprm.arg_str = strdup("mstshash");
curproxy->lbprm.arg_len = strlen(curproxy->lbprm.arg_str);
}
else { /* syntax */
memprintf(err, "rdp-cookie : missing cookie name.");
return -1;
}
}
else if (strcmp(args[0], "log-hash") == 0) {
if (!*args[1]) {
memprintf(err, "%s requires a converter list.", args[0]);
return -1;
}
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_LH;
ha_free(&curproxy->lbprm.arg_str);
curproxy->lbprm.arg_str = strdup(args[1]);
}
else if (strcmp(args[0], "sticky") == 0) {
curproxy->lbprm.algo &= ~BE_LB_ALGO;
curproxy->lbprm.algo |= BE_LB_ALGO_LS;
}
else {
memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options.");
return -1;
}
return 0;
}
/************************************************************************/
/* All supported sample and ACL keywords must be declared here. */
/************************************************************************/
/* Sample fetch: number of usable (enabled) servers in the designated backend.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_nbsrv(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = be_usable_srv(be);

	return 1;
}
/* report in smp->flags a success or failure depending on the designated
* server's state. There is no match function involved since there's no pattern.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_is_up(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
struct server *srv = args->data.srv;
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_BOOL;
if (!(srv->cur_admin & SRV_ADMF_MAINT) &&
(!(srv->check.state & CHK_ST_CONFIGURED) || (srv->cur_state != SRV_ST_STOPPED)))
smp->data.u.sint = 1;
else
smp->data.u.sint = 0;
return 1;
}
/* Sample fetch: total number of connection slots still available in the
 * backend, i.e. the sum over all non-stopped servers of free maxconn slots
 * plus free queue slots. Reports -1 when any non-stopped server has an
 * unlimited maxconn or maxqueue, since the total cannot be computed then.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_connslots(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;
	struct server *srv;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;

	for (srv = be->srv; srv != NULL; srv = srv->next) {
		if (srv->cur_state == SRV_ST_STOPPED)
			continue;

		if (srv->maxconn == 0 || srv->maxqueue == 0) {
			/* configuration is stupid */
			smp->data.u.sint = -1;  /* FIXME: stupid value! */
			return 1;
		}

		smp->data.u.sint += (srv->maxconn - srv->cur_sess)
		                  + (srv->maxqueue - srv->queue.length);
	}

	return 1;
}
/* Sample fetch: numeric unique id of the backend. The backend is taken from
 * the stream when one exists, otherwise from the health-check origin of the
 * session. Fails when neither is available.
 */
static int
smp_fetch_be_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be;

	if (smp->strm)
		be = smp->strm->be;
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		be = __objt_check(smp->sess->origin)->proxy;
	else
		be = NULL;

	if (!be)
		return 0;

	smp->flags = SMP_F_VOL_TXN;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = be->uuid;
	return 1;
}
/* Sample fetch: name of the backend, as a constant string. The backend is
 * taken from the stream when one exists, otherwise from the health-check
 * origin of the session. Fails when neither is available.
 */
static int
smp_fetch_be_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be;

	if (smp->strm)
		be = smp->strm->be;
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		be = __objt_check(smp->sess->origin)->proxy;
	else
		be = NULL;

	if (!be)
		return 0;

	/* the proxy id is owned by the proxy, hence the CONST flag */
	smp->data.u.str.area = (char *)be->id;
	if (!smp->data.u.str.area)
		return 0;

	smp->data.type = SMP_T_STR;
	smp->flags = SMP_F_CONST;
	smp->data.u.str.data = strlen(smp->data.u.str.area);
	return 1;
}
/* Sample fetch: numeric unique id (puid) of the target server. The server is
 * taken from the stream's target when one exists, otherwise from the
 * health-check origin of the session. Fails when neither is available.
 */
static int
smp_fetch_srv_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct server *srv;

	if (smp->strm)
		srv = objt_server(smp->strm->target);
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		srv = __objt_check(smp->sess->origin)->server;
	else
		srv = NULL;

	if (!srv)
		return 0;

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = srv->puid;
	return 1;
}
/* Sample fetch: name of the target server as a string. The server is taken
 * from the stream's target when one exists, otherwise from the health-check
 * origin of the session. Fails when neither is available.
 */
static int
smp_fetch_srv_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct server *srv;

	if (smp->strm)
		srv = objt_server(smp->strm->target);
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		srv = __objt_check(smp->sess->origin)->server;
	else
		srv = NULL;

	if (!srv)
		return 0;

	smp->data.u.str.area = srv->id;
	if (!smp->data.u.str.area)
		return 0;

	smp->data.type = SMP_T_STR;
	smp->data.u.str.data = strlen(smp->data.u.str.area);
	return 1;
}
/* Sample fetch: number of sessions per second reaching the backend.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_be_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = read_freq_ctr(&be->be_sess_per_sec);

	return 1;
}
/* Sample fetch: number of concurrent connections on the backend.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_be_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = be->beconn;

	return 1;
}
/* Sample fetch: number of available connections across the currently usable
 * servers of the backend, i.e. the sum of (dynamic maxconn - cur_sess) over
 * servers that are usable; unused backup servers are skipped unless backups
 * are all used. Reports -1 as soon as one counted server is unlimited
 * (maxconn == 0).
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_be_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;
	struct server *srv;
	unsigned int dyn_max;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = 0;

	for (srv = be->srv; srv != NULL; srv = srv->next) {
		if (srv->cur_state == SRV_ST_STOPPED)
			continue;

		be = srv->proxy;

		/* skip unusable servers, and backup servers that are not in use */
		if (!srv_currently_usable(srv) ||
		    ((srv->flags & SRV_F_BACKUP) &&
		     (be->srv_act || (srv != be->lbprm.fbck && !(be->options & PR_O_USE_ALL_BK)))))
			continue;

		if (srv->maxconn == 0) {
			/* one active server is unlimited, return -1 */
			smp->data.u.sint = -1;
			return 1;
		}

		dyn_max = srv_dynamic_maxconn(srv);
		if (dyn_max > srv->cur_sess)
			smp->data.u.sint += dyn_max - srv->cur_sess;
	}

	return 1;
}
/* Sample fetch: total number of queued connections on the backend.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = be->totpend;

	return 1;
}
/* Sample fetch: total number of queued connections on the backend divided by
 * the number of running servers, rounded up. If there is no running server,
 * we return twice the total, just as if we had half a running server. This is
 * more or less correct anyway, since we expect the last server to come back
 * soon.
 * Accepts exactly 1 argument. Argument is a backend, other types will lead to
 * undefined behaviour.
 */
static int
smp_fetch_avg_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *be = args->data.prx;
	int usable;

	if (!be)
		return 0;

	/* a reference to a "defaults" section resolves to the stream's backend */
	if (be->cap & PR_CAP_DEF)
		be = smp->px;

	smp->flags = SMP_F_VOL_TEST;
	smp->data.type = SMP_T_SINT;

	usable = be_usable_srv(be);
	if (usable <= 0)
		smp->data.u.sint = be->totpend * 2; /* no server: pretend half of one */
	else
		smp->data.u.sint = (be->totpend + usable - 1) / usable; /* ceiling div */

	return 1;
}
/* set temp integer to the number of concurrent connections on the server in the backend.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = args->data.srv->cur_sess;
return 1;
}
/* set temp integer to the number of available connections on the server in the backend.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
unsigned int maxconn;
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
if (args->data.srv->maxconn == 0) {
/* one active server is unlimited, return -1 */
smp->data.u.sint = -1;
return 1;
}
maxconn = srv_dynamic_maxconn(args->data.srv);
if (maxconn > args->data.srv->cur_sess)
smp->data.u.sint = maxconn - args->data.srv->cur_sess;
else
smp->data.u.sint = 0;
return 1;
}
/* set temp integer to the number of connections pending in the server's queue.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_queue(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = args->data.srv->queue.length;
return 1;
}
/* set temp integer to the number of enabled servers on the proxy.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = read_freq_ctr(&args->data.srv->sess_per_sec);
return 1;
}
/* set temp integer to the server weight.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_weight(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
struct server *srv = args->data.srv;
struct proxy *px = srv->proxy;
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = (srv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv;
return 1;
}
/* set temp integer to the server initial weight.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_iweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = args->data.srv->iweight;
return 1;
}
/* set temp integer to the server user-specified weight.
* Accepts exactly 1 argument. Argument is a server, other types will lead to
* undefined behaviour.
*/
static int
smp_fetch_srv_uweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
smp->flags = SMP_F_VOL_TEST;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = args->data.srv->uweight;
return 1;
}
/* Sample fetch: the backend's "timeout server" value, converted to
 * milliseconds. The backend is taken from the stream when one exists,
 * otherwise from the health-check origin of the session. Fails when neither
 * is available.
 */
static int
smp_fetch_be_server_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *px = NULL;

	if (smp->strm)
		px = smp->strm->be;
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		px = __objt_check(smp->sess->origin)->proxy;

	if (!px)
		return 0;

	smp->flags = SMP_F_VOL_TXN;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = TICKS_TO_MS(px->timeout.server);
	return 1;
}
/* Sample fetch: the backend's "timeout tunnel" value, converted to
 * milliseconds. The backend is taken from the stream when one exists,
 * otherwise from the health-check origin of the session. Fails when neither
 * is available.
 */
static int
smp_fetch_be_tunnel_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct proxy *px = NULL;

	if (smp->strm)
		px = smp->strm->be;
	else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
		px = __objt_check(smp->sess->origin)->proxy;

	if (!px)
		return 0;

	smp->flags = SMP_F_VOL_TXN;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = TICKS_TO_MS(px->timeout.tunnel);
	return 1;
}
static int sample_conv_nbsrv(const struct arg *args, struct sample *smp, void *private)
{
struct proxy *px;
if (!smp_make_safe(smp))
return 0;
px = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
if (!px)
return 0;
smp->data.type = SMP_T_SINT;
smp->data.u.sint = be_usable_srv(px);
return 1;
}
/* Converter: takes a server name, optionally prefixed with "backend/", in
 * the sample string and replaces the sample with that server's queue length.
 * Without a prefix, the server is looked up in the current proxy, which must
 * then be a backend. Fails when the backend or server cannot be found.
 */
static int
sample_conv_srv_queue(const struct arg *args, struct sample *smp, void *private)
{
	struct proxy *be;
	struct server *srv;
	char *slash;

	if (!smp_make_safe(smp))
		return 0;

	slash = strchr(smp->data.u.str.area, '/');
	if (slash) {
		/* split in place: the first part is the backend name */
		*slash = '\0';
		be = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
		if (!be)
			return 0;
		smp->data.u.str.area = slash + 1;
	}
	else {
		if (!(smp->px->cap & PR_CAP_BE))
			return 0;
		be = smp->px;
	}

	srv = server_find_by_name(be, smp->data.u.str.area);
	if (!srv)
		return 0;

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = srv->queue.length;
	return 1;
}
/* Note: must not be declared <const> as its list will be overwritten.
 * Please take care of keeping this list alphabetically sorted.
 */
static struct sample_fetch_kw_list smp_kws = {ILH, {
	{ "avg_queue",         smp_fetch_avg_queue_size,    ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "be_conn",           smp_fetch_be_conn,           ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "be_conn_free",      smp_fetch_be_conn_free,      ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "be_id",             smp_fetch_be_id,             0,           NULL, SMP_T_SINT, SMP_USE_BKEND, },
	{ "be_name",           smp_fetch_be_name,           0,           NULL, SMP_T_STR,  SMP_USE_BKEND, },
	{ "be_server_timeout", smp_fetch_be_server_timeout, 0,           NULL, SMP_T_SINT, SMP_USE_BKEND, },
	{ "be_sess_rate",      smp_fetch_be_sess_rate,      ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "be_tunnel_timeout", smp_fetch_be_tunnel_timeout, 0,           NULL, SMP_T_SINT, SMP_USE_BKEND, },
	{ "connslots",         smp_fetch_connslots,         ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "nbsrv",             smp_fetch_nbsrv,             ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "queue",             smp_fetch_queue_size,        ARG1(1,BE),  NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_conn",          smp_fetch_srv_conn,          ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_conn_free",     smp_fetch_srv_conn_free,     ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_id",            smp_fetch_srv_id,            0,           NULL, SMP_T_SINT, SMP_USE_SERVR, },
	{ "srv_is_up",         smp_fetch_srv_is_up,         ARG1(1,SRV), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
	{ "srv_iweight",       smp_fetch_srv_iweight,       ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_name",          smp_fetch_srv_name,          0,           NULL, SMP_T_STR,  SMP_USE_SERVR, },
	{ "srv_queue",         smp_fetch_srv_queue,         ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_sess_rate",     smp_fetch_srv_sess_rate,     ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_uweight",       smp_fetch_srv_uweight,       ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ "srv_weight",        smp_fetch_srv_weight,        ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
	{ /* END */ },
}};

/* register the sample fetch keywords at boot time */
INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
/* Note: must not be declared <const> as its list will be overwritten */
static struct sample_conv_kw_list sample_conv_kws = {ILH, {
	/* keyword, handler, arg mask, arg validator, input type, output type */
	{ "nbsrv", sample_conv_nbsrv, 0, NULL, SMP_T_STR, SMP_T_SINT },
	{ "srv_queue", sample_conv_srv_queue, 0, NULL, SMP_T_STR, SMP_T_SINT },
	{ /* END */ },
}};

/* register the sample converter keywords at boot time */
INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
/* Note: must not be declared <const> as its list will be overwritten.
 * Please take care of keeping this list alphabetically sorted.
 */
/* currently empty: all ACLs in this file are derived from sample fetches */
static struct acl_kw_list acl_kws = {ILH, {
	{ /* END */ },
}};

/* register the (currently empty) ACL keyword list at boot time */
INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/