/* Source: haproxy/src/connection.c (2786 lines, 82 KiB, C) */

/*
* Connection management functions
*
* Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <errno.h>
#include <import/ebmbtree.h>
#include <haproxy/api.h>
#include <haproxy/arg.h>
#include <haproxy/cfgparse.h>
#include <haproxy/connection.h>
#include <haproxy/fd.h>
#include <haproxy/frontend.h>
#include <haproxy/hash.h>
#include <haproxy/list.h>
#include <haproxy/log.h>
#include <haproxy/namespace.h>
#include <haproxy/net_helper.h>
#include <haproxy/proto_rhttp.h>
#include <haproxy/proto_tcp.h>
#include <haproxy/sample.h>
#include <haproxy/sc_strm.h>
#include <haproxy/server.h>
#include <haproxy/session.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/stconn.h>
#include <haproxy/tools.h>
#include <haproxy/xxhash.h>
/* Memory pools backing the objects handled in this file: connections, the
 * hash nodes attached to them, socket addresses, and two size classes of
 * TLV entries, each made of a conn_tlv_list header followed by room for
 * HA_PP2_TLV_VALUE_128 or HA_PP2_TLV_VALUE_256 bytes.
 */
DECLARE_POOL(pool_head_connection, "connection", sizeof(struct connection));
DECLARE_POOL(pool_head_conn_hash_node, "conn_hash_node", sizeof(struct conn_hash_node));
DECLARE_POOL(pool_head_sockaddr, "sockaddr", sizeof(struct sockaddr_storage));
DECLARE_POOL(pool_head_pp_tlv_128, "pp_tlv_128", sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_128);
DECLARE_POOL(pool_head_pp_tlv_256, "pp_tlv_256", sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_256);
/* One idle_conns entry per thread slot. Its idle_conns_lock is taken, indexed
 * by <tid>, around idle tree removals and insertions in conn_notify_mux().
 */
struct idle_conns idle_conns[MAX_THREADS] = { };
/* Table of transport layer operations, XPRT_ENTRIES slots, all NULL initially.
 * NOTE(review): presumably indexed by XPRT_* identifier and read through
 * xprt_get() -- confirm against the header.
 */
struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };
/* List head of all known muxes for PROTO */
struct mux_proto_list mux_proto_list = {
.list = LIST_HEAD_INIT(mux_proto_list.list)
};
/* One mux_stopping_data entry per thread slot.
 * NOTE(review): its users are not visible in this part of the file.
 */
struct mux_stopping_data mux_stopping_data[MAX_THREADS];
/* disables sending of proxy-protocol-v2's LOCAL command */
static int pp2_never_send_local;
/* Looks up, among the TLVs stored in <conn>'s tlv_list, the first entry whose
 * type equals <type>. Returns a pointer to that entry, or NULL when <conn> is
 * NULL or when no entry of this type is present.
 */
struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
{
	struct conn_tlv_list *cur;
	struct conn_tlv_list *match = NULL;

	if (conn) {
		list_for_each_entry(cur, &conn->tlv_list, list) {
			if (cur->type != type)
				continue;
			match = cur;
			break;
		}
	}
	return match;
}
/* Remove <conn> idle connection from its attached tree (idle, safe or avail).
* If also present in the secondary server idle list, conn is removed from it.
*
* Must be called with idle_conns_lock held.
*/
void conn_delete_from_tree(struct connection *conn)
{
LIST_DEL_INIT(&conn->idle_list);
eb64_delete(&conn->hash_node->node);
}
/* Creates the mux for connection <conn>.
 *
 * For a frontend connection this is delegated to conn_complete_session() and
 * its return value is returned as-is.
 *
 * For a backend connection, the mux is installed with conn_install_mux_chk()
 * when the owning session originates from a check, otherwise with
 * conn_install_mux_be(). On success 0 is returned after the connection was
 * possibly added to the server's available list or to the session's list.
 * On failure (or if the connection already carries CO_FL_ERROR) the upper
 * layer is woken up and -1 is returned.
 */
int conn_create_mux(struct connection *conn)
{
	struct listener *rev_li;
	struct session *sess;
	struct server *srv;
	struct stconn *sc;
	int rc;

	if (!conn_is_back(conn))
		return conn_complete_session(conn);

	sc = conn->ctx;
	sess = conn->owner;

	if (conn->flags & CO_FL_ERROR)
		goto fail;

	if (sess && obj_type(sess->origin) == OBJ_TYPE_CHECK)
		rc = conn_install_mux_chk(conn, conn->ctx, sess);
	else
		rc = conn_install_mux_be(conn, conn->ctx, sess, NULL);

	if (rc < 0)
		goto fail;

	srv = objt_server(conn->target);

	if (conn->flags & CO_FL_PRIVATE) {
		/* Private connections go to the session's server list. If it
		 * fails now, the same will be done in the mux->detach()
		 * callback.
		 */
		session_add_conn(sess, conn, conn->target);
	}
	else if (srv &&
	         (srv->proxy->options & PR_O_REUSE_MASK) == PR_O_REUSE_ALWS &&
	         conn->mux->avail_streams(conn) > 0) {
		/* With http-reuse always, a non-private connection which still
		 * has available streams (an http2 connection) is published in
		 * the available list so that others can use it right away.
		 */
		srv_add_to_avail_list(srv, conn);
	}
	return 0;

 fail:
	/* let the upper layer know the connection failed */
	if (sc) {
		sc->app_ops->wake(sc);
		return -1;
	}

	if (!conn_reverse_in_preconnect(conn))
		return -1;

	rev_li = conn_active_reverse_listener(conn);

	/* If mux init failed, consider connection on error. This is necessary
	 * to ensure connection is freed by proto-rhttp receiver task.
	 */
	if (!conn->mux)
		conn->flags |= CO_FL_ERROR;

	/* If connection is interrupted without CO_FL_ERROR, receiver task
	 * won't free it.
	 */
	BUG_ON(!(conn->flags & CO_FL_ERROR));

	task_wakeup(rev_li->rx.rhttp.task, TASK_WOKEN_ANY);
	return -1;
}
/* This is used at the end of the socket IOCB to possibly create the mux if it
* was not done yet, or wake it up if flags changed compared to old_flags or if
* need_wake insists on this. It returns <0 if the connection was destroyed and
* must not be used, >=0 otherwise.
*/
int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
{
int ret = 0;
/* If we don't yet have a mux, that means we were waiting for
* information to create one, typically from the ALPN. If we're
* done with the handshake, attempt to create one.
*/
if (unlikely(!conn->mux) && !(conn->flags & CO_FL_WAIT_XPRT)) {
ret = conn_create_mux(conn);
if (ret < 0)
goto done;
}
/* The wake callback is normally used to notify the data layer about
* data layer activity (successful send/recv), connection establishment,
* shutdown and fatal errors. We need to consider the following
* situations to wake up the data layer :
* - change among the CO_FL_NOTIFY_DONE flags :
* SOCK_{RD,WR}_SH, ERROR,
* - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
* end of handshake and transition to CONNECTED
* - raise of CONNECTED with HANDSHAKE down
* - end of HANDSHAKE with CONNECTED set
* - regular data layer activity
*
* One tricky case is the wake up on read0 or error on an idle
* backend connection, that can happen on a connection that is still
* polled while at the same moment another thread is about to perform a
* takeover. The solution against this is to remove the connection from
* the idle list if it was in it, and possibly reinsert it at the end
* if the connection remains valid. The cost is non-null (locked tree
* removal) but remains low given that this is extremely rarely called.
* In any case it's guaranteed by the FD's thread_mask that we're
* called from the same thread the connection is queued in.
*
* Note that the wake callback is allowed to release the connection and
* the fd (and return < 0 in this case).
*/
if ((forced_wake ||
((conn->flags ^ old_flags) & CO_FL_NOTIFY_DONE) ||
((old_flags & CO_FL_WAIT_XPRT) && !(conn->flags & CO_FL_WAIT_XPRT))) &&
conn->mux && conn->mux->wake) {
BUG/MAJOR: connection: make sure to always remove a connection from the tree Since commit 5afcb686b ("MAJOR: connection: purge idle conn by last usage") in 2.9-dev4, the test on conn->toremove_list added to conn_get_idle_flag() in 2.8 by commit 3a7b539b1 ("BUG/MEDIUM: connection: Preserve flags when a conn is removed from an idle list") becomes misleading. Indeed, now both toremove_list and idle_list are shared by a union since the presence in these lists is mutually exclusive. However, in conn_get_idle_flag() we check for the presence in the toremove_list to decide whether or not to delete the connection from the tree. This test now fails because instead it sees the presence in the idle or safe list via the union, and concludes the element must not be removed. Thus the element remains in the tree and can be found later after the connection is released, causing crashes that Tristan reported in issue #2292. The following config is sufficient to reproduce it with 2 threads: defaults mode http timeout client 5s timeout server 5s timeout connect 1s listen front bind :8001 server next 127.0.0.1:8002 frontend next bind :8002 timeout http-keep-alive 1 http-request redirect location / Sending traffic with a few concurrent connections and some short timeouts suffices to instantly crash it after ~10k reqs: $ h2load -t 4 -c 16 -n 10000 -m 1 -w 1 http://0:8001/ With Amaury we analyzed the conditions in which the function is called in order to figure a better condition for the test and concluded that ->toremove_list is never filled there so we can safely remove that part from the test and just move the flag retrieval back to what it was prior to the 2.8 patch above. Note that the patch is not reverted though, as the parts that would drop the unexpected flags removal are unchanged. This patch must NOT be backported. The code in 2.8 works correctly, it's only the change in 2.9 that makes it misbehave.
2023-10-12 08:01:49 -04:00
uint conn_in_list = conn->flags & CO_FL_LIST_MASK;
struct server *srv = objt_server(conn->target);
if (conn_in_list) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
ret = conn->mux->wake(conn);
if (ret < 0)
goto done;
if (conn_in_list) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
_srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
}
done:
return ret;
}
/* Replaces the mux of frontend connection <conn> with the best one matching
 * <mux_proto> (or the connection's ALPN when <mux_proto> is empty) for <mode>,
 * initializing it with context <ctx> and buffer <buf>.
 *
 * Returns 0 when the mux was upgraded or was already the right one, -1 when no
 * mux matches or when the new mux failed to initialize, in which case the
 * previous mux and context are restored. On a successful upgrade <buf> is
 * reset to BUF_NULL and the previous mux is destroyed.
 *
 * The caller should make sure he's not subscribed to the underlying XPRT.
 */
int conn_upgrade_mux_fe(struct connection *conn, void *ctx, struct buffer *buf,
                        struct ist mux_proto, int mode)
{
	struct bind_conf *bconf = __objt_listener(conn->target)->bind_conf;
	const struct mux_ops *prev_ops, *next_ops;
	void *prev_ctx;

	if (mux_proto.len == 0) {
		const char *alpn_name = NULL;
		int alpn_size = 0;

		conn_get_alpn(conn, &alpn_name, &alpn_size);
		mux_proto = ist2(alpn_name, alpn_size);
	}

	next_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
	prev_ops = conn->mux;

	if (next_ops == NULL)
		return -1;     /* no mux found */

	if (next_ops == prev_ops)
		return 0;      /* same mux, nothing to do */

	prev_ctx  = conn->ctx;
	conn->mux = next_ops;
	conn->ctx = ctx;

	if (next_ops->init(conn, bconf->frontend, conn->owner, buf) != -1) {
		/* the new mux is in place, the old one may go */
		*buf = BUF_NULL;
		prev_ops->destroy(prev_ctx);
		return 0;
	}

	/* the upgrade failed: put the previous mux back */
	conn->ctx = prev_ctx;
	conn->mux = prev_ops;
	return -1;
}
/* Picks and installs the mux of incoming connection <conn>, with <ctx> as
 * upper context. The mux protocol forced on the bind line is used when there
 * is one, otherwise the choice is driven by the ALPN name (if any) and the
 * frontend's mode. Returns < 0 on error.
 */
int conn_install_mux_fe(struct connection *conn, void *ctx)
{
	struct bind_conf *bconf = __objt_listener(conn->target)->bind_conf;
	const struct mux_ops *chosen;

	if (!bconf->mux_proto) {
		const char *alpn_name = NULL;
		int alpn_size = 0;
		int mode = (bconf->frontend->mode == PR_MODE_HTTP) ? PROTO_MODE_HTTP
		                                                   : PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_name, &alpn_size);
		chosen = conn_get_best_mux(conn, ist2(alpn_name, alpn_size),
		                           PROTO_SIDE_FE, mode);
		if (!chosen)
			return -1;
	}
	else {
		chosen = bconf->mux_proto->mux;
	}

	/* A connection targeted by a tcp-request session attach-srv rule must
	 * end up on a mux which supports being reversed.
	 */
	if (conn->reverse.target && !(chosen->flags & MX_FL_REVERSABLE)) {
		conn->err_code = CO_ER_REVERSE;
		return -1;
	}

	return conn_install_mux(conn, chosen, ctx, bconf->frontend, conn->owner);
}
/* Picks and installs the mux of outgoing connection <conn>, with <ctx> as
 * upper context and <sess> as session. When the target is a server,
 * <force_mux_ops> takes precedence if set, then the server's forced mux
 * protocol if any. In every other case the mux is chosen from the ALPN name
 * (if any) and the proxy's mode.
 *
 * Returns < 0 on error, which includes a target that is neither a server nor
 * a proxy.
 */
int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess,
                        const struct mux_ops *force_mux_ops)
{
	struct server *srv = objt_server(conn->target);
	struct proxy *prx = srv ? srv->proxy : objt_proxy(conn->target);
	const struct mux_ops *chosen;

	if (!prx) // target must be either proxy or server
		return -1;

	if (srv && unlikely(force_mux_ops)) {
		chosen = force_mux_ops;
	}
	else if (srv && srv->mux_proto) {
		chosen = srv->mux_proto->mux;
	}
	else {
		const char *alpn_name = NULL;
		int alpn_size = 0;
		int mode = (prx->mode == PR_MODE_HTTP) ? PROTO_MODE_HTTP : PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_name, &alpn_size);
		chosen = conn_get_best_mux(conn, ist2(alpn_name, alpn_size),
		                           PROTO_SIDE_BE, mode);
		if (!chosen)
			return -1;
	}

	return conn_install_mux(conn, chosen, ctx, prx, sess);
}
/* Picks and installs the mux of outgoing check connection <conn>, with <ctx>
 * as upper context. <sess>'s origin must be a check. The mux protocol forced
 * by the check is used when there is one, otherwise the mux is chosen from the
 * ALPN name (if any) and from the kind of tcp-check ruleset (HTTP or not).
 *
 * Returns < 0 on error, which includes a missing check and a target that is
 * neither a server nor a proxy.
 */
int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess)
{
	struct check *check = objt_check(sess->origin);
	struct server *srv = objt_server(conn->target);
	struct proxy *prx = objt_proxy(conn->target);
	const struct mux_ops *chosen;

	if (!check) // Check must be defined
		return -1;

	if (srv)
		prx = srv->proxy;

	if (!prx) // target must be either proxy or server
		return -1;

	if (!check->mux_proto) {
		const char *alpn_name = NULL;
		int alpn_size = 0;
		int is_http = (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK;
		int mode = is_http ? PROTO_MODE_HTTP : PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_name, &alpn_size);
		chosen = conn_get_best_mux(conn, ist2(alpn_name, alpn_size),
		                           PROTO_SIDE_BE, mode);
		if (!chosen)
			return -1;
	}
	else {
		chosen = check->mux_proto->mux;
	}

	return conn_install_mux(conn, chosen, ctx, prx, sess);
}
/* Set the ALPN of connection <conn> to <alpn>. If force is false, <alpn> must
 * be a subset or identical to the registered protos for the parent SSL_CTX.
 * In this case <alpn> must be a single protocol value, not a list.
 *
 * <alpn> is compared, length byte included, against each length-prefixed
 * entry of the configured list, so it is expected in that same form (one
 * length byte followed by the protocol name).
 *
 * Returns 0 if ALPN is updated else -1. Without ALPN support at build time,
 * -1 is always returned.
 */
int conn_update_alpn(struct connection *conn, const struct ist alpn, int force)
{
#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
	size_t alpn_len = istlen(alpn);
	char *ctx_alpn_str = NULL;
	int ctx_alpn_len = 0, found = 0;

	/* if not force, first search if alpn is a subset or identical to the
	 * parent SSL_CTX.
	 */
	if (!force) {
		/* retrieve the SSL_CTX according to the connection side. */
		if (conn_is_back(conn)) {
			if (obj_type(conn->target) == OBJ_TYPE_SERVER) {
				struct server *srv = __objt_server(conn->target);

				ctx_alpn_str = srv->ssl_ctx.alpn_str;
				ctx_alpn_len = srv->ssl_ctx.alpn_len;
			}
		}
		else {
			struct session *sess = conn->owner;
			struct listener *li = sess->listener;

			if (li->bind_conf && (li->bind_conf->options & BC_O_USE_SSL)) {
				ctx_alpn_str = li->bind_conf->ssl_conf.alpn_str;
				ctx_alpn_len = li->bind_conf->ssl_conf.alpn_len;
			}
		}

		if (ctx_alpn_str) {
			/* search if ALPN is present in SSL_CTX ALPN before
			 * using it.
			 */
			while (ctx_alpn_len > 0) {
				/* Each entry is one length byte followed by
				 * that many bytes. The length byte is read as
				 * unsigned so that long names never make the
				 * walk go backwards.
				 */
				int entry_len = (unsigned char)*ctx_alpn_str + 1;

				/* An entry running past the end of the list
				 * indicates an invalid ALPN formatted string
				 * and should never happen.
				 */
				BUG_ON(entry_len > ctx_alpn_len);

				if ((size_t)entry_len == alpn_len &&
				    isteqi(ist2(ctx_alpn_str, alpn_len), alpn)) {
					found = 1;
					break;
				}

				/* Skip this entry. Both the cursor and the
				 * remaining length must move together, also
				 * for entries of the right size which do not
				 * match, otherwise the loop walks past the end
				 * of the list.
				 */
				ctx_alpn_str += entry_len;
				ctx_alpn_len -= entry_len;
			}
		}
	}

	if (found || force) {
		ssl_sock_set_alpn(conn, (const uchar *)istptr(alpn), istlen(alpn));
		return 0;
	}
#endif
	return -1;
}
/* Resets connection <conn> to a known initial state and binds it to <target>.
 * This is the minimum acceptable initialization, usable both for a freshly
 * allocated connection and for one about to be reused. Nothing is released
 * here: the pointers (mux, ctx, owner, subs, src, dst, hash_node, xprt, ...)
 * are simply reset to NULL, the FD is set to DEAD_FD_MAGIC and the list
 * elements are re-initialized. Depending on the side of the connection,
 * either <session_list> (backend) or <stopping_list> (frontend) is
 * initialized.
 */
void conn_init(struct connection *conn, void *target)
{
	struct list *side_list;

	/* identity, state and upper layers */
	conn->obj_type       = OBJ_TYPE_CONN;
	conn->flags          = CO_FL_NONE;
	conn->mux            = NULL;
	conn->ctx            = NULL;
	conn->owner          = NULL;
	conn->send_proxy_ofs = 0;
	conn->handle.fd      = DEAD_FD_MAGIC;
	conn->err_code       = CO_ER_NONE;
	conn->target         = target;
	conn->destroy_cb     = NULL;
	conn->proxy_netns    = NULL;

	/* list elements; the side-specific one depends on the target */
	MT_LIST_INIT(&conn->toremove_list);
	side_list = conn_is_back(conn) ? &conn->session_list : &conn->stopping_list;
	LIST_INIT(side_list);
	LIST_INIT(&conn->tlv_list);

	/* subscriber, addresses, hash node and transport layer */
	conn->subs      = NULL;
	conn->src       = NULL;
	conn->dst       = NULL;
	conn->hash_node = NULL;
	conn->xprt      = NULL;

	/* reverse connect information */
	conn->reverse.target = NULL;
	conn->reverse.name   = BUF_NULL;
}
/* Sets up the members of <conn> which are only needed on the backend side:
 * the destination address storage and the hash node.
 *
 * Returns 0 on success else non-zero (allocation failure).
 */
static int conn_backend_init(struct connection *conn)
{
	if (sockaddr_alloc(&conn->dst, NULL, 0) == NULL)
		return 1;

	conn->hash_node = conn_alloc_hash_node(conn);
	return unlikely(!conn->hash_node) ? 1 : 0;
}
/* Gives back what <conn> holds for its backend side usage: it is detached
 * from its session if it is linked to one, otherwise, when it is not private
 * and targets a server, it is released from that server. Its hash node is
 * then freed.
 *
 * This function is useful when freeing a connection or reversing it to the
 * frontend side.
 */
static void conn_backend_deinit(struct connection *conn)
{
	int owned_by_sess = conn_is_back(conn) && LIST_INLIST(&conn->session_list);

	if (owned_by_sess) {
		/* the session owns the connection, remove it from its list */
		session_unown_conn(conn->owner, conn);
	}
	else if (!(conn->flags & CO_FL_PRIVATE) &&
	         obj_type(conn->target) == OBJ_TYPE_SERVER) {
		srv_release_conn(__objt_server(conn->target), conn);
	}

	/* Make sure the connection is not left in the idle connection tree */
	BUG_ON(conn->hash_node != NULL && conn->hash_node->node.node.leaf_p != NULL);

	pool_free(pool_head_conn_hash_node, conn->hash_node);
	conn->hash_node = NULL;
}
/* Allocates a connection from the pool and initializes its main fields for
 * <target>. For a backend connection, it is also accounted on the server when
 * the target is one, and its backend-only members are allocated. Returns the
 * connection on success, NULL on failure. The connection must be released
 * using pool_free() or conn_free().
 */
struct connection *conn_new(void *target)
{
	struct connection *conn = pool_alloc(pool_head_connection);

	if (unlikely(!conn))
		return NULL;

	conn_init(conn, target);

	if (!conn_is_back(conn))
		return conn;

	if (obj_type(target) == OBJ_TYPE_SERVER)
		srv_use_conn(__objt_server(target), conn);

	if (conn_backend_init(conn) != 0) {
		conn_free(conn);
		conn = NULL;
	}
	return conn;
}
/* Releases a connection previously allocated by conn_new() */
void conn_free(struct connection *conn)
{
struct conn_tlv_list *tlv, *tlv_back = NULL;
if (conn_is_back(conn))
conn_backend_deinit(conn);
/* Remove the conn from toremove_list.
*
* This is needed to prevent a double-free in case the connection was
* already scheduled from cleaning but is freed before via another
* call.
*/
MT_LIST_DELETE(&conn->toremove_list);
sockaddr_free(&conn->src);
sockaddr_free(&conn->dst);
/* Free all previously allocated TLVs */
list_for_each_entry_safe(tlv, tlv_back, &conn->tlv_list, list) {
LIST_DELETE(&tlv->list);
if (tlv->len > HA_PP2_TLV_VALUE_256)
free(tlv);
else if (tlv->len <= HA_PP2_TLV_VALUE_128)
pool_free(pool_head_pp_tlv_128, tlv);
else
pool_free(pool_head_pp_tlv_256, tlv);
}
ha_free(&conn->reverse.name.area);
MEDIUM: proto_reverse_connect: bootstrap active reverse connection Implement active reverse connection initialization. This is done through a new task stored in the receiver structure. This task is instantiated via bind callback and first woken up via enable callback. Task handler is separated into two halves. On the first step, a new connection is allocated and stored in <pend_conn> member of the receiver. This new client connection will proceed to connect using the server instance referenced in the bind_conf. When connect has successfully been executed and HTTP/2 connection is ready for exchange after SETTINGS, reverse_connect task is woken up. As <pend_conn> is still set, the second halve is executed which only execute listener_accept(). This will in turn execute accept_conn callback which is defined to return the pending connection. The task is automatically requeued inside accept_conn callback if bind maxconn is not yet reached. This allows to specify how many connection should be opened. Each connection is instantiated and reversed serially one by one until maxconn is reached. conn_free() has been modified to handle failure if a reverse connection fails before being accepted. In this case, no session exists to notify about the failure. Instead, reverse_connect task is requeud with a 1 second delay, giving time to fix a possible network issue. This will allow to attempt a new connection reverse. Note that for the moment connection rebinding after accept is disabled for simplicity. Extra operations are required to migrate an existing connection and its stack to a new thread which will be implemented later.
2023-08-23 11:16:07 -04:00
if (conn_reverse_in_preconnect(conn)) {
struct listener *l = conn_active_reverse_listener(conn);
rhttp_notify_preconn_err(l);
HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
MEDIUM: proto_reverse_connect: bootstrap active reverse connection Implement active reverse connection initialization. This is done through a new task stored in the receiver structure. This task is instantiated via bind callback and first woken up via enable callback. Task handler is separated into two halves. On the first step, a new connection is allocated and stored in <pend_conn> member of the receiver. This new client connection will proceed to connect using the server instance referenced in the bind_conf. When connect has successfully been executed and HTTP/2 connection is ready for exchange after SETTINGS, reverse_connect task is woken up. As <pend_conn> is still set, the second halve is executed which only execute listener_accept(). This will in turn execute accept_conn callback which is defined to return the pending connection. The task is automatically requeued inside accept_conn callback if bind maxconn is not yet reached. This allows to specify how many connection should be opened. Each connection is instantiated and reversed serially one by one until maxconn is reached. conn_free() has been modified to handle failure if a reverse connection fails before being accepted. In this case, no session exists to notify about the failure. Instead, reverse_connect task is requeud with a 1 second delay, giving time to fix a possible network issue. This will allow to attempt a new connection reverse. Note that for the moment connection rebinding after accept is disabled for simplicity. Extra operations are required to migrate an existing connection and its stack to a new thread which will be implemented later.
2023-08-23 11:16:07 -04:00
}
else if (conn->flags & CO_FL_REVERSED) {
HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
}
MEDIUM: proto_reverse_connect: bootstrap active reverse connection Implement active reverse connection initialization. This is done through a new task stored in the receiver structure. This task is instantiated via bind callback and first woken up via enable callback. Task handler is separated into two halves. On the first step, a new connection is allocated and stored in <pend_conn> member of the receiver. This new client connection will proceed to connect using the server instance referenced in the bind_conf. When connect has successfully been executed and HTTP/2 connection is ready for exchange after SETTINGS, reverse_connect task is woken up. As <pend_conn> is still set, the second halve is executed which only execute listener_accept(). This will in turn execute accept_conn callback which is defined to return the pending connection. The task is automatically requeued inside accept_conn callback if bind maxconn is not yet reached. This allows to specify how many connection should be opened. Each connection is instantiated and reversed serially one by one until maxconn is reached. conn_free() has been modified to handle failure if a reverse connection fails before being accepted. In this case, no session exists to notify about the failure. Instead, reverse_connect task is requeud with a 1 second delay, giving time to fix a possible network issue. This will allow to attempt a new connection reverse. Note that for the moment connection rebinding after accept is disabled for simplicity. Extra operations are required to migrate an existing connection and its stack to a new thread which will be implemented later.
2023-08-23 11:16:07 -04:00
conn_force_unsubscribe(conn);
pool_free(pool_head_connection, conn);
}
/* Allocates a zeroed hash node from its pool and makes it point to <conn>.
 * Returns the node, or NULL if the allocation failed. The node is not
 * attached to <conn> by this function.
 */
struct conn_hash_node *conn_alloc_hash_node(struct connection *conn)
{
	struct conn_hash_node *node = pool_zalloc(pool_head_conn_hash_node);

	if (likely(node != NULL))
		node->conn = conn;
	return node;
}
/* Allocates a struct sockaddr_storage from the pool if needed, assigns it to
 * *sap and returns it. If <sap> is NULL, an address is always allocated and
 * returned. If <sap> is non-null, an address is only allocated when it points
 * to a NULL pointer, and the allocated address is then stored there; when it
 * already points to an address, that one is returned untouched. If <orig> is
 * non-null and <len> is positive, the first <len> bytes of <orig> are copied
 * into the newly allocated address. The returned pointer is NULL if the
 * allocation failed.
 */
struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len)
{
	struct sockaddr_storage *addr = sap ? *sap : NULL;

	/* already there, nothing to allocate */
	if (addr)
		return addr;

	addr = pool_alloc(pool_head_sockaddr);
	if (addr && orig && len > 0)
		memcpy(addr, orig, len);

	if (sap)
		*sap = addr;
	return addr;
}
/* Returns to the pool the struct sockaddr_storage possibly designated by
 * <sap>, which may be NULL or may point to NULL. Unless <sap> is NULL, the
 * pointer it designates is reset to NULL.
 */
void sockaddr_free(struct sockaddr_storage **sap)
{
	if (sap != NULL) {
		pool_free(pool_head_sockaddr, *sap);
		*sap = NULL;
	}
}
/* Tries to insert the handshake pseudo-XPRT into <conn>'s transport stack.
 * When the connection's current XPRT is raw_sock, the handshake XPRT simply
 * takes its place as the connection's XPRT, otherwise the current XPRT's
 * add_xprt() method is asked to insert it. In both cases the handshake XPRT
 * is then given the layer it now sits on top of.
 * Returns 0 on success, or non-zero on failure (including when the connection
 * is already in error).
 */
int xprt_add_hs(struct connection *conn)
{
	const struct xprt_ops *hs_ops = xprt_get(XPRT_HANDSHAKE);
	const struct xprt_ops *lower_ops = NULL;
	void *hs_ctx = NULL;
	void *lower_ctx = NULL;

	if (conn->flags & CO_FL_ERROR)
		return -1;

	if (hs_ops->init(conn, &hs_ctx) < 0)
		return -1;

	if (conn->xprt != xprt_get(XPRT_RAW)) {
		if (conn->xprt->add_xprt(conn, conn->xprt_ctx, hs_ctx, hs_ops,
		                         &lower_ctx, &lower_ops) != 0)
			goto release;
	}
	else {
		lower_ctx = conn->xprt_ctx;
		lower_ops = conn->xprt;
		conn->xprt_ctx = hs_ctx;
		conn->xprt = hs_ops;
	}

	if (hs_ops->add_xprt(conn, hs_ctx, lower_ctx, lower_ops, NULL, NULL) != 0)
		goto release;

	return 0;

 release:
	hs_ops->close(conn, hs_ctx);
	return -1;
}
/* returns a human-readable error code for conn->err_code, or NULL if the code
* is unknown.
*/
const char *conn_err_code_str(struct connection *c)
{
switch (c->err_code) {
case CO_ER_NONE: return "Success";
case CO_ER_CONF_FDLIM: return "Reached configured maxconn value";
case CO_ER_PROC_FDLIM: return "Too many sockets on the process";
case CO_ER_SYS_FDLIM: return "Too many sockets on the system";
case CO_ER_SYS_MEMLIM: return "Out of system buffers";
case CO_ER_NOPROTO: return "Protocol or address family not supported";
case CO_ER_SOCK_ERR: return "General socket error";
case CO_ER_PORT_RANGE: return "Source port range exhausted";
case CO_ER_CANT_BIND: return "Can't bind to source address";
case CO_ER_FREE_PORTS: return "Out of local source ports on the system";
case CO_ER_ADDR_INUSE: return "Local source address already in use";
case CO_ER_PRX_EMPTY: return "Connection closed while waiting for PROXY protocol header";
case CO_ER_PRX_ABORT: return "Connection error while waiting for PROXY protocol header";
case CO_ER_PRX_TIMEOUT: return "Timeout while waiting for PROXY protocol header";
case CO_ER_PRX_TRUNCATED: return "Truncated PROXY protocol header received";
case CO_ER_PRX_NOT_HDR: return "Received something which does not look like a PROXY protocol header";
case CO_ER_PRX_BAD_HDR: return "Received an invalid PROXY protocol header";
case CO_ER_PRX_BAD_PROTO: return "Received an unhandled protocol in the PROXY protocol header";
case CO_ER_CIP_EMPTY: return "Connection closed while waiting for NetScaler Client IP header";
case CO_ER_CIP_ABORT: return "Connection error while waiting for NetScaler Client IP header";
case CO_ER_CIP_TIMEOUT: return "Timeout while waiting for a NetScaler Client IP header";
case CO_ER_CIP_TRUNCATED: return "Truncated NetScaler Client IP header received";
case CO_ER_CIP_BAD_MAGIC: return "Received an invalid NetScaler Client IP magic number";
case CO_ER_CIP_BAD_PROTO: return "Received an unhandled protocol in the NetScaler Client IP header";
case CO_ER_SSL_EMPTY: return "Connection closed during SSL handshake";
case CO_ER_SSL_ABORT: return "Connection error during SSL handshake";
case CO_ER_SSL_TIMEOUT: return "Timeout during SSL handshake";
case CO_ER_SSL_TOO_MANY: return "Too many SSL connections";
case CO_ER_SSL_NO_MEM: return "Out of memory when initializing an SSL connection";
case CO_ER_SSL_RENEG: return "Rejected a client-initiated SSL renegotiation attempt";
case CO_ER_SSL_CA_FAIL: return "SSL client CA chain cannot be verified";
case CO_ER_SSL_CRT_FAIL: return "SSL client certificate not trusted";
case CO_ER_SSL_MISMATCH: return "Server presented an SSL certificate different from the configured one";
case CO_ER_SSL_MISMATCH_SNI: return "Server presented an SSL certificate different from the expected one";
case CO_ER_SSL_HANDSHAKE: return "SSL handshake failure";
case CO_ER_SSL_HANDSHAKE_HB: return "SSL handshake failure after heartbeat";
case CO_ER_SSL_KILLED_HB: return "Stopped a TLSv1 heartbeat attack (CVE-2014-0160)";
case CO_ER_SSL_NO_TARGET: return "Attempt to use SSL on an unknown target (internal error)";
case CO_ER_SSL_EARLY_FAILED: return "Server refused early data";
case CO_ER_SOCKS4_SEND: return "SOCKS4 Proxy write error during handshake";
case CO_ER_SOCKS4_RECV: return "SOCKS4 Proxy read error during handshake";
case CO_ER_SOCKS4_DENY: return "SOCKS4 Proxy deny the request";
case CO_ER_SOCKS4_ABORT: return "SOCKS4 Proxy handshake aborted by server";
case CO_ERR_SSL_FATAL: return "SSL fatal error";
case CO_ER_REVERSE: return "Reverse connect failure";
}
return NULL;
}
/* Send a message over an established connection. It makes use of send() and
* returns the same return code and errno. If the socket layer is not ready yet
* then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
* as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. It returns
* EMSGSIZE if called with a zero length message. The purpose is to simplify
* some rare attempts to directly write on the socket from above the connection
* (typically send_proxy). In case of EAGAIN, the fd is marked as "cant_send".
* It automatically retries on EINTR. Other errors cause the connection to be
* marked as in error state. It takes similar arguments as send() except the
* first one which is the connection instead of the file descriptor. <flags>
* only support CO_SFL_MSG_MORE.
*/
int conn_ctrl_send(struct connection *conn, const void *buf, int len, int flags)
{
const struct buffer buffer = b_make((char*)buf, len, 0, len);
const struct xprt_ops *xprt = xprt_get(XPRT_RAW);
int ret;
ret = -1;
errno = ENOTSOCK;
if (conn->flags & CO_FL_SOCK_WR_SH)
goto fail;
if (!conn_ctrl_ready(conn))
goto fail;
errno = EMSGSIZE;
if (!len)
goto fail;
/* snd_buf() already takes care of updating conn->flags and handling
* the FD polling status.
*/
ret = xprt->snd_buf(conn, NULL, &buffer, buffer.data, flags);
if (conn->flags & CO_FL_ERROR)
ret = -1;
return ret;
fail:
conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
return ret;
}
/* Called from the upper layer, to stop <es> from being notified of events
 * <event_type> on <conn>. <es> must be the subscriber passed in previous
 * calls for as long as at least one event is still subscribed, and
 * <event_type> must only be a combination of SUB_RETRY_RECV and
 * SUB_RETRY_SEND. Once no event remains, the connection forgets its
 * subscriber. When the control layer is ready and provides ignore_events(),
 * it is told to ignore these events. It always returns 0.
 */
int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
{
	BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
	BUG_ON(conn->subs && conn->subs != es);

	es->events &= ~event_type;
	if (es->events == 0)
		conn->subs = NULL;

	if (!conn_ctrl_ready(conn))
		return 0;

	if (conn->ctrl->ignore_events)
		conn->ctrl->ignore_events(conn, event_type);
	return 0;
}
/* History -- "MEDIUM: connection: make the subscribe() call able to wakeup if
 * ready" (2020-02-28 08:24:49 -05:00):
 *
 * There's currently an internal API limitation at the connection layer
 * regarding conn_subscribe(). We must not subscribe if we haven't yet met
 * EAGAIN or such a condition, so we sometimes force ourselves to read in
 * order to meet this condition and being allowed to call subscribe. But
 * reading cannot always be done (e.g. at the end of a loop where we cannot
 * afford to retrieve new data and start again) so we instead perform a
 * tasklet_wakeup() of the requester's io_cb. This is what is done in mux_h1
 * for example. The problem with this is that it forces a new receive when
 * we're not necessarily certain we need one. And if the FD is not ready and
 * was already being polled, it's a useless wakeup.
 *
 * The current patch improves the connection-level subscribe() so that it
 * really manipulates the polling if the FD is marked not-ready, but instead
 * schedules the argument tasklet for a wakeup if the FD is ready. This
 * guarantees we'll wake this tasklet up in any case once the FD is ready,
 * either immediately or after polling.
 *
 * By doing so, a test on pure close mode shows we cut in half the number of
 * epoll_ctl() calls and almost eliminate failed recvfrom():
 *
 *   $ ./h1load -n 100000 -r 1 -t 4 -c 1000 -T 20 -F 127.0.0.1:8001/?s=1k/t=20
 *
 *   before:
 *     399464 epoll_ctl 1
 *     200007 recvfrom 1
 *     200000 sendto 1
 *     100000 recvfrom -1
 *       7508 epoll_wait 1
 *
 *   after:
 *     205739 epoll_ctl 1
 *     200000 sendto 1
 *     200000 recvfrom 1
 *       6084 epoll_wait 1
 *       2651 recvfrom -1
 *
 * On keep-alive there is no change however.
 */
/* Called from the upper layer, to subscribe <es> to events <event_type>.
* The <es> struct is not allowed to differ from the one passed during a
* previous call to subscribe(). If the connection's ctrl layer is ready,
* the wait_event is immediately woken up and the subscription is cancelled.
* It always returns zero.
*/
int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
{
int ret = 0;
BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
BUG_ON(conn->subs && conn->subs != es);
MEDIUM: connection: make the subscribe() call able to wakeup if ready There's currently an internal API limitation at the connection layer regarding conn_subscribe(). We must not subscribe if we haven't yet met EAGAIN or such a condition, so we sometimes force ourselves to read in order to meet this condition and being allowed to call subscribe. But reading cannot always be done (e.g. at the end of a loop where we cannot afford to retrieve new data and start again) so we instead perform a tasklet_wakeup() of the requester's io_cb. This is what is done in mux_h1 for example. The problem with this is that it forces a new receive when we're not necessarily certain we need one. And if the FD is not ready and was already being polled, it's a useless wakeup. The current patch improves the connection-level subscribe() so that it really manipulates the polling if the FD is marked not-ready, but instead schedules the argument tasklet for a wakeup if the FD is ready. This guarantees we'll wake this tasklet up in any case once the FD is ready, either immediately or after polling. By doing so, a test on pure close mode shows we cut in half the number of epoll_ctl() calls and almost eliminate failed recvfrom(): $ ./h1load -n 100000 -r 1 -t 4 -c 1000 -T 20 -F 127.0.0.1:8001/?s=1k/t=20 before: 399464 epoll_ctl 1 200007 recvfrom 1 200000 sendto 1 100000 recvfrom -1 7508 epoll_wait 1 after: 205739 epoll_ctl 1 200000 sendto 1 200000 recvfrom 1 6084 epoll_wait 1 2651 recvfrom -1 On keep-alive there is no change however.
2020-02-28 08:24:49 -05:00
if (conn->subs && (conn->subs->events & event_type) == event_type)
return 0;
if (conn_ctrl_ready(conn) && conn->ctrl->check_events) {
ret = conn->ctrl->check_events(conn, event_type);
if (ret)
tasklet_wakeup(es->tasklet);
}
es->events = (es->events | event_type) & ~ret;
conn->subs = es->events ? es : NULL;
return 0;
}
/* Tries to drain pending incoming data on <conn> so that the caller knows
 * whether lingering has to be disabled before closing.
 *
 * Returns non-zero when it is safe to close without disabling lingering:
 *   - 1 when the control layer is not ready, or when the connection already
 *     carries CO_FL_ERROR or CO_FL_SOCK_RD_SH;
 *   - otherwise the result of the control layer's ->drain() callback, in
 *     which case CO_FL_SOCK_RD_SH is also set on the connection when that
 *     result is non-zero.
 * Returns zero when the control layer has no ->drain() callback or when the
 * callback itself returned zero.
 */
int conn_ctrl_drain(struct connection *conn)
{
	int drained;

	if (!conn_ctrl_ready(conn))
		return 1;

	if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
		return 1;

	/* no way to drain at this layer */
	if (!conn->ctrl->drain)
		return 0;

	drained = conn->ctrl->drain(conn);
	if (drained)
		conn->flags |= CO_FL_SOCK_RD_SH;

	return drained;
}
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
/*
* Get data length from tlv
*/
static inline size_t get_tlv_length(const struct tlv *src)
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
{
return (src->length_hi << 8) | src->length_lo;
}
/* This handshake handler waits a PROXY protocol header at the beginning of the
* raw data stream. The header looks like this :
*
* "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> <DST4> "\r\n"
*
* There must be exactly one space between each field. Fields are :
* - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
* - SRC3 : layer 3 (eg: IP) source address in standard text form
* - DST3 : layer 3 (eg: IP) destination address in standard text form
* - SRC4 : layer 4 (eg: TCP port) source address in standard text form
* - DST4 : layer 4 (eg: TCP port) destination address in standard text form
*
* This line MUST be at the beginning of the buffer and MUST NOT wrap.
*
* The header line is small and in all cases smaller than the smallest normal
* TCP MSS. So it MUST always be delivered as one segment, which ensures we
* can safely use MSG_PEEK and avoid buffering.
*
* Once the data is fetched, the values are set in the connection's address
* fields, and data are removed from the socket's buffer. The function returns
* zero if it needs to wait for more data or if it fails, or 1 if it completed
* and removed itself.
*/
int conn_recv_proxy(struct connection *conn, int flag)
{
struct session *sess = conn->owner;
char *line, *end;
struct proxy_hdr_v2 *hdr_v2;
const char v2sig[] = PP2_SIGNATURE;
size_t total_v2_len;
size_t tlv_offset = 0;
int ret;
if (!conn_ctrl_ready(conn))
MAJOR: connection: add two new flags to indicate readiness of control/transport Currently the control and transport layers of a connection are supposed to be initialized when their respective pointers are not NULL. This will not work anymore when we plan to reuse connections, because there is an asymmetry between the accept() side and the connect() side : - on accept() side, the fd is set first, then the ctrl layer then the transport layer ; upon error, they must be undone in the reverse order, then the FD must be closed. The FD must not be deleted if the control layer was not yet initialized ; - on the connect() side, the fd is set last and there is no reliable way to know if it has been initialized or not. In practice it's initialized to -1 first but this is hackish and supposes that local FDs only will be used forever. Also, there are even less solutions for keeping trace of the transport layer's state. Also it is possible to support delayed close() when something (eg: logs) tracks some information requiring the transport and/or control layers, making it even more difficult to clean them. So the proposed solution is to add two flags to the connection : - CO_FL_CTRL_READY is set when the control layer is initialized (fd_insert) and cleared after it's released (fd_delete). - CO_FL_XPRT_READY is set when the control layer is initialized (xprt->init) and cleared after it's released (xprt->close). The functions have been adapted to rely on this and not on the pointers anymore. conn_xprt_close() was unused and dangerous : it did not close the control layer (eg: the socket itself) but still marks the transport layer as closed, preventing any future call to conn_full_close() from finishing the job. The problem comes from conn_full_close() in fact. It needs to close the xprt and ctrl layers independantly. After that we're still having an issue : we don't know based on ->ctrl alone whether the fd was registered or not. 
For this we use the two new flags CO_FL_XPRT_READY and CO_FL_CTRL_READY. We now rely on this and not on conn->xprt nor conn->ctrl anymore to decide what remains to be done on the connection. In order not to miss some flag assignments, we introduce conn_ctrl_init() to initialize the control layer, register the fd using fd_insert() and set the flag, and conn_ctrl_close() which unregisters the fd and removes the flag, but only if the transport layer was closed. Similarly, at the transport layer, conn_xprt_init() calls ->init and sets the flag, while conn_xprt_close() checks the flag, calls ->close and clears the flag, regardless xprt_ctx or xprt_st. This also ensures that the ->init and the ->close functions are called only once each and in the correct order. Note that conn_xprt_close() does nothing if the transport layer is still tracked. conn_full_close() now simply calls conn_xprt_close() then conn_full_close() in turn, which do nothing if CO_FL_XPRT_TRACKED is set. In order to handle the error path, we also provide conn_force_close() which ignores CO_FL_XPRT_TRACKED and closes the transport and the control layers in turns. All relevant instances of fd_delete() have been replaced with conn_force_close(). Now we always know what state the connection is in and we can expect to split its initialization.
2013-10-21 10:30:56 -04:00
goto fail;
BUG_ON(conn->flags & CO_FL_FDLESS);
if (!fd_recv_ready(conn->handle.fd))
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
while (1) {
ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
if (ret < 0) {
if (errno == EINTR)
continue;
if (errno == EAGAIN || errno == EWOULDBLOCK) {
fd_cant_recv(conn->handle.fd);
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
}
goto recv_abort;
}
trash.data = ret;
break;
}
if (!trash.data) {
/* client shutdown */
conn->err_code = CO_ER_PRX_EMPTY;
goto fail;
}
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_* Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done callback.") broke the master CLI for a very obscure reason. It happens that short requests immediately terminated by a shutdown are properly received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not yet set on the connection since we've not passed again through conn_fd_handler() and it was not done in conn_complete_session(). While commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in conn_complete_session()") fixed the issue, such accident may happen again as the root cause is deeper and actually comes down to the fact that CO_FL_CONNECTED is lazily set at various check points in the code but not every time we drop one wait bit. It is not the first time we face this situation. Originally this flag was used to detect the transition between WAIT_* and CONNECTED in order to call ->wake() from the FD handler. But since at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is always synchronized against the two others before being checked. Moreover, with the I/Os moved to tasklets, the decision to call the ->wake() function is performed after the I/Os in si_cs_process() and equivalent, which don't care about this transition either. So in essence, checking for CO_FL_CONNECTED has become a lazy wait to check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always relies on someone else having synchronized it. This patch addresses it once for all by killing this flag and only checking the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). 
This revealed a number of inconsistencies that were purposely not addressed here for the sake of bisectability: - while most places do check both L4+L6 and HANDSHAKE at the same time, some places like assign_server() or back_handle_st_con() and a few sample fetches looking for proxy protocol do check for L4+L6 but don't care about HANDSHAKE ; these ones will probably fail on TCP request session rules if the handshake is not complete. - some handshake handlers do validate that a connection is established at L4 but didn't clear CO_FL_WAIT_L4_CONN - the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6 before declaring the mux ready while the snd_buf function also checks for the handshake's completion. Likely the former should validate the handshake as well and we should get rid of these extra tests in snd_buf. - raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only later clear CO_FL_WAIT_L4_CONN. - xprt_handshake would set CO_FL_CONNECTED itself without actually clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if waiting for a pure Rx handshake. - most places in ssl_sock that were checking CO_FL_CONNECTED don't need to include the L4 check as an L6 check is enough to decide whether to wait for more info or not. It also becomes obvious when reading the test in si_cs_recv() that caused the failure mentioned above that once converted it doesn't make any sense anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete cannot happen since for CS_FL_EOS to be set, the other ones must have been validated. Some of these parts will still deserve further cleanup, and some of the observations above may induce some backports of potential bug fixes once totally analyzed in their context. The risk of breaking existing stuff is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
conn->flags &= ~CO_FL_WAIT_L4_CONN;
if (trash.data < 6)
goto missing;
line = trash.area;
end = trash.area + trash.data;
/* Decode a possible proxy request, fail early if it does not match */
if (strncmp(line, "PROXY ", 6) != 0)
goto not_v1;
line += 6;
if (trash.data < 9) /* shortest possible line */
goto missing;
if (memcmp(line, "TCP4 ", 5) == 0) {
u32 src3, dst3, sport, dport;
line += 5;
src3 = inetaddr_host_lim_ret(line, end, &line);
if (line == end)
goto missing;
if (*line++ != ' ')
goto bad_header;
dst3 = inetaddr_host_lim_ret(line, end, &line);
if (line == end)
goto missing;
if (*line++ != ' ')
goto bad_header;
sport = read_uint((const char **)&line, end);
if (line == end)
goto missing;
if (*line++ != ' ')
goto bad_header;
dport = read_uint((const char **)&line, end);
if (line > end - 2)
goto missing;
if (*line++ != '\r')
goto bad_header;
if (*line++ != '\n')
goto bad_header;
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
/* update the session's addresses and mark them set */
((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
((struct sockaddr_in *)sess->src)->sin_addr.s_addr = htonl(src3);
((struct sockaddr_in *)sess->src)->sin_port = htons(sport);
((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = htonl(dst3);
((struct sockaddr_in *)sess->dst)->sin_port = htons(dport);
}
else if (memcmp(line, "TCP6 ", 5) == 0) {
u32 sport, dport;
char *src_s;
char *dst_s, *sport_s, *dport_s;
struct in6_addr src3, dst3;
line += 5;
src_s = line;
dst_s = sport_s = dport_s = NULL;
while (1) {
if (line > end - 2) {
goto missing;
}
else if (*line == '\r') {
*line = 0;
line++;
if (*line++ != '\n')
goto bad_header;
break;
}
if (*line == ' ') {
*line = 0;
if (!dst_s)
dst_s = line + 1;
else if (!sport_s)
sport_s = line + 1;
else if (!dport_s)
dport_s = line + 1;
}
line++;
}
if (!dst_s || !sport_s || !dport_s)
goto bad_header;
sport = read_uint((const char **)&sport_s,dport_s - 1);
if (*sport_s != 0)
goto bad_header;
dport = read_uint((const char **)&dport_s,line - 2);
if (*dport_s != 0)
goto bad_header;
if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
goto bad_header;
if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
goto bad_header;
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
/* update the session's addresses and mark them set */
((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, &src3, sizeof(struct in6_addr));
((struct sockaddr_in6 *)sess->src)->sin6_port = htons(sport);
((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, &dst3, sizeof(struct in6_addr));
((struct sockaddr_in6 *)sess->dst)->sin6_port = htons(dport);
}
else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
/* This can be a UNIX socket forwarded by an haproxy upstream */
line += 9;
}
else {
/* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
conn->err_code = CO_ER_PRX_BAD_PROTO;
goto fail;
}
trash.data = line - trash.area;
goto eat_header;
not_v1:
/* try PPv2 */
if (trash.data < PP2_HEADER_LEN)
goto missing;
hdr_v2 = (struct proxy_hdr_v2 *) trash.area;
if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
(hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
conn->err_code = CO_ER_PRX_NOT_HDR;
goto fail;
}
total_v2_len = PP2_HEADER_LEN + ntohs(hdr_v2->len);
if (trash.data < total_v2_len)
goto missing;
switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
case 0x01: /* PROXY command */
switch (hdr_v2->fam) {
case 0x11: /* TCPv4 */
if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
goto bad_header;
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
((struct sockaddr_in *)sess->src)->sin_port = hdr_v2->addr.ip4.src_port;
((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
((struct sockaddr_in *)sess->dst)->sin_port = hdr_v2->addr.ip4.dst_port;
tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
break;
case 0x21: /* TCPv6 */
if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
goto bad_header;
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_v2->addr.ip6.src_port;
((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_v2->addr.ip6.dst_port;
tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
break;
}
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
/* TLV parsing */
while (tlv_offset < total_v2_len) {
struct ist tlv;
struct tlv *tlv_packet = NULL;
struct conn_tlv_list *new_tlv = NULL;
size_t data_len = 0;
/* Verify that we have at least TLV_HEADER_SIZE bytes left */
if (tlv_offset + TLV_HEADER_SIZE > total_v2_len)
goto bad_header;
tlv_packet = (struct tlv *) &trash.area[tlv_offset];
tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet));
tlv_offset += istlen(tlv) + TLV_HEADER_SIZE;
/* Verify that the TLV length does not exceed the total PROXYv2 length */
if (tlv_offset > total_v2_len)
goto bad_header;
/* Prepare known TLV types */
switch (tlv_packet->type) {
case PP2_TYPE_CRC32C: {
uint32_t n_crc32c;
/* Verify that this TLV is exactly 4 bytes long */
if (istlen(tlv) != PP2_CRC32C_LEN)
goto bad_header;
n_crc32c = read_n32(istptr(tlv));
write_n32(istptr(tlv), 0); // compute with CRC==0
if (hash_crc32c(trash.area, total_v2_len) != n_crc32c)
goto bad_header;
break;
}
#ifdef USE_NS
case PP2_TYPE_NETNS: {
const struct netns_entry *ns;
ns = netns_store_lookup(istptr(tlv), istlen(tlv));
if (ns)
conn->proxy_netns = ns;
break;
}
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
#endif
case PP2_TYPE_AUTHORITY: {
/* For now, keep the length restriction by HAProxy */
if (istlen(tlv) > HA_PP2_AUTHORITY_MAX)
goto bad_header;
break;
}
case PP2_TYPE_UNIQUE_ID: {
if (istlen(tlv) > UNIQUEID_LEN)
goto bad_header;
break;
}
default:
break;
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
}
/* If we did not find a known TLV type that we can optimize for, we generically allocate it */
data_len = get_tlv_length(tlv_packet);
/* Prevent attackers from allocating too much memory */
if (unlikely(data_len > HA_PP2_MAX_ALLOC))
goto fail;
/* Alloc memory based on data_len */
if (data_len > HA_PP2_TLV_VALUE_256)
new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list));
else if (data_len <= HA_PP2_TLV_VALUE_128)
new_tlv = pool_alloc(pool_head_pp_tlv_128);
else
new_tlv = pool_alloc(pool_head_pp_tlv_256);
if (unlikely(!new_tlv))
goto fail;
new_tlv->type = tlv_packet->type;
/* Save TLV to make it accessible via sample fetch */
memcpy(new_tlv->value, tlv.ptr, data_len);
new_tlv->len = data_len;
LIST_APPEND(&conn->tlv_list, &new_tlv->list);
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
}
/* Verify that the PROXYv2 header ends at a TLV boundary.
* A mismatch cannot happen here, because the TLV parsing already
* verifies that a TLV does not exceed the total length and
* also that there is space for a TLV header.
*/
BUG_ON(tlv_offset != total_v2_len);
/* unsupported protocol, keep local connection address */
break;
case 0x00: /* LOCAL command */
/* keep local connection address for LOCAL */
break;
default:
goto bad_header; /* not a supported command */
}
trash.data = total_v2_len;
goto eat_header;
eat_header:
/* remove the PROXY line from the request. For this we re-read the
* exact line at once. If we don't get the exact same result, we
* fail.
*/
while (1) {
ssize_t len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
if (len2 < 0 && errno == EINTR)
continue;
if (len2 != trash.data)
goto recv_abort;
break;
}
conn->flags &= ~flag;
conn->flags |= CO_FL_RCVD_PROXY;
return 1;
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
not_ready:
return 0;
missing:
/* Missing data. Since we're using MSG_PEEK, we can only poll again if
* we have not read anything. Otherwise we need to fail because we won't
* be able to poll anymore.
*/
conn->err_code = CO_ER_PRX_TRUNCATED;
goto fail;
bad_header:
/* This is not a valid proxy protocol header */
conn->err_code = CO_ER_PRX_BAD_HDR;
goto fail;
recv_abort:
conn->err_code = CO_ER_PRX_ABORT;
conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
goto fail;
fail:
conn->flags |= CO_FL_ERROR;
return 0;
}
/* Handshake callback which emits the PROXY protocol line on a socket being
 * established. The bit designating this handshake is passed in <flag> by the
 * caller; once the whole line is sent, this bit is removed from the
 * connection's flags and a non-zero value is returned. Zero is returned both
 * on fatal failure (CO_FL_ERROR is then set) and when the function has to be
 * called again later to go further. It is designed to be called by the
 * connection handler and relies on it to commit polling changes. The line is
 * built from the other end's address when the connection is attached to a
 * stream connector, or from the connection's own resolved addresses otherwise
 * (also called a LOCAL line).
 */
int conn_send_proxy(struct connection *conn, unsigned int flag)
{
	struct stconn *sc;
	int line_len;
	int sent;
	int send_flags;

	if (!conn_ctrl_ready(conn))
		goto out_error;

	/* Without any PROXY line left to send, the connection is validated by
	 * connect() alone. Otherwise it is only validated once the whole
	 * PROXY line has been sent.
	 */
	if (!conn->send_proxy_ofs)
		goto done;

	/* If there is no mux attached to the connection, it means the
	 * connection context is a stream connector.
	 */
	if (conn->mux)
		sc = conn_get_first_sc(conn);
	else
		sc = conn->ctx;

	/* The target server expects a PROXY line to be sent first. The line is
	 * recomputed on every call since it is constant. A "normal" PROXY line
	 * can only be produced when the connection is attached to a stream
	 * connector carrying a stream; in all other cases only a LOCAL line
	 * may be sent (eg: for use with health checks).
	 */
	if (sc && sc_strm(sc)) {
		line_len = make_proxy_line(trash.area, trash.size,
		                           objt_server(conn->target),
		                           sc_conn(sc_opposite(sc)),
		                           __sc_strm(sc));
	}
	else {
		/* Retrieving local or remote addresses may fail until the
		 * connection is established, in which case we try again later.
		 */
		if (!conn_get_src(conn) || !conn_get_dst(conn))
			goto out_wait;

		line_len = make_proxy_line(trash.area, trash.size,
		                           objt_server(conn->target), conn,
		                           NULL);
	}

	if (!line_len)
		goto out_error;

	/* A positive offset denotes the first call: from now on the offset is
	 * negative and counts the bytes left to send from the end of the line.
	 */
	if (conn->send_proxy_ofs > 0)
		conn->send_proxy_ofs = -line_len;

	/* If the data layer has a pending write, MSG_MORE is requested too. */
	send_flags = 0;
	if (conn->subs && conn->subs->events & SUB_RETRY_SEND)
		send_flags = CO_SFL_MSG_MORE;

	/* send the last -send_proxy_ofs bytes of the line */
	sent = conn_ctrl_send(conn,
	                      trash.area + line_len + conn->send_proxy_ofs,
	                      -conn->send_proxy_ofs,
	                      send_flags);
	if (sent < 0)
		goto out_error;

	conn->send_proxy_ofs += sent; /* reaches zero once complete */
	if (conn->send_proxy_ofs != 0)
		goto out_wait;

	/* OK the whole line was sent, we're connected */

 done:
	/* The connection is ready now, simply return and let the connection
	 * handler notify upper layers if needed.
	 */
	conn->flags &= ~CO_FL_WAIT_L4_CONN;
	conn->flags &= ~flag;
	return 1;

 out_error:
	/* Write error on the file descriptor */
	conn->flags |= CO_FL_ERROR;
 out_wait:
	return 0;
}
/* This handshake handler waits a NetScaler Client IP insertion header
* at the beginning of the raw data stream. The header format is
* described in doc/netscaler-client-ip-insertion-protocol.txt
*
* This line MUST be at the beginning of the buffer and MUST NOT be
* fragmented.
*
* The header line is small and in all cases smaller than the smallest normal
* TCP MSS. So it MUST always be delivered as one segment, which ensures we
* can safely use MSG_PEEK and avoid buffering.
*
* Once the data is fetched, the values are set in the connection's address
* fields, and data are removed from the socket's buffer. The function returns
* zero if it needs to wait for more data or if it fails, or 1 if it completed
* and removed itself.
*/
int conn_recv_netscaler_cip(struct connection *conn, int flag)
{
struct session *sess = conn->owner;
char *line;
uint32_t hdr_len;
uint8_t ip_ver;
int ret;
if (!conn_ctrl_ready(conn))
goto fail;
BUG_ON(conn->flags & CO_FL_FDLESS);
if (!fd_recv_ready(conn->handle.fd))
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
while (1) {
ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
if (ret < 0) {
if (errno == EINTR)
continue;
if (errno == EAGAIN || errno == EWOULDBLOCK) {
fd_cant_recv(conn->handle.fd);
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
}
goto recv_abort;
}
trash.data = ret;
break;
}
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_* Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done callback.") broke the master CLI for a very obscure reason. It happens that short requests immediately terminated by a shutdown are properly received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not yet set on the connection since we've not passed again through conn_fd_handler() and it was not done in conn_complete_session(). While commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in conn_complete_session()") fixed the issue, such accident may happen again as the root cause is deeper and actually comes down to the fact that CO_FL_CONNECTED is lazily set at various check points in the code but not every time we drop one wait bit. It is not the first time we face this situation. Originally this flag was used to detect the transition between WAIT_* and CONNECTED in order to call ->wake() from the FD handler. But since at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is always synchronized against the two others before being checked. Moreover, with the I/Os moved to tasklets, the decision to call the ->wake() function is performed after the I/Os in si_cs_process() and equivalent, which don't care about this transition either. So in essence, checking for CO_FL_CONNECTED has become a lazy wait to check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always relies on someone else having synchronized it. This patch addresses it once for all by killing this flag and only checking the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). 
This revealed a number of inconsistencies that were purposely not addressed here for the sake of bisectability: - while most places do check both L4+L6 and HANDSHAKE at the same time, some places like assign_server() or back_handle_st_con() and a few sample fetches looking for proxy protocol do check for L4+L6 but don't care about HANDSHAKE ; these ones will probably fail on TCP request session rules if the handshake is not complete. - some handshake handlers do validate that a connection is established at L4 but didn't clear CO_FL_WAIT_L4_CONN - the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6 before declaring the mux ready while the snd_buf function also checks for the handshake's completion. Likely the former should validate the handshake as well and we should get rid of these extra tests in snd_buf. - raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only later clear CO_FL_WAIT_L4_CONN. - xprt_handshake would set CO_FL_CONNECTED itself without actually clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if waiting for a pure Rx handshake. - most places in ssl_sock that were checking CO_FL_CONNECTED don't need to include the L4 check as an L6 check is enough to decide whether to wait for more info or not. It also becomes obvious when reading the test in si_cs_recv() that caused the failure mentioned above that once converted it doesn't make any sense anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete cannot happen since for CS_FL_EOS to be set, the other ones must have been validated. Some of these parts will still deserve further cleanup, and some of the observations above may induce some backports of potential bug fixes once totally analyzed in their context. The risk of breaking existing stuff is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
conn->flags &= ~CO_FL_WAIT_L4_CONN;
if (!trash.data) {
/* client shutdown */
conn->err_code = CO_ER_CIP_EMPTY;
goto fail;
}
/* Fail if buffer length is not large enough to contain
* CIP magic, header length or
* CIP magic, CIP length, CIP type, header length */
if (trash.data < 12)
goto missing;
line = trash.area;
/* Decode a possible NetScaler Client IP request, fail early if
* it does not match */
if (ntohl(read_u32(line)) != __objt_listener(conn->target)->bind_conf->ns_cip_magic)
goto bad_magic;
/* Legacy CIP protocol */
if ((trash.area[8] & 0xD0) == 0x40) {
hdr_len = ntohl(read_u32((line+4)));
line += 8;
}
/* Standard CIP protocol */
else if (trash.area[8] == 0x00) {
hdr_len = ntohs(read_u32((line+10)));
line += 12;
}
/* Unknown CIP protocol */
else {
conn->err_code = CO_ER_CIP_BAD_PROTO;
goto fail;
}
/* Fail if buffer length is not large enough to contain
* a minimal IP header */
if (trash.data < 20)
goto missing;
/* Get IP version from the first four bits */
ip_ver = (*line & 0xf0) >> 4;
if (ip_ver == 4) {
struct ip *hdr_ip4;
struct my_tcphdr *hdr_tcp;
hdr_ip4 = (struct ip *)line;
if (trash.data < 40 || trash.data < hdr_len) {
/* Fail if buffer length is not large enough to contain
* IPv4 header, TCP header */
goto missing;
}
else if (hdr_ip4->ip_p != IPPROTO_TCP) {
/* The protocol does not include a TCP header */
conn->err_code = CO_ER_CIP_BAD_PROTO;
goto fail;
}
hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
/* update the session's addresses and mark them set */
((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
((struct sockaddr_in *)sess->src)->sin_port = hdr_tcp->source;
((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
((struct sockaddr_in *)sess->dst)->sin_port = hdr_tcp->dest;
}
else if (ip_ver == 6) {
struct ip6_hdr *hdr_ip6;
struct my_tcphdr *hdr_tcp;
hdr_ip6 = (struct ip6_hdr *)line;
if (trash.data < 60 || trash.data < hdr_len) {
/* Fail if buffer length is not large enough to contain
* IPv6 header, TCP header */
goto missing;
}
else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
/* The protocol does not include a TCP header */
conn->err_code = CO_ER_CIP_BAD_PROTO;
goto fail;
}
hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));
if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
goto fail;
/* update the session's addresses and mark them set */
((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
((struct sockaddr_in6 *)sess->src)->sin6_addr = hdr_ip6->ip6_src;
((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_tcp->source;
((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
((struct sockaddr_in6 *)sess->dst)->sin6_addr = hdr_ip6->ip6_dst;
((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_tcp->dest;
}
else {
/* The protocol does not match something known (IPv4/IPv6) */
conn->err_code = CO_ER_CIP_BAD_PROTO;
goto fail;
}
line += hdr_len;
trash.data = line - trash.area;
/* remove the NetScaler Client IP header from the request. For this
* we re-read the exact line at once. If we don't get the exact same
* result, we fail.
*/
while (1) {
int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
if (len2 < 0 && errno == EINTR)
continue;
if (len2 != trash.data)
goto recv_abort;
break;
}
conn->flags &= ~flag;
return 1;
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
not_ready:
return 0;
missing:
/* Missing data. Since we're using MSG_PEEK, we can only poll again if
* we have not read anything. Otherwise we need to fail because we won't
* be able to poll anymore.
*/
conn->err_code = CO_ER_CIP_TRUNCATED;
goto fail;
bad_magic:
conn->err_code = CO_ER_CIP_BAD_MAGIC;
goto fail;
recv_abort:
conn->err_code = CO_ER_CIP_ABORT;
conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
goto fail;
fail:
conn->flags |= CO_FL_ERROR;
return 0;
}
int conn_send_socks4_proxy_request(struct connection *conn)
{
struct socks4_request req_line;
if (!conn_ctrl_ready(conn))
goto out_error;
if (!conn_get_dst(conn))
goto out_error;
req_line.version = 0x04;
req_line.command = 0x01;
req_line.port = get_net_port(conn->dst);
req_line.ip = is_inet_addr(conn->dst);
memcpy(req_line.user_id, "HAProxy\0", 8);
if (conn->send_proxy_ofs > 0) {
/*
* This is the first call to send the request
*/
conn->send_proxy_ofs = -(int)sizeof(req_line);
}
if (conn->send_proxy_ofs < 0) {
int ret = 0;
/* we are sending the socks4_req_line here. If the data layer
* has a pending write, we'll also set MSG_MORE.
*/
ret = conn_ctrl_send(
conn,
((char *)(&req_line)) + (sizeof(req_line)+conn->send_proxy_ofs),
-conn->send_proxy_ofs,
(conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Before send remain is [%d], sent [%d]\n",
conn_fd(conn), -conn->send_proxy_ofs, ret);
if (ret < 0) {
goto out_error;
}
conn->send_proxy_ofs += ret; /* becomes zero once complete */
if (conn->send_proxy_ofs != 0) {
goto out_wait;
}
}
/* OK we've the whole request sent */
conn->flags &= ~CO_FL_SOCKS4_SEND;
/* The connection is ready now, simply return and let the connection
* handler notify upper layers if needed.
*/
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_* Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done callback.") broke the master CLI for a very obscure reason. It happens that short requests immediately terminated by a shutdown are properly received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not yet set on the connection since we've not passed again through conn_fd_handler() and it was not done in conn_complete_session(). While commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in conn_complete_session()") fixed the issue, such accident may happen again as the root cause is deeper and actually comes down to the fact that CO_FL_CONNECTED is lazily set at various check points in the code but not every time we drop one wait bit. It is not the first time we face this situation. Originally this flag was used to detect the transition between WAIT_* and CONNECTED in order to call ->wake() from the FD handler. But since at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is always synchronized against the two others before being checked. Moreover, with the I/Os moved to tasklets, the decision to call the ->wake() function is performed after the I/Os in si_cs_process() and equivalent, which don't care about this transition either. So in essence, checking for CO_FL_CONNECTED has become a lazy wait to check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always relies on someone else having synchronized it. This patch addresses it once for all by killing this flag and only checking the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). 
This revealed a number of inconsistencies that were purposely not addressed here for the sake of bisectability: - while most places do check both L4+L6 and HANDSHAKE at the same time, some places like assign_server() or back_handle_st_con() and a few sample fetches looking for proxy protocol do check for L4+L6 but don't care about HANDSHAKE ; these ones will probably fail on TCP request session rules if the handshake is not complete. - some handshake handlers do validate that a connection is established at L4 but didn't clear CO_FL_WAIT_L4_CONN - the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6 before declaring the mux ready while the snd_buf function also checks for the handshake's completion. Likely the former should validate the handshake as well and we should get rid of these extra tests in snd_buf. - raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only later clear CO_FL_WAIT_L4_CONN. - xprt_handshake would set CO_FL_CONNECTED itself without actually clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if waiting for a pure Rx handshake. - most places in ssl_sock that were checking CO_FL_CONNECTED don't need to include the L4 check as an L6 check is enough to decide whether to wait for more info or not. It also becomes obvious when reading the test in si_cs_recv() that caused the failure mentioned above that once converted it doesn't make any sense anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete cannot happen since for CS_FL_EOS to be set, the other ones must have been validated. Some of these parts will still deserve further cleanup, and some of the observations above may induce some backports of potential bug fixes once totally analyzed in their context. The risk of breaking existing stuff is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
conn->flags &= ~CO_FL_WAIT_L4_CONN;
if (conn->flags & CO_FL_SEND_PROXY) {
/*
* Get the send_proxy_ofs ready for the send_proxy due to we are
* reusing the "send_proxy_ofs", and SOCKS4 handshake should be done
* before sending PROXY Protocol.
*/
conn->send_proxy_ofs = 1;
}
return 1;
out_error:
/* Write error on the file descriptor */
conn->flags |= CO_FL_ERROR;
if (conn->err_code == CO_ER_NONE) {
conn->err_code = CO_ER_SOCKS4_SEND;
}
return 0;
out_wait:
return 0;
}
int conn_recv_socks4_proxy_response(struct connection *conn)
{
char line[SOCKS4_HS_RSP_LEN];
int ret;
if (!conn_ctrl_ready(conn))
goto fail;
BUG_ON(conn->flags & CO_FL_FDLESS);
if (!fd_recv_ready(conn->handle.fd))
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
while (1) {
/* SOCKS4 Proxy will response with 8 bytes, 0x00 | 0x5A | 0x00 0x00 | 0x00 0x00 0x00 0x00
* Try to peek into it, before all 8 bytes ready.
*/
ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, MSG_PEEK);
if (ret == 0) {
/* the socket has been closed or shutdown for send */
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d], looks like the socket has been closed or shutdown for send\n",
conn->handle.fd, ret, errno);
if (conn->err_code == CO_ER_NONE) {
conn->err_code = CO_ER_SOCKS4_RECV;
}
goto fail;
}
if (ret > 0) {
if (ret == SOCKS4_HS_RSP_LEN) {
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received 8 bytes, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
}else{
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], first byte is [%02X], last bye is [%02X]\n", conn->handle.fd, ret, line[0], line[ret-1]);
}
} else {
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d]\n", conn->handle.fd, ret, errno);
}
if (ret < 0) {
if (errno == EINTR) {
continue;
}
if (errno == EAGAIN || errno == EWOULDBLOCK) {
fd_cant_recv(conn->handle.fd);
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
}
goto recv_abort;
}
break;
}
MEDIUM: connection: remove CO_FL_CONNECTED and only rely on CO_FL_WAIT_* Commit 477902bd2e ("MEDIUM: connections: Get ride of the xprt_done callback.") broke the master CLI for a very obscure reason. It happens that short requests immediately terminated by a shutdown are properly received, CS_FL_EOS is correctly set, but in si_cs_recv(), we refrain from setting CF_SHUTR on the channel because CO_FL_CONNECTED was not yet set on the connection since we've not passed again through conn_fd_handler() and it was not done in conn_complete_session(). While commit a8a415d31a ("BUG/MEDIUM: connections: Set CO_FL_CONNECTED in conn_complete_session()") fixed the issue, such accident may happen again as the root cause is deeper and actually comes down to the fact that CO_FL_CONNECTED is lazily set at various check points in the code but not every time we drop one wait bit. It is not the first time we face this situation. Originally this flag was used to detect the transition between WAIT_* and CONNECTED in order to call ->wake() from the FD handler. But since at least 1.8-dev1 with commit 7bf3fa3c23 ("BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the data layer"), CO_FL_CONNECTED is always synchronized against the two others before being checked. Moreover, with the I/Os moved to tasklets, the decision to call the ->wake() function is performed after the I/Os in si_cs_process() and equivalent, which don't care about this transition either. So in essence, checking for CO_FL_CONNECTED has become a lazy wait to check for (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN), but that always relies on someone else having synchronized it. This patch addresses it once for all by killing this flag and only checking the two others (for which a composite mask CO_FL_WAIT_L4L6 was added). 
This revealed a number of inconsistencies that were purposely not addressed here for the sake of bisectability: - while most places do check both L4+L6 and HANDSHAKE at the same time, some places like assign_server() or back_handle_st_con() and a few sample fetches looking for proxy protocol do check for L4+L6 but don't care about HANDSHAKE ; these ones will probably fail on TCP request session rules if the handshake is not complete. - some handshake handlers do validate that a connection is established at L4 but didn't clear CO_FL_WAIT_L4_CONN - the ->ctl method of mux_fcgi, mux_pt and mux_h1 only checks for L4+L6 before declaring the mux ready while the snd_buf function also checks for the handshake's completion. Likely the former should validate the handshake as well and we should get rid of these extra tests in snd_buf. - raw_sock_from_buf() would directly set CO_FL_CONNECTED and would only later clear CO_FL_WAIT_L4_CONN. - xprt_handshake would set CO_FL_CONNECTED itself without actually clearing CO_FL_WAIT_L4_CONN, which could apparently happen only if waiting for a pure Rx handshake. - most places in ssl_sock that were checking CO_FL_CONNECTED don't need to include the L4 check as an L6 check is enough to decide whether to wait for more info or not. It also becomes obvious when reading the test in si_cs_recv() that caused the failure mentioned above that once converted it doesn't make any sense anymore: having CS_FL_EOS set while still waiting for L4 and L6 to complete cannot happen since for CS_FL_EOS to be set, the other ones must have been validated. Some of these parts will still deserve further cleanup, and some of the observations above may induce some backports of potential bug fixes once totally analyzed in their context. The risk of breaking existing stuff is too high to blindly backport everything.
2020-01-23 03:11:58 -05:00
conn->flags &= ~CO_FL_WAIT_L4_CONN;
if (ret < SOCKS4_HS_RSP_LEN) {
/* Missing data. Since we're using MSG_PEEK, we can only poll again if
* we are not able to read enough data.
*/
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
goto not_ready;
}
/*
* Base on the SOCSK4 protocol:
*
* +----+----+----+----+----+----+----+----+
* | VN | CD | DSTPORT | DSTIP |
* +----+----+----+----+----+----+----+----+
* # of bytes: 1 1 2 4
* VN is the version of the reply code and should be 0. CD is the result
* code with one of the following values:
* 90: request granted
* 91: request rejected or failed
* 92: request rejected because SOCKS server cannot connect to identd on the client
* 93: request rejected because the client program and identd report different user-ids
* The remaining fields are ignored.
*/
if (line[1] != 90) {
conn->flags &= ~CO_FL_SOCKS4_RECV;
DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: FAIL, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
if (conn->err_code == CO_ER_NONE) {
conn->err_code = CO_ER_SOCKS4_DENY;
}
goto fail;
}
/* remove the 8 bytes response from the stream */
while (1) {
ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, 0);
if (ret < 0 && errno == EINTR) {
continue;
}
if (ret != SOCKS4_HS_RSP_LEN) {
if (conn->err_code == CO_ER_NONE) {
conn->err_code = CO_ER_SOCKS4_RECV;
}
goto fail;
}
break;
}
conn->flags &= ~CO_FL_SOCKS4_RECV;
return 1;
BUG/MEDIUM: connection: fix multiple handshake polling issues Connection handshakes were rarely stacked on top of each other, but the recent experiments consisting in sending PROXY over SOCKS4 revealed a number of issues in these lower layers. First, each handler waiting for data MUST subscribe to recv events with __conn_sock_want_recv() and MUST unsubscribe from send events using __conn_sock_stop_send() to avoid any wake-up loop in case a previous sender has set this. Second, each handler waiting for sending MUST subscribe to send events with __conn_sock_want_send() and MUST unsubscribe from recv events using __conn_sock_stop_recv() to avoid any wake-up loop in case some data are available on the connection. Till now this was done at various random places, and in particular the cases where the FD was not ready for recv forgot to re-enable reading. Second, while senders can happily use conn_sock_send() which automatically handles EINTR, loops, and marks the FD as not ready with fd_cant_send(), there is no equivalent for recv so receivers facing EAGAIN MUST call fd_cant_send() to enable polling. It could be argued that implementing an equivalent conn_sock_recv() function could be useful and more long-term proof than the current situation. Third, both types of handlers MUST unsubscribe from their respective events once they managed to do their job, and none may even play with __conn_xprt_*(). Here again this was lacking, and one surprizing call to __conn_xprt_stop_recv() was present in the proxy protocol parser for TCP6 messages! Thanks to Alexander Liu for his help on this issue. This patch must be backported to 1.9 and possibly some older versions, though the SOCKS parts should be dropped.
2019-06-03 02:17:30 -04:00
not_ready:
return 0;
recv_abort:
if (conn->err_code == CO_ER_NONE) {
conn->err_code = CO_ER_SOCKS4_ABORT;
}
conn->flags |= (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH);
goto fail;
fail:
conn->flags |= CO_FL_ERROR;
return 0;
}
/* Registers the proto mux list element <list> by linking it into the global
 * <mux_proto_list> with LIST_APPEND(). Modifies the list element!
 */
void register_mux_proto(struct mux_proto_list *list)
{
	LIST_APPEND(&mux_proto_list.list, &list->list);
}
/* Lists the known proto mux on <out>. This function is used by "haproxy -vv"
* and is suitable for early boot just after the "REGISTER" stage because it
* doesn't depend on anything to be already allocated.
*/
void list_mux_proto(FILE *out)
{
struct mux_proto_list *item;
struct ist proto;
char *mode, *side;
int done;
fprintf(out, "Available multiplexer protocols :\n"
"(protocols marked as <default> cannot be specified using 'proto' keyword)\n");
list_for_each_entry(item, &mux_proto_list.list, list) {
proto = item->token;
if (item->mode == PROTO_MODE_ANY)
mode = "TCP|HTTP";
else if (item->mode == PROTO_MODE_TCP)
mode = "TCP";
else if (item->mode == PROTO_MODE_HTTP)
mode = "HTTP";
else
mode = "NONE";
if (item->side == PROTO_SIDE_BOTH)
side = "FE|BE";
else if (item->side == PROTO_SIDE_FE)
side = "FE";
else if (item->side == PROTO_SIDE_BE)
side = "BE";
else
side = "NONE";
fprintf(out, " %10s : mode=%-5s side=%-6s mux=%-5s flags=",
(proto.len ? proto.ptr : "<default>"), mode, side, item->mux->name);
done = 0;
/* note: the block below could be simplified using macros but for only
* 4 flags it's not worth it.
*/
if (item->mux->flags & MX_FL_HTX)
done |= fprintf(out, "%sHTX", done ? "|" : "");
if (item->mux->flags & MX_FL_HOL_RISK)
done |= fprintf(out, "%sHOL_RISK", done ? "|" : "");
if (item->mux->flags & MX_FL_NO_UPG)
done |= fprintf(out, "%sNO_UPG", done ? "|" : "");
if (item->mux->flags & MX_FL_FRAMED)
done |= fprintf(out, "%sFRAMED", done ? "|" : "");
fprintf(out, "\n");
}
}
/* Builds a PROXY protocol version 1 (text) line from addresses <src> and
 * <dst> into buffer <buf>, whose size is <buf_len> (including the trailing
 * zero). Returns the number of bytes composing the line (including the
 * trailing LF), or zero in case of failure (e.g. not enough space, or an
 * address that cannot be converted to text). TCP4, TCP6 and "UNKNOWN" formats
 * are supported: TCP4 is used when both addresses are IPv4, TCP6 when at least
 * one is IPv6 (the IPv4 one being converted with v4tov6()), and UNKNOWN when
 * either pointer is NULL or either family is neither AF_INET nor AF_INET6.
 */
static int make_proxy_line_v1(char *buf, int buf_len, const struct sockaddr_storage *src, const struct sockaddr_storage *dst)
{
	char src_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	char dst_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	in_port_t src_port;
	in_port_t dst_port;
	char *protocol;
	int len;

	if (!src || !dst ||
	    (src->ss_family != AF_INET && src->ss_family != AF_INET6) ||
	    (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
		/* unknown family combination */
		len = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
		return (len >= buf_len) ? 0 : len;
	}

	if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
		/* IPv4 for both src and dst */
		struct sockaddr_in *src4 = (struct sockaddr_in *)src;
		struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;

		protocol = "TCP4";

		if (!inet_ntop(AF_INET, &src4->sin_addr, src_str, sizeof(src_str)))
			return 0;
		src_port = src4->sin_port;

		if (!inet_ntop(AF_INET, &dst4->sin_addr, dst_str, sizeof(dst_str)))
			return 0;
		dst_port = dst4->sin_port;
	}
	else {
		/* IPv6 for at least one of src and dst: the other one, if
		 * IPv4, is first converted to IPv6.
		 */
		struct in6_addr addr6;

		protocol = "TCP6";

		if (src->ss_family == AF_INET) {
			struct sockaddr_in *src4 = (struct sockaddr_in *)src;

			v4tov6(&addr6, &src4->sin_addr);
			src_port = src4->sin_port;
		}
		else {
			struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src;

			addr6 = src6->sin6_addr;
			src_port = src6->sin6_port;
		}

		if (!inet_ntop(AF_INET6, &addr6, src_str, sizeof(src_str)))
			return 0;

		if (dst->ss_family == AF_INET) {
			struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;

			v4tov6(&addr6, &dst4->sin_addr);
			dst_port = dst4->sin_port;
		}
		else {
			struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;

			addr6 = dst6->sin6_addr;
			dst_port = dst6->sin6_port;
		}

		if (!inet_ntop(AF_INET6, &addr6, dst_str, sizeof(dst_str)))
			return 0;
	}

	len = snprintf(buf, buf_len, "PROXY %s %s %s %u %u\r\n", protocol, src_str, dst_str, ntohs(src_port), ntohs(dst_port));
	return (len >= buf_len) ? 0 : len;
}
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
/* Serializes one TLV at <dest>, which offers <dest_len> bytes of room: the
 * <type> byte, the 16-bit <length> split into its high and low bytes, then
 * <length> bytes copied from <value>. <value> may be NULL only when <length>
 * is zero (empty TLV).
 *
 * Returns the total number of bytes written (TLV header plus value), or 0 if
 * nothing was written because <dest> is NULL, <dest_len> is negative or too
 * small for the whole TLV, or <value> is NULL while <length> is not zero.
 */
static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
{
	struct tlv *tlv;

	/* reject negative sizes explicitly: the comparison below is unsigned
	 * and would otherwise see a negative <dest_len> as a huge room.
	 */
	if (!dest || dest_len < 0 || (length + sizeof(*tlv) > (size_t)dest_len))
		return 0;

	if (length && !value)
		return 0;

	tlv = (struct tlv *)dest;
	tlv->type = type;
	tlv->length_hi = length >> 8;
	tlv->length_lo = length & 0x00ff;

	/* memcpy() must not be handed a NULL source, even for zero bytes */
	if (length)
		memcpy(tlv->value, value, length);

	return length + sizeof(*tlv);
}
/* Note: <remote> is explicitly allowed to be NULL */
static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
{
const char pp2_signature[] = PP2_SIGNATURE;
void *tlv_crc32c_p = NULL;
int ret = 0;
struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
struct sockaddr_storage null_addr = { .ss_family = 0 };
struct srv_pp_tlv_list *srv_tlv = NULL;
const struct sockaddr_storage *src = &null_addr;
const struct sockaddr_storage *dst = &null_addr;
const char *value = "";
int value_len = 0;
if (buf_len < PP2_HEADER_LEN)
return 0;
memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);
if (strm) {
src = sc_src(strm->scf);
dst = sc_dst(strm->scf);
}
else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
src = conn_src(remote);
dst = conn_dst(remote);
}
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
/* At least one of src or dst is not of AF_INET or AF_INET6 */
if ( !src
|| !dst
|| (!pp2_never_send_local && conn_is_back(remote)) // locally initiated connection
|| (src->ss_family != AF_INET && src->ss_family != AF_INET6)
|| (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
if (buf_len < PP2_HDR_LEN_UNSPEC)
return 0;
hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
ret = PP2_HDR_LEN_UNSPEC;
}
else {
hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
/* IPv4 for both src and dst */
if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
if (buf_len < PP2_HDR_LEN_INET)
return 0;
hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
ret = PP2_HDR_LEN_INET;
}
/* IPv6 for at least one of src and dst */
else {
struct in6_addr tmp;
if (buf_len < PP2_HDR_LEN_INET6)
return 0;
hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
if (src->ss_family == AF_INET) {
v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
memcpy(hdr->addr.ip6.src_addr, &tmp, 16);
hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
}
else {
memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
}
if (dst->ss_family == AF_INET) {
v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
memcpy(hdr->addr.ip6.dst_addr, &tmp, 16);
hdr->addr.ip6.dst_port = ((struct sockaddr_in *)dst)->sin_port;
}
else {
memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
}
ret = PP2_HDR_LEN_INET6;
}
}
if (strm) {
struct buffer *replace = NULL;
list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) {
replace = NULL;
/* Users will always need to provide a value, in case of forwarding, they should use fc_pp_tlv.
* for generic types. Otherwise, we will send an empty TLV.
*/
if (!LIST_ISEMPTY(&srv_tlv->fmt)) {
replace = alloc_trash_chunk();
if (unlikely(!replace))
return 0;
replace->data = build_logline(strm, replace->area, replace->size, &srv_tlv->fmt);
if (unlikely((buf_len - ret) < sizeof(struct tlv))) {
free_trash_chunk(replace);
return 0;
}
ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, replace->data, replace->area);
free_trash_chunk(replace);
}
else {
/* Create empty TLV as no value was specified */
ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, 0, NULL);
}
}
}
/* Handle predefined TLVs as usual */
if (srv->pp_opts & SRV_PP_V2_CRC32C) {
uint32_t zero_crc32c = 0;
if ((buf_len - ret) < sizeof(struct tlv))
return 0;
tlv_crc32c_p = (void *)((struct tlv *)&buf[ret])->value;
ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_CRC32C, sizeof(zero_crc32c), (const char *)&zero_crc32c);
}
if (remote && conn_get_alpn(remote, &value, &value_len)) {
if ((buf_len - ret) < sizeof(struct tlv))
return 0;
ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_ALPN, value_len, value);
}
if (srv->pp_opts & SRV_PP_V2_AUTHORITY) {
struct conn_tlv_list *tlv = conn_get_tlv(remote, PP2_TYPE_AUTHORITY);
value = NULL;
if (tlv) {
value_len = tlv->len;
value = tlv->value;
}
#ifdef USE_OPENSSL
else {
if ((value = ssl_sock_get_sni(remote)))
value_len = strlen(value);
}
#endif
if (value) {
if ((buf_len - ret) < sizeof(struct tlv))
return 0;
ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_AUTHORITY, value_len, value);
}
}
if (strm && (srv->pp_opts & SRV_PP_V2_UNIQUE_ID)) {
struct session* sess = strm_sess(strm);
struct ist unique_id = stream_generate_unique_id(strm, &sess->fe->format_unique_id);
value = unique_id.ptr;
value_len = unique_id.len;
if (value_len >= 0) {
if ((buf_len - ret) < sizeof(struct tlv))
return 0;
ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_UNIQUE_ID, value_len, value);
}
}
#ifdef USE_OPENSSL
if (srv->pp_opts & SRV_PP_V2_SSL) {
struct tlv_ssl *tlv;
int ssl_tlv_len = 0;
if ((buf_len - ret) < sizeof(struct tlv_ssl))
return 0;
tlv = (struct tlv_ssl *)&buf[ret];
memset(tlv, 0, sizeof(struct tlv_ssl));
ssl_tlv_len += sizeof(struct tlv_ssl);
tlv->tlv.type = PP2_TYPE_SSL;
if (conn_is_ssl(remote)) {
tlv->client |= PP2_CLIENT_SSL;
value = ssl_sock_get_proto_version(remote);
if (value) {
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
}
if (ssl_sock_get_cert_used_sess(remote)) {
tlv->client |= PP2_CLIENT_CERT_SESS;
tlv->verify = htonl(ssl_sock_get_verify_result(remote));
if (ssl_sock_get_cert_used_conn(remote))
tlv->client |= PP2_CLIENT_CERT_CONN;
}
if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
struct buffer *cn_trash = get_trash_chunk();
if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN,
cn_trash->data,
cn_trash->area);
}
}
if (srv->pp_opts & SRV_PP_V2_SSL_KEY_ALG) {
struct buffer *pkey_trash = get_trash_chunk();
if (ssl_sock_get_pkey_algo(remote, pkey_trash) > 0) {
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_KEY_ALG,
pkey_trash->data,
pkey_trash->area);
}
}
if (srv->pp_opts & SRV_PP_V2_SSL_SIG_ALG) {
value = ssl_sock_get_cert_sig(remote);
if (value) {
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_SIG_ALG, strlen(value), value);
}
}
if (srv->pp_opts & SRV_PP_V2_SSL_CIPHER) {
value = ssl_sock_get_cipher_name(remote);
if (value) {
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CIPHER, strlen(value), value);
}
}
}
tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
ret += ssl_tlv_len;
}
#endif
#ifdef USE_NS
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
if (remote && (remote->proxy_netns)) {
if ((buf_len - ret) < sizeof(struct tlv))
return 0;
ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
}
#endif
hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
if (tlv_crc32c_p) {
write_u32(tlv_crc32c_p, htonl(hash_crc32c(buf, ret)));
}
return ret;
}
/* Builds a PROXY protocol header into <buf> (<buf_len> bytes available) and
 * returns the value returned by the version-specific builder. The v2 builder
 * is used when <srv> is set and has SRV_PP_V2 in its pp_opts; otherwise the v1
 * builder is used. For v1, the addresses come from the front stream connector
 * of <strm> when a stream is passed, else from <remote> when both
 * conn_get_src() and conn_get_dst() return non-zero for it. When either
 * address is missing, the v1 builder is called with two NULL addresses.
 * Note: <remote> is explicitly allowed to be NULL.
 */
int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
{
	const struct sockaddr_storage *from = NULL;
	const struct sockaddr_storage *to = NULL;

	if (srv && (srv->pp_opts & SRV_PP_V2))
		return make_proxy_line_v2(buf, buf_len, srv, remote, strm);

	if (strm) {
		from = sc_src(strm->scf);
		to = sc_dst(strm->scf);
	}
	else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
		from = conn_src(remote);
		to = conn_dst(remote);
	}

	/* v1 needs both addresses or none at all */
	if (!from || !to) {
		from = NULL;
		to = NULL;
	}

	return make_proxy_line_v1(buf, buf_len, from, to);
}
/* Configuration parser for the "pp2-never-send-local" global keyword. The
 * keyword takes no argument: when too_many_args() reports extra ones, -1 is
 * returned and nothing is changed. Otherwise the file-scope flag
 * pp2_never_send_local is set to 1 and 0 is returned.
 */
static int cfg_parse_pp2_never_send_local(char **args, int section_type, struct proxy *curpx,
                                          const struct proxy *defpx, const char *file, int line,
                                          char **err)
{
	int status = -1;

	if (!too_many_args(0, args, err, NULL)) {
		pp2_never_send_local = 1;
		status = 0;
	}

	return status;
}
/* Appends a compact, single-line description of connection <conn> to buffer
 * <buf>: the connection pointer and its direction ("IN"/"OUT"), its target
 * (frontend for a listener, proxy/server for a server, backend for a proxy),
 * the xprt and ctrl names, then the source and destination addresses with
 * their ports when they are set and can be converted to a string. An optional
 * prefix <pfx> is emitted before the first field when it is not NULL. Passing
 * a NULL <conn> is permitted and emits nothing. Returns the number of
 * characters added to <buf>. This is handy to complete traces or debug output.
 */
int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx)
{
	const int start = buf->data;
	const struct listener *li;
	const struct server *sv;
	const struct proxy *px;
	char addr[40];

	if (!conn)
		return 0;

	if (!pfx)
		pfx = "";

	chunk_appendf(buf, "%sconn=%p(%s)", pfx, conn, conn_is_back(conn) ? "OUT" : "IN");

	/* only one kind of target is reported, in this order of preference */
	if ((li = objt_listener(conn->target)))
		chunk_appendf(buf, " fe=%s", li->bind_conf->frontend->id);
	else if ((sv = objt_server(conn->target)))
		chunk_appendf(buf, " sv=%s/%s", sv->proxy->id, sv->id);
	else if ((px = objt_proxy(conn->target)))
		chunk_appendf(buf, " be=%s", px->id);

	chunk_appendf(buf, " %s/%s", conn_get_xprt_name(conn), conn_get_ctrl_name(conn));

	/* <addr> is reused for both addresses */
	if (conn->src && addr_to_str(conn->src, addr, sizeof(addr)))
		chunk_appendf(buf, " src=%s:%d", addr, get_host_port(conn->src));

	if (conn->dst && addr_to_str(conn->dst, addr, sizeof(addr)))
		chunk_appendf(buf, " dst=%s:%d", addr, get_host_port(conn->dst));

	return buf->data - start;
}
/* Returns the number of glitches experienced on the mux connection, as
 * reported by the mux's ctl() callback for MUX_CTL_GET_GLITCHES. The
 * connection is the backend one when keyword <kw> starts with 'b' (the check's
 * connection if the session's origin is a check, else the stream's back
 * connection), otherwise the session's origin connection. Nothing is returned
 * (0) when there is no connection, when the mux has no ctl() callback or when
 * ctl() returns a negative value. When no mux is installed yet,
 * SMP_F_MAY_CHANGE is set since a later evaluation may succeed.
 */
static int
smp_fetch_fc_glitches(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	const int want_back = (kw[0] == 'b');
	struct connection *conn = NULL;
	int glitches;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
		if (want_back)
			conn = sc_conn(__objt_check(smp->sess->origin)->sc);
	}
	else if (!want_back)
		conn = objt_conn(smp->sess->origin);
	else if (smp->strm)
		conn = sc_conn(smp->strm->scb);

	if (!conn)
		return 0;

	if (!conn->mux) {
		/* Mux not installed yet, this may change */
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	/* a mux without a ctl() callback cannot report glitches */
	if (!conn->mux->ctl)
		return 0;

	glitches = conn->mux->ctl(conn, MUX_CTL_GET_GLITCHES, NULL);
	if (glitches < 0) {
		/* not supported by the mux */
		return 0;
	}

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = glitches;
	return 1;
}
/* Returns the major HTTP version deduced from the name of the connection's
 * mux: 3 when the mux is named "QUIC", 2 when it is named "H2", and 1 for any
 * other mux having the MX_FL_HTX flag. The connection is the backend one when
 * keyword <kw> starts with 'b' (the check's connection if the session's origin
 * is a check, else the stream's back connection), otherwise the session's
 * origin connection. Nothing is returned (0) without a connection or when the
 * mux does not have MX_FL_HTX. When no mux is installed yet, SMP_F_MAY_CHANGE
 * is set since a later evaluation may succeed.
 *
 * WARNING: Should be updated if a new major HTTP version is added.
 */
static int
smp_fetch_fc_http_major(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	const int want_back = (kw[0] == 'b');
	struct connection *conn = NULL;
	const char *mux_name;
	int major;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
		if (want_back)
			conn = sc_conn(__objt_check(smp->sess->origin)->sc);
	}
	else if (!want_back)
		conn = objt_conn(smp->sess->origin);
	else if (smp->strm)
		conn = sc_conn(smp->strm->scb);

	if (!conn)
		return 0;

	if (!conn->mux) {
		/* No mux installed, this may change */
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	/* a mux without MX_FL_HTX has no HTTP version to report */
	if (!(conn->mux->flags & MX_FL_HTX))
		return 0;

	mux_name = conn_get_mux_name(conn);
	if (strcmp(mux_name, "QUIC") == 0)
		major = 3;
	else if (strcmp(mux_name, "H2") == 0)
		major = 2;
	else
		major = 1;

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = major;
	return 1;
}
/* Returns a boolean sample telling whether the session's origin connection
 * has the CO_FL_RCVD_PROXY flag, i.e. whether a PROXY protocol header was
 * received on it. Nothing is returned (0) when the origin is not a connection.
 * While the connection still has CO_FL_WAIT_XPRT set, nothing is returned
 * either and SMP_F_MAY_CHANGE is set since the answer is not known yet.
 */
int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn = objt_conn(smp->sess->origin);

	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	smp->flags = 0;
	smp->data.type = SMP_T_BOOL;
	smp->data.u.sint = !!(conn->flags & CO_FL_RCVD_PROXY);
	return 1;
}
/*
 * This function checks the TLV type argument of the "fc_pp_tlv" sample fetch.
 * It expects the TLV type in args[0] as a string holding either one of the
 * known constant names (matched case-insensitively) or a number in any base
 * accepted by strtoul() with base 0. On success, the string in args[0] is
 * released and args[0] is turned into an ARGT_SINT holding the numerical value
 * of the TLV type, and 1 is returned. On failure, <err> is filled with an
 * error message, args[0] is left untouched and 0 is returned.
 *
 * A numerical input is rejected when it contains no digit at all, when it is
 * followed by trailing characters, or when its value does not fit in the
 * 0..255 range. The range is checked on the full strtoul() result, before
 * narrowing it to an int, so that large values cannot wrap into the valid
 * range.
 *
 * NOTE(review): like the '%s' error messages, strtoul() relies on the
 * argument's string being NUL-terminated; confirm against the argument parser.
 */
static int smp_check_tlv_type(struct arg *args, char **err)
{
	static const struct {
		const char *name;
		int type;
	} known_types[] = {
		{ "ALPN",        PP2_TYPE_ALPN },
		{ "AUTHORITY",   PP2_TYPE_AUTHORITY },
		{ "CRC32C",      PP2_TYPE_CRC32C },
		{ "NOOP",        PP2_TYPE_NOOP },
		{ "UNIQUE_ID",   PP2_TYPE_UNIQUE_ID },
		{ "SSL",         PP2_TYPE_SSL },
		{ "SSL_VERSION", PP2_SUBTYPE_SSL_VERSION },
		{ "SSL_CN",      PP2_SUBTYPE_SSL_CN },
		{ "SSL_CIPHER",  PP2_SUBTYPE_SSL_CIPHER },
		{ "SSL_SIG_ALG", PP2_SUBTYPE_SSL_SIG_ALG },
		{ "SSL_KEY_ALG", PP2_SUBTYPE_SSL_KEY_ALG },
		{ "NETNS",       PP2_TYPE_NETNS },
	};
	const size_t nb_known = sizeof(known_types) / sizeof(known_types[0]);
	struct ist input = ist2(args[0].data.str.area, args[0].data.str.data);
	unsigned long value;
	char *endp;
	size_t i;
	int type = -1;

	/* first try the symbolic names */
	for (i = 0; i < nb_known; i++) {
		if (isteqi(input, ist(known_types[i].name))) {
			type = known_types[i].type;
			break;
		}
	}

	if (i == nb_known) {
		/* not a known name, it must be a plain number */
		endp = NULL;
		value = strtoul(input.ptr, &endp, 0);
		if (!endp || endp == input.ptr || *endp != '\0') {
			memprintf(err, "Could not convert type '%s'", input.ptr);
			return 0;
		}

		/* Compare before narrowing to int: this also catches negative
		 * inputs and overflows, which strtoul() reports as very large
		 * values.
		 */
		type = (value > 255) ? -1 : (int)value;
	}

	if (type < 0 || type > 255) {
		memprintf(err, "Invalid TLV Type '%s'", input.ptr);
		return 0;
	}

	/* <input> points into this string, so it must only be released once
	 * it is no longer needed for error reporting.
	 */
	chunk_destroy(&args[0].data.str);
	args[0].type = ARGT_SINT;
	args[0].data.sint = type;

	return 1;
}
/* Fetches an arbitrary TLV from the list of TLVs attached to the session's
 * origin connection (those received in a PROXY protocol v2 header). The wanted
 * TLV type is taken from args[0], which must be an ARGT_SINT. The lookup
 * starts at the element stored in smp->ctx.p when it is set, else at the first
 * element of the connection's TLV list. On a match, the sample is set to a
 * string pointing to the TLV's value, the matching element is saved in
 * smp->ctx.p, SMP_F_NOT_LAST is set and 1 is returned. When no element
 * matches, SMP_F_NOT_LAST is cleared and 0 is returned. Nothing is returned
 * (0) either when the origin is not a connection, and while the connection
 * still has CO_FL_WAIT_XPRT set, in which case SMP_F_MAY_CHANGE is set.
 *
 * NOTE(review): when resuming from smp->ctx.p, the iteration starts at the
 * saved element itself, not at its successor - confirm this is what callers
 * iterating over multiple values expect.
 */
int smp_fetch_fc_pp_tlv(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn = objt_conn(smp->sess->origin);
	struct conn_tlv_list *tlv = NULL;
	int wanted_type;

	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	if (args[0].type != ARGT_SINT)
		return 0;

	wanted_type = args[0].data.sint;

	if (smp->ctx.p)
		tlv = smp->ctx.p;
	else
		tlv = LIST_ELEM(conn->tlv_list.n, struct conn_tlv_list *, list);

	list_for_each_entry_from(tlv, &conn->tlv_list, list) {
		if (tlv->type != wanted_type)
			continue;

		smp->flags |= SMP_F_NOT_LAST;
		smp->data.type = SMP_T_STR;
		smp->data.u.str.area = tlv->value;
		smp->data.u.str.data = tlv->len;
		smp->ctx.p = tlv;
		return 1;
	}

	smp->flags &= ~SMP_F_NOT_LAST;
	return 0;
}
/* fetch the authority TLV from a PROXY protocol header */
int smp_fetch_fc_pp_authority(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
struct arg tlv_arg;
int ret;
set_tlv_arg(PP2_TYPE_AUTHORITY, &tlv_arg);
ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
smp->flags &= ~SMP_F_NOT_LAST; // return only the first authority
return ret;
}
/* fetch the unique ID TLV from a PROXY protocol header */
int smp_fetch_fc_pp_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
struct arg tlv_arg;
int ret;
set_tlv_arg(PP2_TYPE_UNIQUE_ID, &tlv_arg);
ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
smp->flags &= ~SMP_F_NOT_LAST; // return only the first unique ID
return ret;
}
/* Fetches the error code of a connection as an integer sample. The connection
 * is the backend one when keyword <kw> starts with 'b' (the check's connection
 * if the session's origin is a check, else the stream's back connection),
 * otherwise the session's origin connection. Nothing is returned (0) without a
 * connection. While the connection has CO_FL_WAIT_XPRT set and no error code
 * yet, nothing is returned either and SMP_F_MAY_CHANGE is set.
 */
int smp_fetch_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	const int want_back = (kw[0] == 'b');
	struct connection *conn = NULL;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
		if (want_back)
			conn = sc_conn(__objt_check(smp->sess->origin)->sc);
	}
	else if (!want_back)
		conn = objt_conn(smp->sess->origin);
	else if (smp->strm)
		conn = sc_conn(smp->strm->scb);

	if (!conn)
		return 0;

	/* no error reported so far but the xprt is not ready yet */
	if (!conn->err_code && (conn->flags & CO_FL_WAIT_XPRT)) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	smp->flags = 0;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = (unsigned long long int)conn->err_code;
	return 1;
}
/* Fetches a string representation of the error code of a connection, as
 * provided by conn_err_code_str(). The connection is the backend one when
 * keyword <kw> starts with 'b' (the check's connection if the session's origin
 * is a check, else the stream's back connection), otherwise the session's
 * origin connection. Nothing is returned (0) without a connection or when
 * conn_err_code_str() returns NULL. While the connection has CO_FL_WAIT_XPRT
 * set and no error code yet, nothing is returned either and SMP_F_MAY_CHANGE
 * is set.
 */
int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	const int want_back = (kw[0] == 'b');
	struct connection *conn = NULL;
	const char *msg;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
		if (want_back)
			conn = sc_conn(__objt_check(smp->sess->origin)->sc);
	}
	else if (!want_back)
		conn = objt_conn(smp->sess->origin);
	else if (smp->strm)
		conn = sc_conn(smp->strm->scb);

	if (!conn)
		return 0;

	/* no error reported so far but the xprt is not ready yet */
	if (!conn->err_code && (conn->flags & CO_FL_WAIT_XPRT)) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	msg = conn_err_code_str(conn);
	if (!msg)
		return 0;

	smp->flags = 0;
	smp->data.type = SMP_T_STR;
	smp->data.u.str.area = (char*)msg;
	smp->data.u.str.data = strlen(msg);
	return 1;
}
/* Sample fetch keywords provided by this file, registered at the STG_REGISTER
 * init stage through sample_register_fetches(). The "bc_*" and "fc_*" variants
 * of a keyword share the same handler, which tells them apart by looking at
 * the first character of the keyword.
 *
 * Note: must not be declared <const> as its list will be overwritten.
 * Note: fetches that may return multiple types should be declared using the
 * appropriate pseudo-type. If not available it must be declared as the lowest
 * common denominator, the type that can be casted into all other ones.
 */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
{ "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
{ "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
{ "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
{ "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
{ "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
{ "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L4CLI },
{ /* END */ },
}};
INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
/* Configuration keywords provided by this file, registered at the
 * STG_REGISTER init stage through cfg_register_keywords(). The only entry is
 * the "pp2-never-send-local" keyword of the global section (CFG_GLOBAL),
 * handled by cfg_parse_pp2_never_send_local().
 */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "pp2-never-send-local", cfg_parse_pp2_never_send_local },
{ /* END */ },
}};
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
/* Returns the 64-bit XXH64 hash, computed with a zero seed, of the <size>
 * bytes found at <buf>. It is used for instance by conn_reverse() to turn a
 * name into the sni_prehash member of the connection hash parameters.
 */
uint64_t conn_hash_prehash(char *buf, size_t size)
{
	const uint64_t seed = 0;

	return XXH64(buf, size, seed);
}
/* Feeds the <size> bytes at <data> into the running hash state <hash>, then
 * ORs <type> into <flags> so that the hash header remembers which kind of
 * parameter took part in the hash.
 */
static void conn_hash_update(XXH64_state_t *hash,
                             const void *data, size_t size,
                             enum conn_hash_params_t *flags,
                             enum conn_hash_params_t type)
{
	XXH64_update(hash, data, size);
	*flags = *flags | type;
}
/* Finalizes the running hash state <hash> and returns the resulting connection
 * hash: <flags> shifted left by CONN_HASH_PAYLOAD_LEN bits, ORed with the
 * payload part of the XXH64 digest as extracted by CONN_HASH_GET_PAYLOAD().
 */
static uint64_t conn_hash_digest(XXH64_state_t *hash,
                                 enum conn_hash_params_t flags)
{
	const uint64_t digest = XXH64_digest(hash);
	uint64_t result = (uint64_t)flags;

	result <<= CONN_HASH_PAYLOAD_LEN;
	result |= CONN_HASH_GET_PAYLOAD(digest);

	return result;
}
/* Private helper feeding the socket address <ss> into the connection hash
 * state <hash>. For AF_INET and AF_INET6 addresses, the address is always
 * hashed and flagged with <param_type_addr> in <hash_flags>, and the port is
 * hashed and flagged with <param_type_port> only when it is non-zero. Other
 * address families are ignored.
 */
static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss,
                                         XXH64_state_t *hash,
                                         enum conn_hash_params_t *hash_flags,
                                         enum conn_hash_params_t param_type_addr,
                                         enum conn_hash_params_t param_type_port)
{
	switch (ss->ss_family) {
	case AF_INET: {
		const struct sockaddr_in *in4 = (const struct sockaddr_in *)ss;

		conn_hash_update(hash, &in4->sin_addr, sizeof(in4->sin_addr),
		                 hash_flags, param_type_addr);

		if (in4->sin_port)
			conn_hash_update(hash, &in4->sin_port, sizeof(in4->sin_port),
			                 hash_flags, param_type_port);
		break;
	}

	case AF_INET6: {
		const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)ss;

		conn_hash_update(hash, &in6->sin6_addr, sizeof(in6->sin6_addr),
		                 hash_flags, param_type_addr);

		if (in6->sin6_port)
			conn_hash_update(hash, &in6->sin6_port, sizeof(in6->sin6_port),
			                 hash_flags, param_type_port);
		break;
	}

	default:
		/* nothing to hash for other families */
		break;
	}
}
uint64_t conn_calculate_hash(const struct conn_hash_params *params)
{
enum conn_hash_params_t hash_flags = 0;
XXH64_state_t hash;
XXH64_reset(&hash, 0);
conn_hash_update(&hash, &params->target, sizeof(params->target), &hash_flags, 0);
if (params->sni_prehash) {
conn_hash_update(&hash,
&params->sni_prehash, sizeof(params->sni_prehash),
&hash_flags, CONN_HASH_PARAMS_TYPE_SNI);
}
if (params->dst_addr) {
conn_calculate_hash_sockaddr(params->dst_addr,
&hash, &hash_flags,
CONN_HASH_PARAMS_TYPE_DST_ADDR,
CONN_HASH_PARAMS_TYPE_DST_PORT);
}
if (params->src_addr) {
conn_calculate_hash_sockaddr(params->src_addr,
&hash, &hash_flags,
CONN_HASH_PARAMS_TYPE_SRC_ADDR,
CONN_HASH_PARAMS_TYPE_SRC_PORT);
}
if (params->proxy_prehash) {
conn_hash_update(&hash,
&params->proxy_prehash, sizeof(params->proxy_prehash),
&hash_flags, CONN_HASH_PARAMS_TYPE_PROXY);
}
if (params->mark_tos_prehash) {
conn_hash_update(&hash,
&params->mark_tos_prehash, sizeof(params->mark_tos_prehash),
&hash_flags, CONN_HASH_PARAMS_TYPE_MARK_TOS);
}
return conn_hash_digest(&hash, hash_flags);
}
/* Reverse a <conn> connection instance. This effectively moves the connection
* from frontend to backend side or vice-versa depending on its initial status.
*
* For active reversal, 'reverse' member points to the listener used as the new
* connection target. Once transition is completed, the connection needs to be
* accepted on the listener to instantiate its parent session before using
* streams.
*
* For passive reversal, 'reverse' member points to the server used as the new
* connection target. Once transition is completed, the connection appears as a
* normal backend connection.
*
* Returns 0 on success else non-zero.
*/
int conn_reverse(struct connection *conn)
{
struct conn_hash_params hash_params;
int64_t hash = 0;
struct session *sess = conn->owner;
if (!conn_is_back(conn)) {
/* srv must have been set by a previous 'attach-srv' rule. */
struct server *srv = objt_server(conn->reverse.target);
BUG_ON(!srv);
if (conn_backend_init(conn))
return 1;
/* Initialize hash value for usage as idle conns. */
memset(&hash_params, 0, sizeof(hash_params));
hash_params.target = srv;
if (b_data(&conn->reverse.name)) {
/* data cannot wrap else prehash usage is incorrect */
BUG_ON(b_data(&conn->reverse.name) != b_contig_data(&conn->reverse.name, 0));
hash_params.sni_prehash =
conn_hash_prehash(b_head(&conn->reverse.name),
b_data(&conn->reverse.name));
}
hash = conn_calculate_hash(&hash_params);
conn->hash_node->node.key = hash;
conn->target = &srv->obj_type;
srv_use_conn(srv, conn);
/* Free the session after detaching the connection from it. */
session_unown_conn(sess, conn);
sess->origin = NULL;
session_free(sess);
conn_set_owner(conn, NULL, NULL);
conn->flags |= CO_FL_REVERSED;
}
else {
MEDIUM: proto_reverse_connect: bootstrap active reverse connection Implement active reverse connection initialization. This is done through a new task stored in the receiver structure. This task is instantiated via bind callback and first woken up via enable callback. Task handler is separated into two halves. On the first step, a new connection is allocated and stored in <pend_conn> member of the receiver. This new client connection will proceed to connect using the server instance referenced in the bind_conf. When connect has successfully been executed and HTTP/2 connection is ready for exchange after SETTINGS, reverse_connect task is woken up. As <pend_conn> is still set, the second halve is executed which only execute listener_accept(). This will in turn execute accept_conn callback which is defined to return the pending connection. The task is automatically requeued inside accept_conn callback if bind maxconn is not yet reached. This allows to specify how many connection should be opened. Each connection is instantiated and reversed serially one by one until maxconn is reached. conn_free() has been modified to handle failure if a reverse connection fails before being accepted. In this case, no session exists to notify about the failure. Instead, reverse_connect task is requeud with a 1 second delay, giving time to fix a possible network issue. This will allow to attempt a new connection reverse. Note that for the moment connection rebinding after accept is disabled for simplicity. Extra operations are required to migrate an existing connection and its stack to a new thread which will be implemented later.
2023-08-23 11:16:07 -04:00
/* Wake up receiver to proceed to connection accept. */
struct listener *l = __objt_listener(conn->reverse.target);
conn_backend_deinit(conn);
MEDIUM: proto_reverse_connect: bootstrap active reverse connection Implement active reverse connection initialization. This is done through a new task stored in the receiver structure. This task is instantiated via bind callback and first woken up via enable callback. Task handler is separated into two halves. On the first step, a new connection is allocated and stored in <pend_conn> member of the receiver. This new client connection will proceed to connect using the server instance referenced in the bind_conf. When connect has successfully been executed and HTTP/2 connection is ready for exchange after SETTINGS, reverse_connect task is woken up. As <pend_conn> is still set, the second halve is executed which only execute listener_accept(). This will in turn execute accept_conn callback which is defined to return the pending connection. The task is automatically requeued inside accept_conn callback if bind maxconn is not yet reached. This allows to specify how many connection should be opened. Each connection is instantiated and reversed serially one by one until maxconn is reached. conn_free() has been modified to handle failure if a reverse connection fails before being accepted. In this case, no session exists to notify about the failure. Instead, reverse_connect task is requeud with a 1 second delay, giving time to fix a possible network issue. This will allow to attempt a new connection reverse. Note that for the moment connection rebinding after accept is disabled for simplicity. Extra operations are required to migrate an existing connection and its stack to a new thread which will be implemented later.
2023-08-23 11:16:07 -04:00
conn->target = &l->obj_type;
conn->flags |= CO_FL_ACT_REVERSING;
task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
}
/* Invert source and destination addresses if already set. */
SWAP(conn->src, conn->dst);
conn->reverse.target = NULL;
ha_free(&conn->reverse.name.area);
conn->reverse.name = BUF_NULL;
return 0;
}
/* Handler of the task of mux_stopping_data. Called on soft-stop. It walks the
 * current thread's list of connections registered in mux_stopping_data and
 * calls the wake() callback of the mux of each connection having one. Returns
 * the task <t> itself.
 */
static struct task *mux_stopping_process(struct task *t, void *ctx, unsigned int state)
{
	struct connection *cur, *next;

	/* the safe variant is used: <next> is saved before wake() is called */
	list_for_each_entry_safe(cur, next, &mux_stopping_data[tid].list, stopping_list) {
		if (!cur->mux || !cur->mux->wake)
			continue;

		cur->mux->wake(cur);
	}

	return t;
}
static int allocate_mux_cleanup(void)
{
/* allocates the thread bound mux_stopping_data task */
mux_stopping_data[tid].task = task_new_here();
if (!mux_stopping_data[tid].task) {
ha_alert("Failed to allocate the task for connection cleanup on thread %d.\n", tid);
return 0;
}
mux_stopping_data[tid].task->process = mux_stopping_process;
LIST_INIT(&mux_stopping_data[tid].list);
return 1;
}
REGISTER_PER_THREAD_ALLOC(allocate_mux_cleanup);
/* Per-thread release callback: destroys the task of the calling thread's
 * mux_stopping_data entry. Always returns 1.
 */
static int deallocate_mux_cleanup(void)
{
	struct task *cleanup = mux_stopping_data[tid].task;

	task_destroy(cleanup);

	return 1;
}
REGISTER_PER_THREAD_FREE(deallocate_mux_cleanup);
/* Post-deinit callback: destroys the idle connections cleanup task of each of
 * the global.nbthread first entries of idle_conns[].
 */
static void deinit_idle_conns(void)
{
	int thr = 0;

	while (thr < global.nbthread) {
		task_destroy(idle_conns[thr].cleanup_task);
		thr++;
	}
}
REGISTER_POST_DEINIT(deinit_idle_conns);