haproxy/src/connection.c

/*
 * Connection management functions
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <errno.h>

#include <import/ebmbtree.h>

#include <haproxy/api.h>
#include <haproxy/arg.h>
#include <haproxy/cfgparse.h>
#include <haproxy/connection.h>
#include <haproxy/fd.h>
#include <haproxy/frontend.h>
#include <haproxy/hash.h>
#include <haproxy/list.h>
#include <haproxy/log.h>
#include <haproxy/namespace.h>
#include <haproxy/net_helper.h>
#include <haproxy/proto_rhttp.h>
#include <haproxy/proto_tcp.h>
#include <haproxy/sample.h>
#include <haproxy/sc_strm.h>
#include <haproxy/server.h>
#include <haproxy/session.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/stconn.h>
#include <haproxy/tools.h>
#include <haproxy/xxhash.h>


DECLARE_POOL(pool_head_connection,     "connection",     sizeof(struct connection));
DECLARE_POOL(pool_head_conn_hash_node, "conn_hash_node", sizeof(struct conn_hash_node));
DECLARE_POOL(pool_head_sockaddr,       "sockaddr",       sizeof(struct sockaddr_storage));
DECLARE_POOL(pool_head_pp_tlv_128,     "pp_tlv_128",     sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_128);
DECLARE_POOL(pool_head_pp_tlv_256,     "pp_tlv_256",     sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_256);

struct idle_conns idle_conns[MAX_THREADS] = { };
struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };

/* List head of all known muxes for PROTO */
struct mux_proto_list mux_proto_list = {
        .list = LIST_HEAD_INIT(mux_proto_list.list)
};

struct mux_stopping_data mux_stopping_data[MAX_THREADS];

/* disables sending of proxy-protocol-v2's LOCAL command */
static int pp2_never_send_local;

/* find the value of a received TLV for a given type */
struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
{
	struct conn_tlv_list *tlv = NULL;

	if (!conn)
		return NULL;

	list_for_each_entry(tlv, &conn->tlv_list, list) {
		if (tlv->type == type)
			return tlv;
	}

	return NULL;
}

/* Remove <conn> idle connection from its attached tree (idle, safe or avail).
 * If also present in the secondary server idle list, conn is removed from it.
 *
 * Must be called with idle_conns_lock held.
 */
void conn_delete_from_tree(struct connection *conn)
{
	LIST_DEL_INIT(&conn->idle_list);
	eb64_delete(&conn->hash_node->node);
}

int conn_create_mux(struct connection *conn)
{
	if (conn_is_back(conn)) {
		struct server *srv;
		struct stconn *sc = conn->ctx;
		struct session *sess = conn->owner;

		if (conn->flags & CO_FL_ERROR)
			goto fail;

		if (sess && obj_type(sess->origin) == OBJ_TYPE_CHECK) {
			if (conn_install_mux_chk(conn, conn->ctx, sess) < 0)
				goto fail;
		}
		else if (conn_install_mux_be(conn, conn->ctx, sess, NULL) < 0)
			goto fail;
		srv = objt_server(conn->target);

		/* If we're doing http-reuse always, and the connection is not
		 * private with available streams (an http2 connection), add it
		 * to the available list, so that others can use it right
		 * away. If the connection is private, add it in the session
		 * server list.
		 */
		if (srv && ((srv->proxy->options & PR_O_REUSE_MASK) == PR_O_REUSE_ALWS) &&
		    !(conn->flags & CO_FL_PRIVATE) && conn->mux->avail_streams(conn) > 0) {
			srv_add_to_avail_list(srv, conn);
		}
		else if (conn->flags & CO_FL_PRIVATE) {
			/* If it fail now, the same will be done in mux->detach() callback */
			session_add_conn(sess, conn, conn->target);
		}
		return 0;
fail:
		/* let the upper layer know the connection failed */
		if (sc) {
			sc->app_ops->wake(sc);
		}
		else if (conn_reverse_in_preconnect(conn)) {
			struct listener *l = conn_active_reverse_listener(conn);

			/* If mux init failed, consider connection on error.
			 * This is necessary to ensure connection is freed by
			 * proto-rhttp receiver task.
			 */
			if (!conn->mux)
				conn->flags |= CO_FL_ERROR;

			/* If connection is interrupted  without CO_FL_ERROR, receiver task won't free it. */
			BUG_ON(!(conn->flags & CO_FL_ERROR));

			task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
		}
		return -1;
	} else
		return conn_complete_session(conn);

}

/* This is used at the end of the socket IOCB to possibly create the mux if it
 * was not done yet, or wake it up if flags changed compared to old_flags or if
 * need_wake insists on this. It returns <0 if the connection was destroyed and
 * must not be used, >=0 otherwise.
 */
int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
{
	int ret = 0;

	/* If we don't yet have a mux, that means we were waiting for
	 * information to create one, typically from the ALPN. If we're
	 * done with the handshake, attempt to create one.
	 */
	if (unlikely(!conn->mux) && !(conn->flags & CO_FL_WAIT_XPRT)) {
		ret = conn_create_mux(conn);
		if (ret < 0)
			goto done;
	}

	/* The wake callback is normally used to notify the data layer about
	 * data layer activity (successful send/recv), connection establishment,
	 * shutdown and fatal errors. We need to consider the following
	 * situations to wake up the data layer :
	 *  - change among the CO_FL_NOTIFY_DONE flags :
	 *      SOCK_{RD,WR}_SH, ERROR,
	 *  - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
	 *    end of handshake and transition to CONNECTED
	 *  - raise of CONNECTED with HANDSHAKE down
	 *  - end of HANDSHAKE with CONNECTED set
	 *  - regular data layer activity
	 *
	 * One tricky case is the wake up on read0 or error on an idle
	 * backend connection, that can happen on a connection that is still
	 * polled while at the same moment another thread is about to perform a
	 * takeover. The solution against this is to remove the connection from
	 * the idle list if it was in it, and possibly reinsert it at the end
	 * if the connection remains valid. The cost is non-null (locked tree
	 * removal) but remains low given that this is extremely rarely called.
	 * In any case it's guaranteed by the FD's thread_mask that we're
	 * called from the same thread the connection is queued in.
	 *
	 * Note that the wake callback is allowed to release the connection and
	 * the fd (and return < 0 in this case).
	 */
	if ((forced_wake ||
	     ((conn->flags ^ old_flags) & CO_FL_NOTIFY_DONE) ||
	     ((old_flags & CO_FL_WAIT_XPRT) && !(conn->flags & CO_FL_WAIT_XPRT))) &&
	    conn->mux && conn->mux->wake) {
		uint conn_in_list = conn->flags & CO_FL_LIST_MASK;
		struct server *srv = objt_server(conn->target);

		if (conn_in_list) {
			HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
			conn_delete_from_tree(conn);
			HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
		}

		ret = conn->mux->wake(conn);
		if (ret < 0)
			goto done;

		if (conn_in_list) {
			HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
			_srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
			HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
		}
	}
 done:
	return ret;
}

/* Change the mux for the connection.
 * The caller should make sure he's not subscribed to the underlying XPRT.
 */
int conn_upgrade_mux_fe(struct connection *conn, void *ctx, struct buffer *buf,
                        struct ist mux_proto, int mode)
{
	struct bind_conf *bind_conf = __objt_listener(conn->target)->bind_conf;
	const struct mux_ops *old_mux, *new_mux;
	void *old_mux_ctx;
	const char *alpn_str = NULL;
	int alpn_len = 0;

	if (!mux_proto.len) {
		conn_get_alpn(conn, &alpn_str, &alpn_len);
		mux_proto = ist2(alpn_str, alpn_len);
	}
	new_mux = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
	old_mux = conn->mux;

	/* No mux found */
	if (!new_mux)
		return -1;

	/* Same mux, nothing to do */
	if (old_mux == new_mux)
		return 0;

	old_mux_ctx = conn->ctx;
	conn->mux = new_mux;
	conn->ctx = ctx;
	if (new_mux->init(conn, bind_conf->frontend, conn->owner, buf) == -1) {
		/* The mux upgrade failed, so restore the old mux */
		conn->ctx = old_mux_ctx;
		conn->mux = old_mux;
		return -1;
	}

	/* The mux was upgraded, destroy the old one */
	*buf = BUF_NULL;
	old_mux->destroy(old_mux_ctx);
	return 0;
}

/* installs the best mux for incoming connection <conn> using the upper context
 * <ctx>. If the mux protocol is forced, we use it to find the best
 * mux. Otherwise we use the ALPN name, if any. Returns < 0 on error.
 */
int conn_install_mux_fe(struct connection *conn, void *ctx)
{
	struct bind_conf     *bind_conf = __objt_listener(conn->target)->bind_conf;
	const struct mux_ops *mux_ops;

	if (bind_conf->mux_proto)
		mux_ops = bind_conf->mux_proto->mux;
	else {
		struct ist mux_proto;
		const char *alpn_str = NULL;
		int alpn_len = 0;
		int mode;

		if (bind_conf->frontend->mode == PR_MODE_HTTP)
			mode = PROTO_MODE_HTTP;
		else
			mode = PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_str, &alpn_len);
		mux_proto = ist2(alpn_str, alpn_len);
		mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
		if (!mux_ops)
			return -1;
	}

	/* Ensure a valid protocol is selected if connection is targeted by a
	 * tcp-request session attach-srv rule.
	 */
	if (conn->reverse.target && !(mux_ops->flags & MX_FL_REVERSABLE)) {
		conn->err_code = CO_ER_REVERSE;
		return -1;
	}

	return conn_install_mux(conn, mux_ops, ctx, bind_conf->frontend, conn->owner);
}

/* installs the best mux for outgoing connection <conn> using the upper context
 * <ctx>. If the server mux protocol is forced, we use it to find the best mux.
 * It's also possible to specify an alternative mux protocol <force_mux_ops>,
 * in which case it will be used instead of the default server mux protocol.
 *
 * Returns < 0 on error.
 */
int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess,
                        const struct mux_ops *force_mux_ops)
{
	struct server *srv = objt_server(conn->target);
	struct proxy  *prx = objt_proxy(conn->target);
	const struct mux_ops *mux_ops;

	if (srv)
		prx = srv->proxy;

	if (!prx) // target must be either proxy or server
		return -1;

	if (srv && srv->mux_proto && likely(!force_mux_ops)) {
		mux_ops = srv->mux_proto->mux;
	}
	else if (srv && unlikely(force_mux_ops)) {
		mux_ops = force_mux_ops;
	}
	else {
		struct ist mux_proto;
		const char *alpn_str = NULL;
		int alpn_len = 0;
		int mode;

		if (prx->mode == PR_MODE_HTTP)
			mode = PROTO_MODE_HTTP;
		else
			mode = PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_str, &alpn_len);
		mux_proto = ist2(alpn_str, alpn_len);

		mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
		if (!mux_ops)
			return -1;
	}
	return conn_install_mux(conn, mux_ops, ctx, prx, sess);
}

/* installs the best mux for outgoing connection <conn> for a check using the
 * upper context <ctx>. If the mux protocol is forced by the check, we use it to
 * find the best mux. Returns < 0 on error.
 */
int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess)
{
	struct check *check = objt_check(sess->origin);
	struct server *srv = objt_server(conn->target);
	struct proxy *prx = objt_proxy(conn->target);
	const struct mux_ops *mux_ops;

	if (!check) // Check must be defined
		return -1;

	if (srv)
		prx = srv->proxy;

	if (!prx) // target must be either proxy or server
		return -1;

	if (check->mux_proto)
		mux_ops = check->mux_proto->mux;
	else {
		struct ist mux_proto;
		const char *alpn_str = NULL;
		int alpn_len = 0;
		int mode;

		if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK)
			mode = PROTO_MODE_HTTP;
		else
			mode = PROTO_MODE_TCP;

		conn_get_alpn(conn, &alpn_str, &alpn_len);
		mux_proto = ist2(alpn_str, alpn_len);

		mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
		if (!mux_ops)
			return -1;
	}
	return conn_install_mux(conn, mux_ops, ctx, prx, sess);
}

/* Set the ALPN of connection <conn> to <alpn>. If force is false, <alpn> must
 * be a subset or identical to the registered protos for the parent SSL_CTX.
 * In this case <alpn> must be a single protocol value, not a list.
 *
 * Returns 0 if ALPN is updated else -1.
 */
int conn_update_alpn(struct connection *conn, const struct ist alpn, int force)
{
#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
	size_t alpn_len = istlen(alpn);
	char *ctx_alpn_str = NULL;
	int ctx_alpn_len = 0, found = 0;

	/* if not force, first search if alpn is a subset or identical to the
	 * parent SSL_CTX.
	 */
	if (!force) {
		/* retrieve the SSL_CTX according to the connection side. */
		if (conn_is_back(conn)) {
			if (obj_type(conn->target) == OBJ_TYPE_SERVER) {
				struct server *srv = __objt_server(conn->target);
				ctx_alpn_str = srv->ssl_ctx.alpn_str;
				ctx_alpn_len = srv->ssl_ctx.alpn_len;
			}
		}
		else {
			struct session *sess = conn->owner;
			struct listener *li = sess->listener;

			if (li->bind_conf && li->bind_conf->options & BC_O_USE_SSL) {
				ctx_alpn_str = li->bind_conf->ssl_conf.alpn_str;
				ctx_alpn_len = li->bind_conf->ssl_conf.alpn_len;
			}
		}

		if (ctx_alpn_str) {
			/* search if ALPN is present in SSL_CTX ALPN before
			 * using it.
			 */
			while (ctx_alpn_len) {
				/* skip ALPN whose size is not 8 */
				if (*ctx_alpn_str != alpn_len - 1) {
					ctx_alpn_len -= *ctx_alpn_str + 1;
				}
				else {
					if (isteqi(ist2(ctx_alpn_str, alpn_len), alpn)) {
						found = 1;
						break;
					}
				}
				ctx_alpn_str += *ctx_alpn_str + 1;

				/* This indicates an invalid ALPN formatted
				 * string and should never happen. */
				BUG_ON(ctx_alpn_len < 0);
			}
		}
	}

	if (found || force) {
		ssl_sock_set_alpn(conn, (const uchar *)istptr(alpn), istlen(alpn));
		return 0;
	}

#endif
	return -1;
}

/* Initializes all required fields for a new connection. Note that it does the
 * minimum acceptable initialization for a connection that already exists and
 * is about to be reused. It also leaves the addresses untouched, which makes
 * it usable across connection retries to reset a connection to a known state.
 */
void conn_init(struct connection *conn, void *target)
{
	conn->obj_type = OBJ_TYPE_CONN;
	conn->flags = CO_FL_NONE;
	conn->mux = NULL;
	conn->ctx = NULL;
	conn->owner = NULL;
	conn->send_proxy_ofs = 0;
	conn->handle.fd = DEAD_FD_MAGIC;
	conn->err_code = CO_ER_NONE;
	conn->target = target;
	conn->destroy_cb = NULL;
	conn->proxy_netns = NULL;
	MT_LIST_INIT(&conn->toremove_list);
	if (conn_is_back(conn))
		LIST_INIT(&conn->session_list);
	else
		LIST_INIT(&conn->stopping_list);
	LIST_INIT(&conn->tlv_list);
	conn->subs = NULL;
	conn->src = NULL;
	conn->dst = NULL;
	conn->hash_node = NULL;
	conn->xprt = NULL;
	conn->reverse.target = NULL;
	conn->reverse.name = BUF_NULL;
}

/* Initialize members used for backend connections.
 *
 * Returns 0 on success else non-zero.
 */
static int conn_backend_init(struct connection *conn)
{
	if (!sockaddr_alloc(&conn->dst, 0, 0))
		return 1;

	conn->hash_node = conn_alloc_hash_node(conn);
	if (unlikely(!conn->hash_node))
		return 1;

	return 0;
}

/* Release connection elements reserved for backend side usage. It also takes
 * care to detach it if linked to a session or a server instance.
 *
 * This function is useful when freeing a connection or reversing it to the
 * frontend side.
 */
static void conn_backend_deinit(struct connection *conn)
{
	/* If the connection is owned by the session, remove it from its list
	 */
	if (conn_is_back(conn) && LIST_INLIST(&conn->session_list)) {
		session_unown_conn(conn->owner, conn);
	}
	else if (!(conn->flags & CO_FL_PRIVATE)) {
		if (obj_type(conn->target) == OBJ_TYPE_SERVER)
			srv_release_conn(__objt_server(conn->target), conn);
	}

	/* Make sure the connection is not left in the idle connection tree */
	if (conn->hash_node != NULL)
		BUG_ON(conn->hash_node->node.node.leaf_p != NULL);

	pool_free(pool_head_conn_hash_node, conn->hash_node);
	conn->hash_node = NULL;

}

/* Tries to allocate a new connection and initialized its main fields. The
 * connection is returned on success, NULL on failure. The connection must
 * be released using pool_free() or conn_free().
 */
struct connection *conn_new(void *target)
{
	struct connection *conn;

	conn = pool_alloc(pool_head_connection);
	if (unlikely(!conn))
		return NULL;

	conn_init(conn, target);

	if (conn_is_back(conn)) {
		if (obj_type(target) == OBJ_TYPE_SERVER)
			srv_use_conn(__objt_server(target), conn);

		if (conn_backend_init(conn)) {
			conn_free(conn);
			return NULL;
		}
	}

	return conn;
}

/* Releases a connection previously allocated by conn_new() */
void conn_free(struct connection *conn)
{
	struct conn_tlv_list *tlv, *tlv_back = NULL;

	if (conn_is_back(conn))
		conn_backend_deinit(conn);

	/* Remove the conn from toremove_list.
	 *
	 * This is needed to prevent a double-free in case the connection was
	 * already scheduled from cleaning but is freed before via another
	 * call.
	 */
	MT_LIST_DELETE(&conn->toremove_list);

	sockaddr_free(&conn->src);
	sockaddr_free(&conn->dst);

	/* Free all previously allocated TLVs */
	list_for_each_entry_safe(tlv, tlv_back, &conn->tlv_list, list) {
		LIST_DELETE(&tlv->list);
		if (tlv->len > HA_PP2_TLV_VALUE_256)
			free(tlv);
		else if (tlv->len <= HA_PP2_TLV_VALUE_128)
			pool_free(pool_head_pp_tlv_128, tlv);
		else
			pool_free(pool_head_pp_tlv_256, tlv);
	}

	ha_free(&conn->reverse.name.area);

	if (conn_reverse_in_preconnect(conn)) {
		struct listener *l = conn_active_reverse_listener(conn);
		rhttp_notify_preconn_err(l);
		HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
	}
	else if (conn->flags & CO_FL_REVERSED) {
		HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
	}


	conn_force_unsubscribe(conn);
	pool_free(pool_head_connection, conn);
}

struct conn_hash_node *conn_alloc_hash_node(struct connection *conn)
{
	struct conn_hash_node *hash_node = NULL;

	hash_node = pool_zalloc(pool_head_conn_hash_node);
	if (unlikely(!hash_node))
		return NULL;

	hash_node->conn = conn;

	return hash_node;
}

/* Allocates a struct sockaddr from the pool if needed, assigns it to *sap and
 * returns it. If <sap> is NULL, the address is always allocated and returned.
 * if <sap> is non-null, an address will only be allocated if it points to a
 * non-null pointer. In this case the allocated address will be assigned there.
 * If <orig> is non-null and <len> positive, the address in <sa> will be copied
 * into the allocated address. In both situations the new pointer is returned.
 */
struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len)
{
	struct sockaddr_storage *sa;

	if (sap && *sap)
		return *sap;

	sa = pool_alloc(pool_head_sockaddr);
	if (sa && orig && len > 0)
		memcpy(sa, orig, len);
	if (sap)
		*sap = sa;
	return sa;
}

/* Releases the struct sockaddr potentially pointed to by <sap> to the pool. It
 * may be NULL or may point to NULL. If <sap> is not NULL, a NULL is placed
 * there.
 */
void sockaddr_free(struct sockaddr_storage **sap)
{
	if (!sap)
		return;
	pool_free(pool_head_sockaddr, *sap);
	*sap = NULL;
}

/* Try to add a handshake pseudo-XPRT. If the connection's first XPRT is
 * raw_sock, then just use the new XPRT as the connection XPRT, otherwise
 * call the xprt's add_xprt() method.
 * Returns 0 on success, or non-zero on failure.
 */
int xprt_add_hs(struct connection *conn)
{
	void *xprt_ctx = NULL;
	const struct xprt_ops *ops = xprt_get(XPRT_HANDSHAKE);
	void *nextxprt_ctx = NULL;
	const struct xprt_ops *nextxprt_ops = NULL;

	if (conn->flags & CO_FL_ERROR)
		return -1;
	if (ops->init(conn, &xprt_ctx) < 0)
		return -1;
	if (conn->xprt == xprt_get(XPRT_RAW)) {
		nextxprt_ctx = conn->xprt_ctx;
		nextxprt_ops = conn->xprt;
		conn->xprt_ctx = xprt_ctx;
		conn->xprt = ops;
	} else {
		if (conn->xprt->add_xprt(conn, conn->xprt_ctx, xprt_ctx, ops,
		                         &nextxprt_ctx, &nextxprt_ops) != 0) {
			ops->close(conn, xprt_ctx);
			return -1;
		}
	}
	if (ops->add_xprt(conn, xprt_ctx, nextxprt_ctx, nextxprt_ops, NULL, NULL) != 0) {
		ops->close(conn, xprt_ctx);
		return -1;
	}
	return 0;
}

/* returns a human-readable error code for conn->err_code, or NULL if the code
 * is unknown.
 */
const char *conn_err_code_str(struct connection *c)
{
	switch (c->err_code) {
	case CO_ER_NONE:          return "Success";

	case CO_ER_CONF_FDLIM:    return "Reached configured maxconn value";
	case CO_ER_PROC_FDLIM:    return "Too many sockets on the process";
	case CO_ER_SYS_FDLIM:     return "Too many sockets on the system";
	case CO_ER_SYS_MEMLIM:    return "Out of system buffers";
	case CO_ER_NOPROTO:       return "Protocol or address family not supported";
	case CO_ER_SOCK_ERR:      return "General socket error";
	case CO_ER_PORT_RANGE:    return "Source port range exhausted";
	case CO_ER_CANT_BIND:     return "Can't bind to source address";
	case CO_ER_FREE_PORTS:    return "Out of local source ports on the system";
	case CO_ER_ADDR_INUSE:    return "Local source address already in use";

	case CO_ER_PRX_EMPTY:     return "Connection closed while waiting for PROXY protocol header";
	case CO_ER_PRX_ABORT:     return "Connection error while waiting for PROXY protocol header";
	case CO_ER_PRX_TIMEOUT:   return "Timeout while waiting for PROXY protocol header";
	case CO_ER_PRX_TRUNCATED: return "Truncated PROXY protocol header received";
	case CO_ER_PRX_NOT_HDR:   return "Received something which does not look like a PROXY protocol header";
	case CO_ER_PRX_BAD_HDR:   return "Received an invalid PROXY protocol header";
	case CO_ER_PRX_BAD_PROTO: return "Received an unhandled protocol in the PROXY protocol header";

	case CO_ER_CIP_EMPTY:     return "Connection closed while waiting for NetScaler Client IP header";
	case CO_ER_CIP_ABORT:     return "Connection error while waiting for NetScaler Client IP header";
	case CO_ER_CIP_TIMEOUT:   return "Timeout while waiting for a NetScaler Client IP header";
	case CO_ER_CIP_TRUNCATED: return "Truncated NetScaler Client IP header received";
	case CO_ER_CIP_BAD_MAGIC: return "Received an invalid NetScaler Client IP magic number";
	case CO_ER_CIP_BAD_PROTO: return "Received an unhandled protocol in the NetScaler Client IP header";

	case CO_ER_SSL_EMPTY:     return "Connection closed during SSL handshake";
	case CO_ER_SSL_ABORT:     return "Connection error during SSL handshake";
	case CO_ER_SSL_TIMEOUT:   return "Timeout during SSL handshake";
	case CO_ER_SSL_TOO_MANY:  return "Too many SSL connections";
	case CO_ER_SSL_NO_MEM:    return "Out of memory when initializing an SSL connection";
	case CO_ER_SSL_RENEG:     return "Rejected a client-initiated SSL renegotiation attempt";
	case CO_ER_SSL_CA_FAIL:   return "SSL client CA chain cannot be verified";
	case CO_ER_SSL_CRT_FAIL:  return "SSL client certificate not trusted";
	case CO_ER_SSL_MISMATCH:  return "Server presented an SSL certificate different from the configured one";
	case CO_ER_SSL_MISMATCH_SNI: return "Server presented an SSL certificate different from the expected one";
	case CO_ER_SSL_HANDSHAKE: return "SSL handshake failure";
	case CO_ER_SSL_HANDSHAKE_HB: return "SSL handshake failure after heartbeat";
	case CO_ER_SSL_KILLED_HB: return "Stopped a TLSv1 heartbeat attack (CVE-2014-0160)";
	case CO_ER_SSL_NO_TARGET: return "Attempt to use SSL on an unknown target (internal error)";
	case CO_ER_SSL_EARLY_FAILED: return "Server refused early data";

	case CO_ER_SOCKS4_SEND:    return "SOCKS4 Proxy write error during handshake";
	case CO_ER_SOCKS4_RECV:    return "SOCKS4 Proxy read error during handshake";
	case CO_ER_SOCKS4_DENY:    return "SOCKS4 Proxy deny the request";
	case CO_ER_SOCKS4_ABORT:   return "SOCKS4 Proxy handshake aborted by server";

	case CO_ERR_SSL_FATAL:     return "SSL fatal error";

	case CO_ER_REVERSE:        return "Reverse connect failure";
	}
	return NULL;
}

/* Send a message over an established connection. It makes use of send() and
 * returns the same return code and errno. If the socket layer is not ready yet
 * then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
 * as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. It returns
 * EMSGSIZE if called with a zero length message. The purpose is to simplify
 * some rare attempts to directly write on the socket from above the connection
 * (typically send_proxy). In case of EAGAIN, the fd is marked as "cant_send".
 * It automatically retries on EINTR. Other errors cause the connection to be
 * marked as in error state. It takes similar arguments as send() except the
 * first one which is the connection instead of the file descriptor. <flags>
 * only support CO_SFL_MSG_MORE.
 */
int conn_ctrl_send(struct connection *conn, const void *buf, int len, int flags)
{
	const struct buffer buffer = b_make((char*)buf, len, 0, len);
	const struct xprt_ops *xprt = xprt_get(XPRT_RAW);
	int ret;

	ret = -1;
	errno = ENOTSOCK;

	if (conn->flags & CO_FL_SOCK_WR_SH)
		goto fail;

	if (!conn_ctrl_ready(conn))
		goto fail;

	errno = EMSGSIZE;
	if (!len)
		goto fail;

	/* snd_buf() already takes care of updating conn->flags and handling
	 * the FD polling status.
	 */
	ret = xprt->snd_buf(conn, NULL, &buffer, buffer.data, flags);
	if (conn->flags & CO_FL_ERROR)
		ret = -1;
	return ret;
 fail:
	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
	return ret;
}

/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
 * The event subscriber <es> is not allowed to change from a previous call as
 * long as at least one event is still subscribed. The <event_type> must only
 * be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
 */
int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
{
	BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
	BUG_ON(conn->subs && conn->subs != es);

	es->events &= ~event_type;
	if (!es->events)
		conn->subs = NULL;

	if (conn_ctrl_ready(conn) && conn->ctrl->ignore_events)
		conn->ctrl->ignore_events(conn, event_type);

	return 0;
}

/* Called from the upper layer, to subscribe <es> to events <event_type>.
 * The <es> struct is not allowed to differ from the one passed during a
 * previous call to subscribe(). If the connection's ctrl layer is ready,
 * the wait_event is immediately woken up and the subscription is cancelled.
 * It always returns zero.
 */
int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
{
	int ret = 0;

	BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
	BUG_ON(conn->subs && conn->subs != es);

	if (conn->subs && (conn->subs->events & event_type) == event_type)
		return 0;

	if (conn_ctrl_ready(conn) && conn->ctrl->check_events) {
		ret = conn->ctrl->check_events(conn, event_type);
		if (ret)
			tasklet_wakeup(es->tasklet);
	}

	es->events = (es->events | event_type) & ~ret;
	conn->subs = es->events ? es : NULL;
	return 0;
}

/* Drains possibly pending incoming data on the connection and update the flags
 * accordingly. This is used to know whether we need to disable lingering on
 * close. Returns non-zero if it is safe to close without disabling lingering,
 * otherwise zero. The CO_FL_SOCK_RD_SH flag may also be updated if the incoming
 * shutdown was reported by the ->drain() function.
 */
int conn_ctrl_drain(struct connection *conn)
{
	int ret = 0;

	if (!conn_ctrl_ready(conn) || conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
		ret = 1;
	else if (conn->ctrl->drain) {
		ret = conn->ctrl->drain(conn);
		if (ret)
			conn->flags |= CO_FL_SOCK_RD_SH;
	}
	return ret;
}

/*
 * Get data length from tlv
 */
static inline size_t get_tlv_length(const struct tlv *src)
{
	return (src->length_hi << 8) | src->length_lo;
}

/* This handshake handler waits a PROXY protocol header at the beginning of the
 * raw data stream. The header looks like this :
 *
 *   "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> <DST4> "\r\n"
 *
 * There must be exactly one space between each field. Fields are :
 *  - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
 *  - SRC3  : layer 3 (eg: IP) source address in standard text form
 *  - DST3  : layer 3 (eg: IP) destination address in standard text form
 *  - SRC4  : layer 4 (eg: TCP port) source address in standard text form
 *  - DST4  : layer 4 (eg: TCP port) destination address in standard text form
 *
 * This line MUST be at the beginning of the buffer and MUST NOT wrap.
 *
 * The header line is small and in all cases smaller than the smallest normal
 * TCP MSS. So it MUST always be delivered as one segment, which ensures we
 * can safely use MSG_PEEK and avoid buffering.
 *
 * Once the data is fetched, the values are set in the connection's address
 * fields, and data are removed from the socket's buffer. The function returns
 * zero if it needs to wait for more data or if it fails, or 1 if it completed
 * and removed itself.
 */
int conn_recv_proxy(struct connection *conn, int flag)
{
	struct session *sess = conn->owner;
	char *line, *end;
	struct proxy_hdr_v2 *hdr_v2;
	const char v2sig[] = PP2_SIGNATURE;
	size_t total_v2_len;
	size_t tlv_offset = 0;
	int ret;

	if (!conn_ctrl_ready(conn))
		goto fail;

	BUG_ON(conn->flags & CO_FL_FDLESS);

	if (!fd_recv_ready(conn->handle.fd))
		goto not_ready;

	while (1) {
		ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
		if (ret < 0) {
			if (errno == EINTR)
				continue;
			if (errno == EAGAIN || errno == EWOULDBLOCK) {
				fd_cant_recv(conn->handle.fd);
				goto not_ready;
			}
			goto recv_abort;
		}
		trash.data = ret;
		break;
	}

	if (!trash.data) {
		/* client shutdown */
		conn->err_code = CO_ER_PRX_EMPTY;
		goto fail;
	}

	conn->flags &= ~CO_FL_WAIT_L4_CONN;

	if (trash.data < 6)
		goto missing;

	line = trash.area;
	end = trash.area + trash.data;

	/* Decode a possible proxy request, fail early if it does not match */
	if (strncmp(line, "PROXY ", 6) != 0)
		goto not_v1;

	line += 6;
	if (trash.data < 9) /* shortest possible line */
		goto missing;

	if (memcmp(line, "TCP4 ", 5) == 0) {
		u32 src3, dst3, sport, dport;

		line += 5;

		src3 = inetaddr_host_lim_ret(line, end, &line);
		if (line == end)
			goto missing;
		if (*line++ != ' ')
			goto bad_header;

		dst3 = inetaddr_host_lim_ret(line, end, &line);
		if (line == end)
			goto missing;
		if (*line++ != ' ')
			goto bad_header;

		sport = read_uint((const char **)&line, end);
		if (line == end)
			goto missing;
		if (*line++ != ' ')
			goto bad_header;

		dport = read_uint((const char **)&line, end);
		if (line > end - 2)
			goto missing;
		if (*line++ != '\r')
			goto bad_header;
		if (*line++ != '\n')
			goto bad_header;

		if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
			goto fail;

		/* update the session's addresses and mark them set */
		((struct sockaddr_in *)sess->src)->sin_family      = AF_INET;
		((struct sockaddr_in *)sess->src)->sin_addr.s_addr = htonl(src3);
		((struct sockaddr_in *)sess->src)->sin_port        = htons(sport);

		((struct sockaddr_in *)sess->dst)->sin_family        = AF_INET;
		((struct sockaddr_in *)sess->dst)->sin_addr.s_addr   = htonl(dst3);
		((struct sockaddr_in *)sess->dst)->sin_port          = htons(dport);
	}
	else if (memcmp(line, "TCP6 ", 5) == 0) {
		u32 sport, dport;
		char *src_s;
		char *dst_s, *sport_s, *dport_s;
		struct in6_addr src3, dst3;

		line += 5;

		src_s = line;
		dst_s = sport_s = dport_s = NULL;
		while (1) {
			if (line > end - 2) {
				goto missing;
			}
			else if (*line == '\r') {
				*line = 0;
				line++;
				if (*line++ != '\n')
					goto bad_header;
				break;
			}

			if (*line == ' ') {
				*line = 0;
				if (!dst_s)
					dst_s = line + 1;
				else if (!sport_s)
					sport_s = line + 1;
				else if (!dport_s)
					dport_s = line + 1;
			}
			line++;
		}

		if (!dst_s || !sport_s || !dport_s)
			goto bad_header;

		sport = read_uint((const char **)&sport_s,dport_s - 1);
		if (*sport_s != 0)
			goto bad_header;

		dport = read_uint((const char **)&dport_s,line - 2);
		if (*dport_s != 0)
			goto bad_header;

		if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
			goto bad_header;

		if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
			goto bad_header;

		if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
			goto fail;

		/* update the session's addresses and mark them set */
		((struct sockaddr_in6 *)sess->src)->sin6_family      = AF_INET6;
		memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, &src3, sizeof(struct in6_addr));
		((struct sockaddr_in6 *)sess->src)->sin6_port        = htons(sport);

		((struct sockaddr_in6 *)sess->dst)->sin6_family        = AF_INET6;
		memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, &dst3, sizeof(struct in6_addr));
		((struct sockaddr_in6 *)sess->dst)->sin6_port          = htons(dport);
	}
	else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
		/* This can be a UNIX socket forwarded by an haproxy upstream */
		line += 9;
	}
	else {
		/* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
		conn->err_code = CO_ER_PRX_BAD_PROTO;
		goto fail;
	}

	trash.data = line - trash.area;
	goto eat_header;

 not_v1:
	/* try PPv2 */
	if (trash.data < PP2_HEADER_LEN)
		goto missing;

	hdr_v2 = (struct proxy_hdr_v2 *) trash.area;

	if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
	    (hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
		conn->err_code = CO_ER_PRX_NOT_HDR;
		goto fail;
	}

	total_v2_len = PP2_HEADER_LEN + ntohs(hdr_v2->len);
	if (trash.data < total_v2_len)
		goto missing;

	switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
	case 0x01: /* PROXY command */
		switch (hdr_v2->fam) {
		case 0x11:  /* TCPv4 */
			if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
				goto bad_header;

			if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
				goto fail;

			((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
			((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
			((struct sockaddr_in *)sess->src)->sin_port = hdr_v2->addr.ip4.src_port;
			((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
			((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
			((struct sockaddr_in *)sess->dst)->sin_port = hdr_v2->addr.ip4.dst_port;
			tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
			break;
		case 0x21:  /* TCPv6 */
			if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
				goto bad_header;

			if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
				goto fail;

			((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
			memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
			((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_v2->addr.ip6.src_port;
			((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
			memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
			((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_v2->addr.ip6.dst_port;
			tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
			break;
		}

		/* TLV parsing */
		while (tlv_offset < total_v2_len) {
			struct ist tlv;
			struct tlv *tlv_packet = NULL;
			struct conn_tlv_list *new_tlv = NULL;
			size_t data_len = 0;

			/* Verify that we have at least TLV_HEADER_SIZE bytes left */
			if (tlv_offset + TLV_HEADER_SIZE > total_v2_len)
				goto bad_header;

			tlv_packet = (struct tlv *) &trash.area[tlv_offset];
			tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet));
			tlv_offset += istlen(tlv) + TLV_HEADER_SIZE;

			/* Verify that the TLV length does not exceed the total PROXYv2 length */
			if (tlv_offset > total_v2_len)
				goto bad_header;

			/* Prepare known TLV types */
			switch (tlv_packet->type) {
			case PP2_TYPE_CRC32C: {
				uint32_t n_crc32c;

				/* Verify that this TLV is exactly 4 bytes long */
				if (istlen(tlv) != PP2_CRC32C_LEN)
					goto bad_header;

				n_crc32c = read_n32(istptr(tlv));
				write_n32(istptr(tlv), 0); // compute with CRC==0

				if (hash_crc32c(trash.area, total_v2_len) != n_crc32c)
					goto bad_header;
				break;
			}
#ifdef USE_NS
			case PP2_TYPE_NETNS: {
				const struct netns_entry *ns;

				ns = netns_store_lookup(istptr(tlv), istlen(tlv));
				if (ns)
					conn->proxy_netns = ns;
				break;
			}
#endif
			case PP2_TYPE_AUTHORITY: {
				/* For now, keep the length restriction by HAProxy */
				if (istlen(tlv) > HA_PP2_AUTHORITY_MAX)
					goto bad_header;

				break;
			}
			case PP2_TYPE_UNIQUE_ID: {
				if (istlen(tlv) > UNIQUEID_LEN)
					goto bad_header;
				break;
			}
			default:
				break;
			}

			/* If we did not find a known TLV type that we can optimize for, we generically allocate it */
			data_len = get_tlv_length(tlv_packet);

			/* Prevent attackers from allocating too much memory */
			if (unlikely(data_len > HA_PP2_MAX_ALLOC))
				goto fail;

			/* Alloc memory based on data_len */
			if (data_len > HA_PP2_TLV_VALUE_256)
				new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list));
			else if (data_len <= HA_PP2_TLV_VALUE_128)
				new_tlv = pool_alloc(pool_head_pp_tlv_128);
			else
				new_tlv = pool_alloc(pool_head_pp_tlv_256);

			if (unlikely(!new_tlv))
				goto fail;

			new_tlv->type = tlv_packet->type;

			/* Save TLV to make it accessible via sample fetch */
			memcpy(new_tlv->value, tlv.ptr, data_len);
			new_tlv->len = data_len;

			LIST_APPEND(&conn->tlv_list, &new_tlv->list);
		}


		/* Verify that the PROXYv2 header ends at a TLV boundary.
		 * This is can not be true, because the TLV parsing already
		 * verifies that a TLV does not exceed the total length and
		 * also that there is space for a TLV header.
		 */
		BUG_ON(tlv_offset != total_v2_len);

		/* unsupported protocol, keep local connection address */
		break;
	case 0x00: /* LOCAL command */
		/* keep local connection address for LOCAL */
		break;
	default:
		goto bad_header; /* not a supported command */
	}

	trash.data = total_v2_len;
	goto eat_header;

 eat_header:
	/* remove the PROXY line from the request. For this we re-read the
	 * exact line at once. If we don't get the exact same result, we
	 * fail.
	 */
	while (1) {
		ssize_t len2 = recv(conn->handle.fd, trash.area, trash.data, 0);

		if (len2 < 0 && errno == EINTR)
			continue;
		if (len2 != trash.data)
			goto recv_abort;
		break;
	}

	conn->flags &= ~flag;
	conn->flags |= CO_FL_RCVD_PROXY;
	return 1;

 not_ready:
	return 0;

 missing:
	/* Missing data. Since we're using MSG_PEEK, we can only poll again if
	 * we have not read anything. Otherwise we need to fail because we won't
	 * be able to poll anymore.
	 */
	conn->err_code = CO_ER_PRX_TRUNCATED;
	goto fail;

 bad_header:
	/* This is not a valid proxy protocol header */
	conn->err_code = CO_ER_PRX_BAD_HDR;
	goto fail;

 recv_abort:
	conn->err_code = CO_ER_PRX_ABORT;
	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
	goto fail;

 fail:
	conn->flags |= CO_FL_ERROR;
	return 0;
}

/* This callback is used to send a valid PROXY protocol line to a socket being
 * established. It returns 0 if it fails in a fatal way or needs to poll to go
 * further, otherwise it returns non-zero and removes itself from the connection's
 * flags (the bit is provided in <flag> by the caller). It is designed to be
 * called by the connection handler and relies on it to commit polling changes.
 * Note that it can emit a PROXY line by relying on the other end's address
 * when the connection is attached to a stream connector, or by resolving the
 * local address otherwise (also called a LOCAL line).
 */
int conn_send_proxy(struct connection *conn, unsigned int flag)
{
	if (!conn_ctrl_ready(conn))
		goto out_error;

	/* If we have a PROXY line to send, we'll use this to validate the
	 * connection, in which case the connection is validated only once
	 * we've sent the whole proxy line. Otherwise we use connect().
	 */
	if (conn->send_proxy_ofs) {
		struct stconn *sc;
		int ret;

		/* If there is no mux attached to the connection, it means the
		 * connection context is a stream connector.
		 */
		sc = conn->mux ? conn_get_first_sc(conn) : conn->ctx;

		/* The target server expects a PROXY line to be sent first.
		 * If the send_proxy_ofs is negative, it corresponds to the
		 * offset to start sending from then end of the proxy string
		 * (which is recomputed every time since it's constant). If
		 * it is positive, it means we have to send from the start.
		 * We can only send a "normal" PROXY line when the connection
		 * is attached to a stream connector. Otherwise we can only
		 * send a LOCAL line (eg: for use with health checks).
		 */

		if (sc && sc_strm(sc)) {
			ret = make_proxy_line(trash.area, trash.size,
					      objt_server(conn->target),
					      sc_conn(sc_opposite(sc)),
					      __sc_strm(sc));
		}
		else {
			/* The target server expects a LOCAL line to be sent first. Retrieving
			 * local or remote addresses may fail until the connection is established.
			 */
			if (!conn_get_src(conn) || !conn_get_dst(conn))
				goto out_wait;

			ret = make_proxy_line(trash.area, trash.size,
					      objt_server(conn->target), conn,
					      NULL);
		}

		if (!ret)
			goto out_error;

		if (conn->send_proxy_ofs > 0)
			conn->send_proxy_ofs = -ret; /* first call */

		/* we have to send trash from (ret+sp for -sp bytes). If the
		 * data layer has a pending write, we'll also set MSG_MORE.
		 */
		ret = conn_ctrl_send(conn,
				     trash.area + ret + conn->send_proxy_ofs,
		                     -conn->send_proxy_ofs,
		                     (conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);

		if (ret < 0)
			goto out_error;

		conn->send_proxy_ofs += ret; /* becomes zero once complete */
		if (conn->send_proxy_ofs != 0)
			goto out_wait;

		/* OK we've sent the whole line, we're connected */
	}

	/* The connection is ready now, simply return and let the connection
	 * handler notify upper layers if needed.
	 */
	conn->flags &= ~CO_FL_WAIT_L4_CONN;
	conn->flags &= ~flag;
	return 1;

 out_error:
	/* Write error on the file descriptor */
	conn->flags |= CO_FL_ERROR;
	return 0;

 out_wait:
	return 0;
}

/* This handshake handler waits a NetScaler Client IP insertion header
 * at the beginning of the raw data stream. The header format is
 * described in doc/netscaler-client-ip-insertion-protocol.txt
 *
 * This line MUST be at the beginning of the buffer and MUST NOT be
 * fragmented.
 *
 * The header line is small and in all cases smaller than the smallest normal
 * TCP MSS. So it MUST always be delivered as one segment, which ensures we
 * can safely use MSG_PEEK and avoid buffering.
 *
 * Once the data is fetched, the values are set in the connection's address
 * fields, and data are removed from the socket's buffer. The function returns
 * zero if it needs to wait for more data or if it fails, or 1 if it completed
 * and removed itself.
 */
int conn_recv_netscaler_cip(struct connection *conn, int flag)
{
	struct session *sess = conn->owner;
	char *line;
	uint32_t hdr_len;
	uint8_t ip_ver;
	int ret;

	if (!conn_ctrl_ready(conn))
		goto fail;

	BUG_ON(conn->flags & CO_FL_FDLESS);

	if (!fd_recv_ready(conn->handle.fd))
		goto not_ready;

	while (1) {
		ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
		if (ret < 0) {
			if (errno == EINTR)
				continue;
			if (errno == EAGAIN || errno == EWOULDBLOCK) {
				fd_cant_recv(conn->handle.fd);
				goto not_ready;
			}
			goto recv_abort;
		}
		trash.data = ret;
		break;
	}

	conn->flags &= ~CO_FL_WAIT_L4_CONN;

	if (!trash.data) {
		/* client shutdown */
		conn->err_code = CO_ER_CIP_EMPTY;
		goto fail;
	}

	/* Fail if buffer length is not large enough to contain
	 * CIP magic, header length or
	 * CIP magic, CIP length, CIP type, header length */
	if (trash.data < 12)
		goto missing;

	line = trash.area;

	/* Decode a possible NetScaler Client IP request, fail early if
	 * it does not match */
	if (ntohl(read_u32(line)) != __objt_listener(conn->target)->bind_conf->ns_cip_magic)
		goto bad_magic;

	/* Legacy CIP protocol */
	if ((trash.area[8] & 0xD0) == 0x40) {
		hdr_len = ntohl(read_u32((line+4)));
		line += 8;
	}
	/* Standard CIP protocol */
	else if (trash.area[8] == 0x00) {
		hdr_len = ntohs(read_u32((line+10)));
		line += 12;
	}
	/* Unknown CIP protocol */
	else {
		conn->err_code = CO_ER_CIP_BAD_PROTO;
		goto fail;
	}

	/* Fail if buffer length is not large enough to contain
	 * a minimal IP header */
	if (trash.data < 20)
		goto missing;

	/* Get IP version from the first four bits */
	ip_ver = (*line & 0xf0) >> 4;

	if (ip_ver == 4) {
		struct ip *hdr_ip4;
		struct my_tcphdr *hdr_tcp;

		hdr_ip4 = (struct ip *)line;

		if (trash.data < 40 || trash.data < hdr_len) {
			/* Fail if buffer length is not large enough to contain
			 * IPv4 header, TCP header */
			goto missing;
		}
		else if (hdr_ip4->ip_p != IPPROTO_TCP) {
			/* The protocol does not include a TCP header */
			conn->err_code = CO_ER_CIP_BAD_PROTO;
			goto fail;
		}

		hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));

		if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
			goto fail;

		/* update the session's addresses and mark them set */
		((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
		((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
		((struct sockaddr_in *)sess->src)->sin_port = hdr_tcp->source;

		((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
		((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
		((struct sockaddr_in *)sess->dst)->sin_port = hdr_tcp->dest;
	}
	else if (ip_ver == 6) {
		struct ip6_hdr *hdr_ip6;
		struct my_tcphdr *hdr_tcp;

		hdr_ip6 = (struct ip6_hdr *)line;

		if (trash.data < 60 || trash.data < hdr_len) {
			/* Fail if buffer length is not large enough to contain
			 * IPv6 header, TCP header */
			goto missing;
		}
		else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
			/* The protocol does not include a TCP header */
			conn->err_code = CO_ER_CIP_BAD_PROTO;
			goto fail;
		}

		hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));

		if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
			goto fail;

		/* update the session's addresses and mark them set */
		((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
		((struct sockaddr_in6 *)sess->src)->sin6_addr = hdr_ip6->ip6_src;
		((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_tcp->source;

		((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
		((struct sockaddr_in6 *)sess->dst)->sin6_addr = hdr_ip6->ip6_dst;
		((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_tcp->dest;
	}
	else {
		/* The protocol does not match something known (IPv4/IPv6) */
		conn->err_code = CO_ER_CIP_BAD_PROTO;
		goto fail;
	}

	line += hdr_len;
	trash.data = line - trash.area;

	/* remove the NetScaler Client IP header from the request. For this
	 * we re-read the exact line at once. If we don't get the exact same
	 * result, we fail.
	 */
	while (1) {
		int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
		if (len2 < 0 && errno == EINTR)
			continue;
		if (len2 != trash.data)
			goto recv_abort;
		break;
	}

	conn->flags &= ~flag;
	return 1;

 not_ready:
	return 0;

 missing:
	/* Missing data. Since we're using MSG_PEEK, we can only poll again if
	 * we have not read anything. Otherwise we need to fail because we won't
	 * be able to poll anymore.
	 */
	conn->err_code = CO_ER_CIP_TRUNCATED;
	goto fail;

 bad_magic:
	conn->err_code = CO_ER_CIP_BAD_MAGIC;
	goto fail;

 recv_abort:
	conn->err_code = CO_ER_CIP_ABORT;
	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
	goto fail;

 fail:
	conn->flags |= CO_FL_ERROR;
	return 0;
}


int conn_send_socks4_proxy_request(struct connection *conn)
{
	struct socks4_request req_line;

	if (!conn_ctrl_ready(conn))
		goto out_error;

	if (!conn_get_dst(conn))
		goto out_error;

	req_line.version = 0x04;
	req_line.command = 0x01;
	req_line.port    = get_net_port(conn->dst);
	req_line.ip      = is_inet_addr(conn->dst);
	memcpy(req_line.user_id, "HAProxy\0", 8);

	if (conn->send_proxy_ofs > 0) {
		/*
		 * This is the first call to send the request
		 */
		conn->send_proxy_ofs = -(int)sizeof(req_line);
	}

	if (conn->send_proxy_ofs < 0) {
		int ret = 0;

		/* we are sending the socks4_req_line here. If the data layer
		 * has a pending write, we'll also set MSG_MORE.
		 */
		ret = conn_ctrl_send(
				conn,
				((char *)(&req_line)) + (sizeof(req_line)+conn->send_proxy_ofs),
				-conn->send_proxy_ofs,
				(conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);

		DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Before send remain is [%d], sent [%d]\n",
			conn_fd(conn), -conn->send_proxy_ofs, ret);

		if (ret < 0) {
			goto out_error;
		}

		conn->send_proxy_ofs += ret; /* becomes zero once complete */
		if (conn->send_proxy_ofs != 0) {
			goto out_wait;
		}
	}

	/* OK we've the whole request sent */
	conn->flags &= ~CO_FL_SOCKS4_SEND;

	/* The connection is ready now, simply return and let the connection
	 * handler notify upper layers if needed.
	 */
	conn->flags &= ~CO_FL_WAIT_L4_CONN;

	if (conn->flags & CO_FL_SEND_PROXY) {
		/*
		 * Get the send_proxy_ofs ready for the send_proxy due to we are
		 * reusing the "send_proxy_ofs", and SOCKS4 handshake should be done
		 * before sending PROXY Protocol.
		 */
		conn->send_proxy_ofs = 1;
	}
	return 1;

 out_error:
	/* Write error on the file descriptor */
	conn->flags |= CO_FL_ERROR;
	if (conn->err_code == CO_ER_NONE) {
		conn->err_code = CO_ER_SOCKS4_SEND;
	}
	return 0;

 out_wait:
	return 0;
}

int conn_recv_socks4_proxy_response(struct connection *conn)
{
	char line[SOCKS4_HS_RSP_LEN];
	int ret;

	if (!conn_ctrl_ready(conn))
		goto fail;

	BUG_ON(conn->flags & CO_FL_FDLESS);

	if (!fd_recv_ready(conn->handle.fd))
		goto not_ready;

	while (1) {
		/* SOCKS4 Proxy will response with 8 bytes, 0x00 | 0x5A | 0x00 0x00 | 0x00 0x00 0x00 0x00
		 * Try to peek into it, before all 8 bytes ready.
		 */
		ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, MSG_PEEK);

		if (ret == 0) {
			/* the socket has been closed or shutdown for send */
			DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d], looks like the socket has been closed or shutdown for send\n",
					conn->handle.fd, ret, errno);
			if (conn->err_code == CO_ER_NONE) {
				conn->err_code = CO_ER_SOCKS4_RECV;
			}
			goto fail;
		}

		if (ret > 0) {
			if (ret == SOCKS4_HS_RSP_LEN) {
				DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received 8 bytes, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
						conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
			}else{
				DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], first byte is [%02X], last bye is [%02X]\n", conn->handle.fd, ret, line[0], line[ret-1]);
			}
		} else {
			DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d]\n", conn->handle.fd, ret, errno);
		}

		if (ret < 0) {
			if (errno == EINTR) {
				continue;
			}
			if (errno == EAGAIN || errno == EWOULDBLOCK) {
				fd_cant_recv(conn->handle.fd);
				goto not_ready;
			}
			goto recv_abort;
		}
		break;
	}

	conn->flags &= ~CO_FL_WAIT_L4_CONN;

	if (ret < SOCKS4_HS_RSP_LEN) {
		/* Missing data. Since we're using MSG_PEEK, we can only poll again if
		 * we are not able to read enough data.
		 */
		goto not_ready;
	}

	/*
	 * Base on the SOCSK4 protocol:
	 *
	 *			+----+----+----+----+----+----+----+----+
	 *			| VN | CD | DSTPORT |      DSTIP        |
	 *			+----+----+----+----+----+----+----+----+
	 *	# of bytes:	   1    1      2              4
	 *	VN is the version of the reply code and should be 0. CD is the result
	 *	code with one of the following values:
	 *	90: request granted
	 *	91: request rejected or failed
	 *	92: request rejected because SOCKS server cannot connect to identd on the client
	 *	93: request rejected because the client program and identd report different user-ids
	 *	The remaining fields are ignored.
	 */
	if (line[1] != 90) {
		conn->flags &= ~CO_FL_SOCKS4_RECV;

		DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: FAIL, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
				conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
		if (conn->err_code == CO_ER_NONE) {
			conn->err_code = CO_ER_SOCKS4_DENY;
		}
		goto fail;
	}

	/* remove the 8 bytes response from the stream */
	while (1) {
		ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, 0);
		if (ret < 0 && errno == EINTR) {
			continue;
		}
		if (ret != SOCKS4_HS_RSP_LEN) {
			if (conn->err_code == CO_ER_NONE) {
				conn->err_code = CO_ER_SOCKS4_RECV;
			}
			goto fail;
		}
		break;
	}

	conn->flags &= ~CO_FL_SOCKS4_RECV;
	return 1;

 not_ready:
	return 0;

 recv_abort:
	if (conn->err_code == CO_ER_NONE) {
		conn->err_code = CO_ER_SOCKS4_ABORT;
	}
	conn->flags |= (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH);
	goto fail;

 fail:
	conn->flags |= CO_FL_ERROR;
	return 0;
}

/* registers proto mux list <list>. Modifies the list element! */
void register_mux_proto(struct mux_proto_list *list)
{
	LIST_APPEND(&mux_proto_list.list, &list->list);
}

/* Lists the known proto mux on <out>. This function is used by "haproxy -vv"
 * and is suitable for early boot just after the "REGISTER" stage because it
 * doesn't depend on anything to be already allocated.
 */
void list_mux_proto(FILE *out)
{
	struct mux_proto_list *item;
	struct ist proto;
	char *mode, *side;
	int done;

	fprintf(out, "Available multiplexer protocols :\n"
		"(protocols marked as <default> cannot be specified using 'proto' keyword)\n");
	list_for_each_entry(item, &mux_proto_list.list, list) {
		proto = item->token;

		if (item->mode == PROTO_MODE_ANY)
			mode = "TCP|HTTP";
		else if (item->mode == PROTO_MODE_TCP)
			mode = "TCP";
		else if (item->mode == PROTO_MODE_HTTP)
			mode = "HTTP";
		else
			mode = "NONE";

		if (item->side == PROTO_SIDE_BOTH)
			side = "FE|BE";
		else if (item->side == PROTO_SIDE_FE)
			side = "FE";
		else if (item->side == PROTO_SIDE_BE)
			side = "BE";
		else
			side = "NONE";

		fprintf(out, " %10s : mode=%-5s side=%-6s mux=%-5s flags=",
			(proto.len ? proto.ptr : "<default>"), mode, side, item->mux->name);

		done = 0;

		/* note: the block below could be simplified using macros but for only
		 * 4 flags it's not worth it.
		 */
		if (item->mux->flags & MX_FL_HTX)
			done |= fprintf(out, "%sHTX", done ? "|" : "");

		if (item->mux->flags & MX_FL_HOL_RISK)
			done |= fprintf(out, "%sHOL_RISK", done ? "|" : "");

		if (item->mux->flags & MX_FL_NO_UPG)
			done |= fprintf(out, "%sNO_UPG", done ? "|" : "");

		if (item->mux->flags & MX_FL_FRAMED)
			done |= fprintf(out, "%sFRAMED", done ? "|" : "");

		fprintf(out, "\n");
	}
}

/* Makes a PROXY protocol line from the two addresses. The output is sent to
 * buffer <buf> for a maximum size of <buf_len> (including the trailing zero).
 * It returns the number of bytes composing this line (including the trailing
 * LF), or zero in case of failure (eg: not enough space). It supports TCP4,
 * TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is null, UNKNOWN is
 * emitted as well.
 */
static int make_proxy_line_v1(char *buf, int buf_len, const struct sockaddr_storage *src, const struct sockaddr_storage *dst)
{
	int ret = 0;
	char * protocol;
	char src_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	char dst_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	in_port_t src_port;
	in_port_t dst_port;

	if (   !src
	    || !dst
	    || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
	    || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
		/* unknown family combination */
		ret = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
		if (ret >= buf_len)
			return 0;

		return ret;
	}

	/* IPv4 for both src and dst */
	if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
		protocol = "TCP4";
		if (!inet_ntop(AF_INET, &((struct sockaddr_in *)src)->sin_addr, src_str, sizeof(src_str)))
			return 0;
		src_port = ((struct sockaddr_in *)src)->sin_port;
		if (!inet_ntop(AF_INET, &((struct sockaddr_in *)dst)->sin_addr, dst_str, sizeof(dst_str)))
			return 0;
		dst_port = ((struct sockaddr_in *)dst)->sin_port;
	}
	/* IPv6 for at least one of src and dst */
	else {
		struct in6_addr tmp;

		protocol = "TCP6";

		if (src->ss_family == AF_INET) {
			/* Convert src to IPv6 */
			v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
			src_port = ((struct sockaddr_in *)src)->sin_port;
		}
		else {
			tmp = ((struct sockaddr_in6 *)src)->sin6_addr;
			src_port = ((struct sockaddr_in6 *)src)->sin6_port;
		}

		if (!inet_ntop(AF_INET6, &tmp, src_str, sizeof(src_str)))
			return 0;

		if (dst->ss_family == AF_INET) {
			/* Convert dst to IPv6 */
			v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
			dst_port = ((struct sockaddr_in *)dst)->sin_port;
		}
		else {
			tmp = ((struct sockaddr_in6 *)dst)->sin6_addr;
			dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
		}

		if (!inet_ntop(AF_INET6, &tmp, dst_str, sizeof(dst_str)))
			return 0;
	}

	ret = snprintf(buf, buf_len, "PROXY %s %s %s %u %u\r\n", protocol, src_str, dst_str, ntohs(src_port), ntohs(dst_port));
	if (ret >= buf_len)
		return 0;

	return ret;
}

static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
{
	struct tlv *tlv;

	if (!dest || (length + sizeof(*tlv) > dest_len))
		return 0;

	tlv = (struct tlv *)dest;

	tlv->type = type;
	tlv->length_hi = length >> 8;
	tlv->length_lo = length & 0x00ff;
	memcpy(tlv->value, value, length);
	return length + sizeof(*tlv);
}

/* Note: <remote> is explicitly allowed to be NULL */
static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
{
	const char pp2_signature[] = PP2_SIGNATURE;
	void *tlv_crc32c_p = NULL;
	int ret = 0;
	struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
	struct sockaddr_storage null_addr = { .ss_family = 0 };
	struct srv_pp_tlv_list *srv_tlv = NULL;
	const struct sockaddr_storage *src = &null_addr;
	const struct sockaddr_storage *dst = &null_addr;
	const char *value = "";
	int value_len = 0;

	if (buf_len < PP2_HEADER_LEN)
		return 0;
	memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);

	if (strm) {
		src = sc_src(strm->scf);
		dst = sc_dst(strm->scf);
	}
	else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
		src = conn_src(remote);
		dst = conn_dst(remote);
	}

	/* At least one of src or dst is not of AF_INET or AF_INET6 */
	if (  !src
	   || !dst
	   || (!pp2_never_send_local && conn_is_back(remote)) // locally initiated connection
	   || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
	   || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
		if (buf_len < PP2_HDR_LEN_UNSPEC)
			return 0;
		hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
		hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
		ret = PP2_HDR_LEN_UNSPEC;
	}
	else {
		hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
		/* IPv4 for both src and dst */
		if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
			if (buf_len < PP2_HDR_LEN_INET)
				return 0;
			hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
			hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
			hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
			hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
			hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
			ret = PP2_HDR_LEN_INET;
		}
		/* IPv6 for at least one of src and dst */
		else {
			struct in6_addr tmp;

			if (buf_len < PP2_HDR_LEN_INET6)
				return 0;
			hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
			if (src->ss_family == AF_INET) {
				v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
				memcpy(hdr->addr.ip6.src_addr, &tmp, 16);
				hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
			}
			else {
				memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
				hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
			}
			if (dst->ss_family == AF_INET) {
				v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
				memcpy(hdr->addr.ip6.dst_addr, &tmp, 16);
				hdr->addr.ip6.dst_port = ((struct sockaddr_in *)dst)->sin_port;
			}
			else {
				memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
				hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
			}

			ret = PP2_HDR_LEN_INET6;
		}
	}

	if (strm) {
		struct buffer *replace = NULL;

		list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) {
			replace = NULL;

			/* Users will always need to provide a value, in case of forwarding, they should use fc_pp_tlv.
			 * for generic types. Otherwise, we will send an empty TLV.
			 */
			if (!LIST_ISEMPTY(&srv_tlv->fmt)) {
				replace = alloc_trash_chunk();
				if (unlikely(!replace))
					return 0;

				replace->data = build_logline(strm, replace->area, replace->size, &srv_tlv->fmt);

				if (unlikely((buf_len - ret) < sizeof(struct tlv))) {
					free_trash_chunk(replace);
					return 0;
				}
				ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, replace->data, replace->area);
				free_trash_chunk(replace);
			}
			else {
				/* Create empty TLV as no value was specified */
				ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, 0, NULL);
			}
		}
	}

	/* Handle predefined TLVs as usual */
	if (srv->pp_opts & SRV_PP_V2_CRC32C) {
		uint32_t zero_crc32c = 0;

		if ((buf_len - ret) < sizeof(struct tlv))
			return 0;
		tlv_crc32c_p = (void *)((struct tlv *)&buf[ret])->value;
		ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_CRC32C, sizeof(zero_crc32c), (const char *)&zero_crc32c);
	}

	if (remote && conn_get_alpn(remote, &value, &value_len)) {
		if ((buf_len - ret) < sizeof(struct tlv))
			return 0;
		ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_ALPN, value_len, value);
	}

	if (srv->pp_opts & SRV_PP_V2_AUTHORITY) {
		struct conn_tlv_list *tlv = conn_get_tlv(remote, PP2_TYPE_AUTHORITY);

		value = NULL;
		if (tlv) {
			value_len = tlv->len;
			value = tlv->value;
		}
#ifdef USE_OPENSSL
		else {
			if ((value = ssl_sock_get_sni(remote)))
				value_len = strlen(value);
		}
#endif
		if (value) {
			if ((buf_len - ret) < sizeof(struct tlv))
				return 0;
			ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_AUTHORITY, value_len, value);
		}
	}

	if (strm && (srv->pp_opts & SRV_PP_V2_UNIQUE_ID)) {
		struct session* sess = strm_sess(strm);
		struct ist unique_id = stream_generate_unique_id(strm, &sess->fe->format_unique_id);

		value = unique_id.ptr;
		value_len = unique_id.len;

		if (value_len >= 0) {
			if ((buf_len - ret) < sizeof(struct tlv))
				return 0;
			ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_UNIQUE_ID, value_len, value);
		}
	}

#ifdef USE_OPENSSL
	if (srv->pp_opts & SRV_PP_V2_SSL) {
		struct tlv_ssl *tlv;
		int ssl_tlv_len = 0;

		if ((buf_len - ret) < sizeof(struct tlv_ssl))
			return 0;
		tlv = (struct tlv_ssl *)&buf[ret];
		memset(tlv, 0, sizeof(struct tlv_ssl));
		ssl_tlv_len += sizeof(struct tlv_ssl);
		tlv->tlv.type = PP2_TYPE_SSL;
		if (conn_is_ssl(remote)) {
			tlv->client |= PP2_CLIENT_SSL;
			value = ssl_sock_get_proto_version(remote);
			if (value) {
				ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
			}
			if (ssl_sock_get_cert_used_sess(remote)) {
				tlv->client |= PP2_CLIENT_CERT_SESS;
				tlv->verify = htonl(ssl_sock_get_verify_result(remote));
				if (ssl_sock_get_cert_used_conn(remote))
					tlv->client |= PP2_CLIENT_CERT_CONN;
			}
			if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
				struct buffer *cn_trash = get_trash_chunk();
				if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
					ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN,
								cn_trash->data,
								cn_trash->area);
				}
			}
			if (srv->pp_opts & SRV_PP_V2_SSL_KEY_ALG) {
				struct buffer *pkey_trash = get_trash_chunk();
				if (ssl_sock_get_pkey_algo(remote, pkey_trash) > 0) {
					ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_KEY_ALG,
								pkey_trash->data,
								pkey_trash->area);
				}
			}
			if (srv->pp_opts & SRV_PP_V2_SSL_SIG_ALG) {
				value = ssl_sock_get_cert_sig(remote);
				if (value) {
					ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_SIG_ALG, strlen(value), value);
				}
			}
			if (srv->pp_opts & SRV_PP_V2_SSL_CIPHER) {
				value = ssl_sock_get_cipher_name(remote);
				if (value) {
					ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CIPHER, strlen(value), value);
				}
			}
		}
		tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
		tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
		ret += ssl_tlv_len;
	}
#endif

#ifdef USE_NS
	if (remote && (remote->proxy_netns)) {
		if ((buf_len - ret) < sizeof(struct tlv))
			return 0;
		ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
	}
#endif

	hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));

	if (tlv_crc32c_p) {
		write_u32(tlv_crc32c_p, htonl(hash_crc32c(buf, ret)));
	}

	return ret;
}

/* Note: <remote> is explicitly allowed to be NULL */
int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
{
	int ret = 0;

	if (srv && (srv->pp_opts & SRV_PP_V2)) {
		ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm);
	}
	else {
		const struct sockaddr_storage *src = NULL;
		const struct sockaddr_storage *dst = NULL;

		if (strm) {
			src = sc_src(strm->scf);
			dst = sc_dst(strm->scf);
		}
		else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
			src = conn_src(remote);
			dst = conn_dst(remote);
		}

		if (src && dst)
			ret = make_proxy_line_v1(buf, buf_len, src, dst);
		else
			ret = make_proxy_line_v1(buf, buf_len, NULL, NULL);
	}

	return ret;
}

/* returns 0 on success */
static int cfg_parse_pp2_never_send_local(char **args, int section_type, struct proxy *curpx,
                                          const struct proxy *defpx, const char *file, int line,
                                          char **err)
{
	if (too_many_args(0, args, err, NULL))
		return -1;
	pp2_never_send_local = 1;
	return 0;
}

/* extracts some info from the connection and appends them to buffer <buf>. The
 * connection's pointer, its direction, target (fe/be/srv), xprt/ctrl, source
 * when set, destination when set, are printed in a compact human-readable format
 * fitting on a single line. This is handy to complete traces or debug output.
 * It is permitted to pass a NULL conn pointer. The number of characters emitted
 * is returned. A prefix <pfx> might be prepended before the first field if not
 * NULL.
 */
int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx)
{
	const struct listener *li;
	const struct server   *sv;
	const struct proxy    *px;
	char addr[40];
	int old_len = buf->data;

	if (!conn)
		return 0;

	chunk_appendf(buf, "%sconn=%p(%s)", pfx ? pfx : "", conn, conn_is_back(conn) ? "OUT" : "IN");

	if ((li = objt_listener(conn->target)))
		chunk_appendf(buf, " fe=%s", li->bind_conf->frontend->id);
	else if ((sv = objt_server(conn->target)))
		chunk_appendf(buf, " sv=%s/%s", sv->proxy->id, sv->id);
	else if ((px = objt_proxy(conn->target)))
		chunk_appendf(buf, " be=%s", px->id);

	chunk_appendf(buf, " %s/%s", conn_get_xprt_name(conn), conn_get_ctrl_name(conn));

	if (conn->src && addr_to_str(conn->src, addr, sizeof(addr)))
		chunk_appendf(buf, " src=%s:%d", addr, get_host_port(conn->src));

	if (conn->dst && addr_to_str(conn->dst, addr, sizeof(addr)))
		chunk_appendf(buf, " dst=%s:%d", addr, get_host_port(conn->dst));

	return buf->data - old_len;
}

/* return the number of glitches experienced on the mux connection. */
static int
smp_fetch_fc_glitches(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn = NULL;
	int ret;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
                conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
        else
                conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
			smp->strm ? sc_conn(smp->strm->scb) : NULL;

	/* No connection or a connection with an unsupported mux */
	if (!conn || (conn->mux && !conn->mux->ctl))
		return 0;

	/* Mux not installed yet, this may change */
	if (!conn->mux) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	ret = conn->mux->ctl(conn, MUX_CTL_GET_GLITCHES, NULL);
	if (ret < 0) {
		/* not supported by the mux */
		return 0;
	}

	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = ret;
	return 1;
}

/* return the major HTTP version as 1 or 2 depending on how the request arrived
 * before being processed.
 *
 * WARNING: Should be updated if a new major HTTP version is added.
 */
static int
smp_fetch_fc_http_major(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn = NULL;
	const char *mux_name = NULL;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
                conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
        else
                conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
			smp->strm ? sc_conn(smp->strm->scb) : NULL;

	/* No connection or a connection with a RAW muxx */
	if (!conn || (conn->mux && !(conn->mux->flags & MX_FL_HTX)))
		return 0;

	/* No mux install, this may change */
	if (!conn->mux) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	mux_name = conn_get_mux_name(conn);

	smp->data.type = SMP_T_SINT;
	if (strcmp(mux_name, "QUIC") == 0)
		smp->data.u.sint = 3;
	else if (strcmp(mux_name, "H2") == 0)
		smp->data.u.sint = 2;
	else
		smp->data.u.sint = 1;

	return 1;
}

/* fetch if the received connection used a PROXY protocol header */
int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn;

	conn = objt_conn(smp->sess->origin);
	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	smp->flags = 0;
	smp->data.type = SMP_T_BOOL;
	smp->data.u.sint = (conn->flags & CO_FL_RCVD_PROXY) ? 1 : 0;

	return 1;
}

/*
 * This function checks the TLV type converter configuration.
 * It expects the corresponding TLV type as a string representing the number
 * or a constant. args[0] will be turned into the numerical value of the
 * TLV type string.
 */
static int smp_check_tlv_type(struct arg *args, char **err)
{
	int type;
	char *endp;
	struct ist input = ist2(args[0].data.str.area, args[0].data.str.data);

	if (isteqi(input, ist("ALPN")) != 0)
		type = PP2_TYPE_ALPN;
	else if (isteqi(input, ist("AUTHORITY")) != 0)
		type = PP2_TYPE_AUTHORITY;
	else if (isteqi(input, ist("CRC32C")) != 0)
		type = PP2_TYPE_CRC32C;
	else if (isteqi(input, ist("NOOP")) != 0)
		type = PP2_TYPE_NOOP;
	else if (isteqi(input, ist("UNIQUE_ID")) != 0)
		type = PP2_TYPE_UNIQUE_ID;
	else if (isteqi(input, ist("SSL")) != 0)
		type = PP2_TYPE_SSL;
	else if (isteqi(input, ist("SSL_VERSION")) != 0)
		type = PP2_SUBTYPE_SSL_VERSION;
	else if (isteqi(input, ist("SSL_CN")) != 0)
		type = PP2_SUBTYPE_SSL_CN;
	else if (isteqi(input, ist("SSL_CIPHER")) != 0)
		type = PP2_SUBTYPE_SSL_CIPHER;
	else if (isteqi(input, ist("SSL_SIG_ALG")) != 0)
		type = PP2_SUBTYPE_SSL_SIG_ALG;
	else if (isteqi(input, ist("SSL_KEY_ALG")) != 0)
		type = PP2_SUBTYPE_SSL_KEY_ALG;
	else if (isteqi(input, ist("NETNS")) != 0)
		type = PP2_TYPE_NETNS;
	else {
		type = strtoul(input.ptr, &endp, 0);
		if (endp && *endp != '\0') {
			memprintf(err, "Could not convert type '%s'", input.ptr);
			return 0;
		}
	}

	if (type < 0 || type > 255) {
		memprintf(err, "Invalid TLV Type '%s'", input.ptr);
		return 0;
	}

	chunk_destroy(&args[0].data.str);
	args[0].type = ARGT_SINT;
	args[0].data.sint = type;

	return 1;
}

/* fetch an arbitrary TLV from a PROXY protocol v2 header */
int smp_fetch_fc_pp_tlv(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	int idx;
	struct connection *conn = NULL;
	struct conn_tlv_list *conn_tlv = NULL;

	conn = objt_conn(smp->sess->origin);
	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	if (args[0].type != ARGT_SINT)
		return 0;

	idx = args[0].data.sint;
	conn_tlv = smp->ctx.p ? smp->ctx.p : LIST_ELEM(conn->tlv_list.n, struct conn_tlv_list *, list);
	list_for_each_entry_from(conn_tlv, &conn->tlv_list, list) {
		if (conn_tlv->type == idx) {
			smp->flags |= SMP_F_NOT_LAST;
			smp->data.type = SMP_T_STR;
			smp->data.u.str.area = conn_tlv->value;
			smp->data.u.str.data = conn_tlv->len;
			smp->ctx.p = conn_tlv;

			return 1;
		}
	}

	smp->flags &= ~SMP_F_NOT_LAST;

	return 0;
}

/* fetch the authority TLV from a PROXY protocol header */
int smp_fetch_fc_pp_authority(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct arg tlv_arg;
	int ret;

	set_tlv_arg(PP2_TYPE_AUTHORITY, &tlv_arg);
	ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
	smp->flags &= ~SMP_F_NOT_LAST; // return only the first authority
	return ret;
}

/* fetch the unique ID TLV from a PROXY protocol header */
int smp_fetch_fc_pp_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct arg tlv_arg;
	int ret;

	set_tlv_arg(PP2_TYPE_UNIQUE_ID, &tlv_arg);
	ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
	smp->flags &= ~SMP_F_NOT_LAST; // return only the first unique ID
	return ret;
}

/* fetch the error code of a connection */
int smp_fetch_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
                conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
        else
                conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
			smp->strm ? sc_conn(smp->strm->scb) : NULL;

	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	smp->flags = 0;
	smp->data.type = SMP_T_SINT;
	smp->data.u.sint = (unsigned long long int)conn->err_code;

	return 1;
}

/* fetch a string representation of the error code of a connection */
int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	struct connection *conn;
	const char *err_code_str;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
                conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
        else
                conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
			smp->strm ? sc_conn(smp->strm->scb) : NULL;

	if (!conn)
		return 0;

	if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	err_code_str = conn_err_code_str(conn);

	if (!err_code_str)
		return 0;

	smp->flags = 0;
	smp->data.type = SMP_T_STR;
	smp->data.u.str.area = (char*)err_code_str;
	smp->data.u.str.data = strlen(err_code_str);

	return 1;
}

/* Note: must not be declared <const> as its list will be overwritten.
 * Note: fetches that may return multiple types should be declared using the
 * appropriate pseudo-type. If not available it must be declared as the lowest
 * common denominator, the type that can be casted into all other ones.
 */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
	{ "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
	{ "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
	{ "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
	{ "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
	{ "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
	{ "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
	{ "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
	{ "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L4CLI },
	{ /* END */ },
}};

INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);

static struct cfg_kw_list cfg_kws = {ILH, {
	{ CFG_GLOBAL, "pp2-never-send-local", cfg_parse_pp2_never_send_local },
	{ /* END */ },
}};

INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);

/* Generate the hash of a connection with params as input
 * Each non-null field of params is taken into account for the hash calcul.
 */
uint64_t conn_hash_prehash(char *buf, size_t size)
{
	return XXH64(buf, size, 0);
}

/* Computes <data> hash into <hash>. In the same time, <flags>
 * are updated with <type> for the hash header.
 */
static void conn_hash_update(XXH64_state_t *hash,
                             const void *data, size_t size,
                             enum conn_hash_params_t *flags,
                             enum conn_hash_params_t type)
{
	XXH64_update(hash, data, size);
	*flags |= type;
}

static uint64_t conn_hash_digest(XXH64_state_t *hash,
                                 enum conn_hash_params_t flags)
{
	const uint64_t flags_u64 = (uint64_t)flags;
	const uint64_t f_hash = XXH64_digest(hash);

	return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(f_hash);
}

/* private function to handle sockaddr as input for connection hash */
static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss,
                                         XXH64_state_t *hash,
                                         enum conn_hash_params_t *hash_flags,
                                         enum conn_hash_params_t param_type_addr,
                                         enum conn_hash_params_t param_type_port)
{
	struct sockaddr_in *addr;
	struct sockaddr_in6 *addr6;

	switch (ss->ss_family) {
	case AF_INET:
		addr = (struct sockaddr_in *)ss;

		conn_hash_update(hash,
		                 &addr->sin_addr, sizeof(addr->sin_addr),
		                 hash_flags, param_type_addr);

		if (addr->sin_port) {
			conn_hash_update(hash,
			                 &addr->sin_port, sizeof(addr->sin_port),
			                 hash_flags, param_type_port);
		}

		break;

	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)ss;

		conn_hash_update(hash,
		                 &addr6->sin6_addr, sizeof(addr6->sin6_addr),
		                 hash_flags, param_type_addr);

		if (addr6->sin6_port) {
			conn_hash_update(hash,
			                 &addr6->sin6_port, sizeof(addr6->sin6_port),
			                 hash_flags, param_type_port);
		}

		break;
	}
}

uint64_t conn_calculate_hash(const struct conn_hash_params *params)
{
	enum conn_hash_params_t hash_flags = 0;
	XXH64_state_t hash;

	XXH64_reset(&hash, 0);

	conn_hash_update(&hash, &params->target, sizeof(params->target), &hash_flags, 0);

	if (params->sni_prehash) {
		conn_hash_update(&hash,
		                 &params->sni_prehash, sizeof(params->sni_prehash),
		                 &hash_flags, CONN_HASH_PARAMS_TYPE_SNI);
	}

	if (params->dst_addr) {
		conn_calculate_hash_sockaddr(params->dst_addr,
		                             &hash, &hash_flags,
		                             CONN_HASH_PARAMS_TYPE_DST_ADDR,
		                             CONN_HASH_PARAMS_TYPE_DST_PORT);
	}

	if (params->src_addr) {
		conn_calculate_hash_sockaddr(params->src_addr,
		                             &hash, &hash_flags,
		                             CONN_HASH_PARAMS_TYPE_SRC_ADDR,
		                             CONN_HASH_PARAMS_TYPE_SRC_PORT);
	}

	if (params->proxy_prehash) {
		conn_hash_update(&hash,
		                 &params->proxy_prehash, sizeof(params->proxy_prehash),
		                 &hash_flags, CONN_HASH_PARAMS_TYPE_PROXY);
	}

	if (params->mark_tos_prehash) {
		conn_hash_update(&hash,
		                 &params->mark_tos_prehash, sizeof(params->mark_tos_prehash),
		                 &hash_flags, CONN_HASH_PARAMS_TYPE_MARK_TOS);
	}

	return conn_hash_digest(&hash, hash_flags);
}

/* Reverse a <conn> connection instance. This effectively moves the connection
 * from frontend to backend side or vice-versa depending on its initial status.
 *
 * For active reversal, 'reverse' member points to the listener used as the new
 * connection target. Once transition is completed, the connection needs to be
 * accepted on the listener to instantiate its parent session before using
 * streams.
 *
 * For passive reversal, 'reverse' member points to the server used as the new
 * connection target. Once transition is completed, the connection appears as a
 * normal backend connection.
 *
 * Returns 0 on success else non-zero.
 */
int conn_reverse(struct connection *conn)
{
	struct conn_hash_params hash_params;
	int64_t hash = 0;
	struct session *sess = conn->owner;

	if (!conn_is_back(conn)) {
		/* srv must have been set by a previous 'attach-srv' rule. */
		struct server *srv = objt_server(conn->reverse.target);
		BUG_ON(!srv);

		if (conn_backend_init(conn))
			return 1;

		/* Initialize hash value for usage as idle conns. */
		memset(&hash_params, 0, sizeof(hash_params));
		hash_params.target = srv;

		if (b_data(&conn->reverse.name)) {
			/* data cannot wrap else prehash usage is incorrect */
			BUG_ON(b_data(&conn->reverse.name) != b_contig_data(&conn->reverse.name, 0));

			hash_params.sni_prehash =
			  conn_hash_prehash(b_head(&conn->reverse.name),
			                    b_data(&conn->reverse.name));
		}

		hash = conn_calculate_hash(&hash_params);
		conn->hash_node->node.key = hash;

		conn->target = &srv->obj_type;
		srv_use_conn(srv, conn);

		/* Free the session after detaching the connection from it. */
		session_unown_conn(sess, conn);
		sess->origin = NULL;
		session_free(sess);
		conn_set_owner(conn, NULL, NULL);

		conn->flags |= CO_FL_REVERSED;
	}
	else {
		/* Wake up receiver to proceed to connection accept. */
		struct listener *l = __objt_listener(conn->reverse.target);

		conn_backend_deinit(conn);

		conn->target = &l->obj_type;
		conn->flags |= CO_FL_ACT_REVERSING;
		task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
	}

	/* Invert source and destination addresses if already set. */
	SWAP(conn->src, conn->dst);

	conn->reverse.target = NULL;
	ha_free(&conn->reverse.name.area);
	conn->reverse.name = BUF_NULL;

	return 0;
}

/* Handler of the task of mux_stopping_data.
 * Called on soft-stop.
 */
static struct task *mux_stopping_process(struct task *t, void *ctx, unsigned int state)
{
	struct connection *conn, *back;

	list_for_each_entry_safe(conn, back, &mux_stopping_data[tid].list, stopping_list) {
		if (conn->mux && conn->mux->wake)
			conn->mux->wake(conn);
	}

	return t;
}

static int allocate_mux_cleanup(void)
{
	/* allocates the thread bound mux_stopping_data task */
	mux_stopping_data[tid].task = task_new_here();
	if (!mux_stopping_data[tid].task) {
		ha_alert("Failed to allocate the task for connection cleanup on thread %d.\n", tid);
		return 0;
	}

	mux_stopping_data[tid].task->process = mux_stopping_process;
	LIST_INIT(&mux_stopping_data[tid].list);

	return 1;
}
REGISTER_PER_THREAD_ALLOC(allocate_mux_cleanup);

static int deallocate_mux_cleanup(void)
{
	task_destroy(mux_stopping_data[tid].task);
	return 1;
}
REGISTER_PER_THREAD_FREE(deallocate_mux_cleanup);

static void deinit_idle_conns(void)
{
	int i;

	for (i = 0; i < global.nbthread; i++) {
		task_destroy(idle_conns[i].cleanup_task);
	}
}
REGISTER_POST_DEINIT(deinit_idle_conns);