/*
* include/types/connection.h
* This file describes the connection struct and associated constants.
*
* Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, version 2.1
* exclusively.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TYPES_CONNECTION_H
#define _TYPES_CONNECTION_H
#include <stdlib.h>
#include <sys/socket.h>
#include <common/config.h>
#include <common/ist.h>
#include <types/listener.h>
#include <types/obj_type.h>
#include <types/port_range.h>
#include <types/protocol.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
/* referenced below */
struct connection;
struct conn_stream;
struct buffer;
struct server;
struct pipe;
/* I/O readiness events a tasklet may subscribe to (see xprt_ops->subscribe()
 * and struct wait_list below). These are bit flags and may be OR'ed together.
 */
enum sub_event_type {
SUB_CAN_SEND = 0x00000001, /* Schedule the tasklet when we can send more */
SUB_CAN_RECV = 0x00000002, /* Schedule the tasklet when we can recv more */
};
/* Subscription entry used to wake a tasklet up once one of the I/O events it
 * subscribed to (see enum sub_event_type) becomes possible again.
 */
struct wait_list {
struct tasklet *task; /* tasklet to schedule when the awaited event occurs */
struct list list; /* chaining node in the subscriber list */
int wait_reason; /* OR'ed SUB_CAN_* flags being waited for */
};
/* A connection handle is how we differentiate two connections on the lower
 * layers. It usually is a file descriptor but can be a connection id. Kept as
 * a union so that transports which do not use an FD can add their own member.
 */
union conn_handle {
int fd; /* file descriptor, for regular sockets */
};
/* conn_stream flags */
enum {
CS_FL_NONE = 0x00000000, /* Just for initialization purposes */
CS_FL_DATA_RD_ENA = 0x00000001, /* receiving data is allowed */
CS_FL_DATA_WR_ENA = 0x00000002, /* sending data is desired */
CS_FL_SHRD = 0x00000010, /* read shut, draining extra data */
CS_FL_SHRR = 0x00000020, /* read shut, resetting extra data */
CS_FL_SHR = CS_FL_SHRD | CS_FL_SHRR, /* read shut status (either mode) */
CS_FL_SHWN = 0x00000040, /* write shut, verbose mode */
CS_FL_SHWS = 0x00000080, /* write shut, silent mode */
CS_FL_SHW = CS_FL_SHWN | CS_FL_SHWS, /* write shut status (either mode) */
CS_FL_ERROR = 0x00000100, /* a fatal error was reported */
CS_FL_RCV_MORE = 0x00000200, /* more bytes to receive but not enough room */
CS_FL_EOS = 0x00001000, /* End of stream delivered to data layer */
CS_FL_REOS = 0x00002000, /* End of stream received (buffer not empty) */
CS_FL_WAIT_FOR_HS = 0x00010000, /* This stream is waiting for handshake */
};
/* cs_shutr() modes, matching the CS_FL_SHRD/CS_FL_SHRR flags above */
enum cs_shr_mode {
CS_SHR_DRAIN = 0, /* read shutdown, drain any extra stuff */
CS_SHR_RESET = 1, /* read shutdown, reset any extra stuff */
};
/* cs_shutw() modes, matching the CS_FL_SHWN/CS_FL_SHWS flags above */
enum cs_shw_mode {
CS_SHW_NORMAL = 0, /* regular write shutdown */
CS_SHW_SILENT = 1, /* imminent close, don't notify peer */
};
/* For each direction, we have a CO_FL_{SOCK,DATA}_<DIR>_ENA flag, which
* indicates if read or write is desired in that direction for the respective
* layers. The current status corresponding to the current layer being used is
* remembered in the CO_FL_CURR_<DIR>_ENA flag. The need to poll (ie receipt of
* EAGAIN) is remembered at the file descriptor level so that even when the
* activity is stopped and restarted, we still remember whether it was needed
* to poll before attempting the I/O.
*
* The CO_FL_CURR_<DIR>_ENA flag is set from the FD status in
* conn_refresh_polling_flags(). The FD state is updated according to these
* flags in conn_cond_update_polling().
*/
/* flags for use in connection->flags */
enum {
	CO_FL_NONE          = 0x00000000,  /* Just for initialization purposes */

	/* Do not change these values without updating conn_*_poll_changes() ! */
	CO_FL_SOCK_RD_ENA   = 0x00000001,  /* receiving handshakes is allowed */
	CO_FL_XPRT_RD_ENA   = 0x00000002,  /* receiving data is allowed */
	CO_FL_CURR_RD_ENA   = 0x00000004,  /* receiving is currently allowed */
	/* unused : 0x00000008 */

	CO_FL_SOCK_WR_ENA   = 0x00000010,  /* sending handshakes is desired */
	CO_FL_XPRT_WR_ENA   = 0x00000020,  /* sending data is desired */
	CO_FL_CURR_WR_ENA   = 0x00000040,  /* sending is currently desired */
	/* unused : 0x00000080 */

	/* These flags indicate whether the Control and Transport layers are initialized */
	CO_FL_CTRL_READY    = 0x00000100,  /* FD was registered, fd_delete() needed */
	CO_FL_XPRT_READY    = 0x00000200,  /* xprt_init() done, xprt_close() needed */

	CO_FL_WILL_UPDATE   = 0x00000400,  /* the conn handler will take care of updating the polling */

	/* This flag is used by data layers to indicate they had to stop
	 * receiving data because a buffer was full. The connection handler
	 * clears it before first calling the I/O and data callbacks.
	 */
	CO_FL_WAIT_ROOM     = 0x00000800,  /* data sink is full */

	/* These flags are used to report whether the from/to addresses are set or not */
	CO_FL_ADDR_FROM_SET = 0x00001000,  /* addr.from is set */
	CO_FL_ADDR_TO_SET   = 0x00002000,  /* addr.to is set */

	CO_FL_EARLY_SSL_HS  = 0x00004000,  /* We have early data pending, don't start SSL handshake yet */
	CO_FL_EARLY_DATA    = 0x00008000,  /* At least some of the data are early data */
	/* unused : 0x00010000 */
	/* unused : 0x00020000 */

	/* flags used to remember what shutdown have been performed/reported */
	CO_FL_SOCK_RD_SH    = 0x00040000,  /* SOCK layer was notified about shutr/read0 */
	CO_FL_SOCK_WR_SH    = 0x00080000,  /* SOCK layer asked for shutw */

	/* flags used to report connection errors or other closing conditions */
	CO_FL_ERROR         = 0x00100000,  /* a fatal error was reported */
	/* NOTE: both NOTIFY_* masks deliberately alias the same combination
	 * (SOCK_RD_SH | SOCK_WR_SH | ERROR) so that any shut/error condition
	 * above triggers a notification.
	 */
	CO_FL_NOTIFY_DONE   = 0x001C0000,  /* any xprt shut/error flags above needs to be reported */
	CO_FL_NOTIFY_DATA   = 0x001C0000,  /* any shut/error flags above needs to be reported */

	/* flags used to report connection status updates */
	CO_FL_CONNECTED     = 0x00200000,  /* L4+L6 now ready ; extra handshakes may or may not exist */
	CO_FL_WAIT_L4_CONN  = 0x00400000,  /* waiting for L4 to be connected */
	CO_FL_WAIT_L6_CONN  = 0x00800000,  /* waiting for L6 to be connected (eg: SSL) */

	/*** All the flags below are used for connection handshakes. Any new
	 * handshake should be added after this point, and CO_FL_HANDSHAKE
	 * should be updated.
	 */
	CO_FL_SEND_PROXY    = 0x01000000,  /* send a valid PROXY protocol header */
	CO_FL_SSL_WAIT_HS   = 0x02000000,  /* wait for an SSL handshake to complete */
	CO_FL_ACCEPT_PROXY  = 0x04000000,  /* receive a valid PROXY protocol header */
	CO_FL_ACCEPT_CIP    = 0x08000000,  /* receive a valid NetScaler Client IP header */

	/* below we have all handshake flags grouped into one */
	CO_FL_HANDSHAKE     = CO_FL_SEND_PROXY | CO_FL_SSL_WAIT_HS | CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP,

	/* when any of these flags is set, polling is defined by socket-layer
	 * operations, as opposed to data-layer. Transport is explicitly not
	 * mentioned here to avoid any confusion, since it can be the same
	 * as DATA or SOCK on some implementations.
	 */
	CO_FL_POLL_SOCK     = CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN,

	/* This connection may not be shared between clients */
	CO_FL_PRIVATE       = 0x10000000,

	/* This flag is used to know that a PROXY protocol header was sent by the client */
	CO_FL_RCVD_PROXY    = 0x20000000,

	/* unused : 0x40000000 */

	/* This last flag indicates that the transport layer is used (for instance
	 * by logs) and must not be cleared yet. The last call to conn_xprt_close()
	 * must be done after clearing this flag.
	 */
	CO_FL_XPRT_TRACKED  = 0x80000000,
};
/* possible connection error codes. Values are implicitly sequential starting
 * at 0 (CO_ER_NONE); only append new codes to avoid renumbering existing ones.
 */
enum {
CO_ER_NONE, /* no error */
CO_ER_CONF_FDLIM, /* reached process' configured FD limitation */
CO_ER_PROC_FDLIM, /* reached process' FD limitation */
CO_ER_SYS_FDLIM, /* reached system's FD limitation */
CO_ER_SYS_MEMLIM, /* reached system buffers limitation */
CO_ER_NOPROTO, /* protocol not supported */
CO_ER_SOCK_ERR, /* other socket error */
CO_ER_PORT_RANGE, /* source port range exhausted */
CO_ER_CANT_BIND, /* can't bind to source address */
CO_ER_FREE_PORTS, /* no more free ports on the system */
CO_ER_ADDR_INUSE, /* local address already in use */
CO_ER_PRX_EMPTY, /* nothing received in PROXY protocol header */
CO_ER_PRX_ABORT, /* client abort during PROXY protocol header */
CO_ER_PRX_TIMEOUT, /* timeout while waiting for a PROXY header */
CO_ER_PRX_TRUNCATED, /* truncated PROXY protocol header */
CO_ER_PRX_NOT_HDR, /* not a PROXY protocol header */
CO_ER_PRX_BAD_HDR, /* bad PROXY protocol header */
CO_ER_PRX_BAD_PROTO, /* unsupported protocol in PROXY header */
CO_ER_CIP_EMPTY, /* nothing received in NetScaler Client IP header */
CO_ER_CIP_ABORT, /* client abort during NetScaler Client IP header */
CO_ER_CIP_TIMEOUT, /* timeout while waiting for a NetScaler Client IP header */
CO_ER_CIP_TRUNCATED, /* truncated NetScaler Client IP header */
CO_ER_CIP_BAD_MAGIC, /* bad magic number in NetScaler Client IP header */
CO_ER_CIP_BAD_PROTO, /* unsupported protocol in NetScaler Client IP header */
CO_ER_SSL_EMPTY, /* client closed during SSL handshake */
CO_ER_SSL_ABORT, /* client abort during SSL handshake */
CO_ER_SSL_TIMEOUT, /* timeout during SSL handshake */
CO_ER_SSL_TOO_MANY, /* too many SSL connections */
CO_ER_SSL_NO_MEM, /* no more memory to allocate an SSL connection */
CO_ER_SSL_RENEG, /* forbidden client renegotiation */
CO_ER_SSL_CA_FAIL, /* client cert verification failed in the CA chain */
CO_ER_SSL_CRT_FAIL, /* client cert verification failed on the certificate */
CO_ER_SSL_MISMATCH, /* Server presented an SSL certificate different from the configured one */
CO_ER_SSL_MISMATCH_SNI, /* Server presented an SSL certificate different from the expected one */
CO_ER_SSL_HANDSHAKE, /* SSL error during handshake */
CO_ER_SSL_HANDSHAKE_HB, /* SSL error during handshake with heartbeat present */
CO_ER_SSL_KILLED_HB, /* Stopped a TLSv1 heartbeat attack (CVE-2014-0160) */
CO_ER_SSL_NO_TARGET, /* unknown target (not client nor server) */
CO_ER_SSL_EARLY_FAILED, /* Server refused early data */
};
/* source address settings for outgoing connections. The tproxy values are
 * mutually exclusive and occupy the low bits covered by CO_SRC_TPROXY_MASK;
 * CO_SRC_BIND is an independent flag that may be combined with them.
 */
enum {
/* Tproxy exclusive values from 0 to 7 */
CO_SRC_TPROXY_ADDR = 0x0001, /* bind to this non-local address when connecting */
CO_SRC_TPROXY_CIP = 0x0002, /* bind to the client's IP address when connecting */
CO_SRC_TPROXY_CLI = 0x0003, /* bind to the client's IP+port when connecting */
CO_SRC_TPROXY_DYN = 0x0004, /* bind to a dynamically computed non-local address */
CO_SRC_TPROXY_MASK = 0x0007, /* bind to a non-local address when connecting */
CO_SRC_BIND = 0x0008, /* bind to a specific source address when connecting */
};
/* flags that can be passed to xprt->rcv_buf() and mux->rcv_buf() */
enum {
CO_RFL_BUF_WET = 0x0001, /* Buffer still has some output data present */
};
/* flags that can be passed to xprt->snd_buf() and mux->snd_buf(), hinting
 * at how the sender is producing data (may enable send-side optimizations).
 */
enum {
CO_SFL_MSG_MORE = 0x0001, /* More data to come afterwards */
CO_SFL_STREAMER = 0x0002, /* Producer is continuously streaming data */
};
/* known transport layers (for ease of lookup); used as array indexes, so
 * XPRT_ENTRIES must remain the last entry.
 */
enum {
XPRT_RAW = 0,
XPRT_SSL = 1,
XPRT_ENTRIES /* must be last one */
};
/* MUX-specific flags */
enum {
MX_FL_NONE = 0x00000000, /* no particular capability */
MX_FL_CLEAN_ABRT = 0x00000001, /* abort is clearly reported as an error */
};
/* xprt_ops describes transport-layer operations for a connection. They
 * generally run over a socket-based control layer, but not always. Some
 * of them are used for data transfer with the upper layer (rcv_*, snd_*)
 * and the other ones are used to setup and release the transport layer.
 */
struct xprt_ops {
	size_t (*rcv_buf)(struct connection *conn, struct buffer *buf, size_t count, int flags); /* recv callback */
	size_t (*snd_buf)(struct connection *conn, const struct buffer *buf, size_t count, int flags); /* send callback */
	int  (*rcv_pipe)(struct connection *conn, struct pipe *pipe, unsigned int count); /* recv-to-pipe callback */
	int  (*snd_pipe)(struct connection *conn, struct pipe *pipe); /* send-to-pipe callback */
	void (*shutr)(struct connection *, int);    /* shutr function */
	void (*shutw)(struct connection *, int);    /* shutw function */
	void (*close)(struct connection *);         /* close the transport layer */
	int  (*init)(struct connection *conn);      /* initialize the transport layer */
	int  (*prepare_bind_conf)(struct bind_conf *conf); /* prepare a whole bind_conf */
	void (*destroy_bind_conf)(struct bind_conf *conf); /* destroy a whole bind_conf */
	int  (*prepare_srv)(struct server *srv);    /* prepare a server context */
	void (*destroy_srv)(struct server *srv);    /* destroy a server context */
	int  (*get_alpn)(const struct connection *conn, const char **str, int *len); /* get application layer name */
	char name[8];                               /* transport layer name, zero-terminated */
	int  (*subscribe)(struct connection *conn, int event_type, void *param); /* Subscribe to events, such as "being able to send" */
};
/* mux_ops describes the mux operations, which are to be performed at the
 * connection level after data are exchanged with the transport layer in order
 * to propagate them to streams. The <init> function will automatically be
 * called once the mux is instantiated by the connection's owner at the end
 * of a transport handshake, when it is about to transfer data and the data
 * layer is not ready yet.
 */
struct mux_ops {
	int  (*init)(struct connection *conn);       /* early initialization */
	void (*recv)(struct connection *conn);       /* mux-layer recv callback */
	int  (*wake)(struct connection *conn);       /* mux-layer callback to report activity, mandatory */
	void (*update_poll)(struct conn_stream *cs); /* commit cs flags to mux/conn */
	size_t (*rcv_buf)(struct conn_stream *cs, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to get data */
	size_t (*snd_buf)(struct conn_stream *cs, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to send data */
	int  (*rcv_pipe)(struct conn_stream *cs, struct pipe *pipe, unsigned int count); /* recv-to-pipe callback */
	int  (*snd_pipe)(struct conn_stream *cs, struct pipe *pipe); /* send-to-pipe callback */
	void (*shutr)(struct conn_stream *cs, enum cs_shr_mode);     /* shutr function */
	void (*shutw)(struct conn_stream *cs, enum cs_shw_mode);     /* shutw function */
	struct conn_stream *(*attach)(struct connection *); /* Create and attach a conn_stream to an outgoing connection */
	void (*detach)(struct conn_stream *);  /* Detach a conn_stream from an outgoing connection, when the request is done */
	void (*show_fd)(struct buffer *, struct connection *); /* append some data about connection into chunk for "show fd" */
	int  (*subscribe)(struct conn_stream *cs, int event_type, void *param); /* Subscribe to events, such as "being able to send" */
	unsigned int flags;                    /* some flags characterizing the mux's capabilities (MX_FL_*) */
	char name[8];                          /* mux layer name, zero-terminated */
};
/* data_cb describes the data layer's recv and send callbacks which are called
 * when I/O activity was detected after the transport layer is ready. These
 * callbacks are supposed to make use of the xprt_ops above to exchange data
 * from/to buffers and pipes. The <wake> callback is used to report activity
 * at the transport layer, which can be a connection opening/close, or any
 * data movement. It may abort a connection by returning < 0.
 */
struct data_cb {
	void (*recv)(struct conn_stream *cs);  /* data-layer recv callback */
	int  (*wake)(struct conn_stream *cs);  /* data-layer callback to report activity */
	int  (*subscribe)(struct conn_stream *cs, int event_type, void *param); /* Subscribe to events, such as "being able to send" */
	char name[8];                          /* data layer name, zero-terminated */
};
/* Minimal local view of a TCP header restricted to the source and destination
 * ports, avoiding a dependency on the system's full struct tcphdr layout.
 * NOTE(review): presumably the values are in network byte order as read from
 * the wire -- confirm at the points where this struct is used.
 */
struct my_tcphdr {
	uint16_t source; /* source port */
	uint16_t dest;   /* destination port */
};
/* a connection source profile defines all the parameters needed to properly
 * bind an outgoing connection for a server or proxy.
 */
struct conn_src {
	unsigned int opts;                   /* CO_SRC_* option flags */
	int iface_len;                       /* bind interface name length */
	char *iface_name;                    /* bind interface name or NULL */
	struct port_range *sport_range;      /* optional per-server TCP source ports */
	struct sockaddr_storage source_addr; /* the address to which we want to bind for connect() */
#if defined(CONFIG_HAP_TRANSPARENT)
	/* transparent-proxy support: bind to a non-local (spoofed) source */
	struct sockaddr_storage tproxy_addr; /* non-local address we want to bind to for connect() */
	char *bind_hdr_name;                 /* bind to this header name if defined */
	int bind_hdr_len;                    /* length of the name of the header above */
	int bind_hdr_occ;                    /* occurrence number of header above: >0 = from first, <0 = from end, 0=disabled */
#endif
};
/*
 * This structure describes the elements of a connection relevant to a stream:
 * it is the per-stream endpoint sitting between the mux (below) and the
 * upper data layer (eg: stream interface) pointed to by <data>.
 */
struct conn_stream {
	enum obj_type obj_type;        /* differentiates connection from applet context */
	/* 3 bytes hole here */
	unsigned int flags;            /* CS_FL_* */
	struct connection *conn;       /* xprt-level connection */
	struct wait_list wait_list;    /* We're in a wait list for send */
	struct list send_wait_list;    /* list of tasks to wake when we're ready to send */
	void *data;                    /* pointer to upper layer's entity (eg: stream interface) */
	const struct data_cb *data_cb; /* data layer callbacks. Must be set before xprt->init() */
	void *ctx;                     /* mux-specific context */
};
/* This structure describes a connection with its methods and data.
* A connection may be performed to proxy or server via a local or remote
* socket, and can also be made to an internal applet. It can support
* several transport schemes (raw, ssl, ...). It can support several
* connection control schemes, generally a protocol for socket-oriented
* connections, but other methods for applets. The xprt_done_cb() callback
* is called once the transport layer initialization is done (success or
* failure). It may return < 0 to report an error and require an abort of the
* connection being instanciated. It must be removed once done.
*/
struct connection {
/* first cache line */
enum obj_type obj_type; /* differentiates connection from applet context */
unsigned char err_code; /* CO_ER_* */
signed short send_proxy_ofs; /* <0 = offset to (re)send from the end, >0 = send all */
unsigned int flags; /* CO_FL_* */
const struct protocol *ctrl; /* operations at the socket layer */
const struct xprt_ops *xprt; /* operations at the transport layer */
MEDIUM: connection: start to introduce a mux layer between xprt and data For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside a connection. After quite a long brainstorming, it appears that the connection interface to the existing streams is appropriate just like the connection interface to the lower layers. In fact we need to have the mux layer in the middle of the connection, between the transport and the data layer. A mux can exist on two directions/sides. On the inbound direction, it instanciates new streams from incoming connections, while on the outbound direction it muxes streams into outgoing connections. The difference is visible on the mux->init() call : in one case, an upper context is already known (outgoing connection), and in the other case, the upper context is not yet known (incoming connection) and will have to be allocated by the mux. The session doesn't have to create the new streams anymore, as this is performed by the mux itself. This patch introduces this and creates a pass-through mux called "mux_pt" which is used for all new connections and which only calls the data layer's recv,send,wake() calls. One incoming stream is immediately created when init() is called on the inbound direction. There should not be any visible impact. Note that the connection's mux is purposely not set until the session is completed so that we don't accidently run with the wrong mux. This must not cause any issue as the xprt_done_cb function is always called prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
const struct mux_ops *mux; /* mux layer opreations. Must be set before xprt->init() */
void *xprt_ctx; /* general purpose pointer, initialized to NULL */
MEDIUM: connection: start to introduce a mux layer between xprt and data For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside a connection. After quite a long brainstorming, it appears that the connection interface to the existing streams is appropriate just like the connection interface to the lower layers. In fact we need to have the mux layer in the middle of the connection, between the transport and the data layer. A mux can exist on two directions/sides. On the inbound direction, it instanciates new streams from incoming connections, while on the outbound direction it muxes streams into outgoing connections. The difference is visible on the mux->init() call : in one case, an upper context is already known (outgoing connection), and in the other case, the upper context is not yet known (incoming connection) and will have to be allocated by the mux. The session doesn't have to create the new streams anymore, as this is performed by the mux itself. This patch introduces this and creates a pass-through mux called "mux_pt" which is used for all new connections and which only calls the data layer's recv,send,wake() calls. One incoming stream is immediately created when init() is called on the inbound direction. There should not be any visible impact. Note that the connection's mux is purposely not set until the session is completed so that we don't accidently run with the wrong mux. This must not cause any issue as the xprt_done_cb function is always called prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
void *mux_ctx; /* mux-specific context, initialized to NULL */
void *owner; /* pointer to the owner session for incoming connections, or NULL */
enum obj_type *target; /* the target to connect to (server, proxy, applet, ...) */
/* second cache line */
struct list send_wait_list; /* list of tasks to wake when we're ready to send */
struct list list; /* attach point to various connection lists (idle, ...) */
int xprt_st; /* transport layer state, initialized to zero */
MEDIUM: connection: start to introduce a mux layer between xprt and data For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside a connection. After quite a long brainstorming, it appears that the connection interface to the existing streams is appropriate just like the connection interface to the lower layers. In fact we need to have the mux layer in the middle of the connection, between the transport and the data layer. A mux can exist on two directions/sides. On the inbound direction, it instanciates new streams from incoming connections, while on the outbound direction it muxes streams into outgoing connections. The difference is visible on the mux->init() call : in one case, an upper context is already known (outgoing connection), and in the other case, the upper context is not yet known (incoming connection) and will have to be allocated by the mux. The session doesn't have to create the new streams anymore, as this is performed by the mux itself. This patch introduces this and creates a pass-through mux called "mux_pt" which is used for all new connections and which only calls the data layer's recv,send,wake() calls. One incoming stream is immediately created when init() is called on the inbound direction. There should not be any visible impact. Note that the connection's mux is purposely not set until the session is completed so that we don't accidently run with the wrong mux. This must not cause any issue as the xprt_done_cb function is always called prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
int tmp_early_data; /* 1st byte of early data, if any */
int sent_early_data; /* Amount of early data we sent so far */
union conn_handle handle; /* connection handle at the socket layer */
const struct netns_entry *proxy_netns;
int (*xprt_done_cb)(struct connection *conn); /* callback to notify of end of handshake */
/* third cache line and beyond */
void (*destroy_cb)(struct connection *conn); /* callback to notify of imminent death of the connection */
struct {
struct sockaddr_storage from; /* client address, or address to spoof when connecting to the server */
struct sockaddr_storage to; /* address reached by the client, or address to connect to */
} addr; /* addresses of the remote side, client for producer and server for consumer */
};
/* PROTO token registration */
/* Bit mask describing which proxy modes (TCP and/or HTTP) a mux protocol
 * supports. The TCP and HTTP bit values are fixed ("must not be changed"),
 * presumably because other code relies on these exact values -- TODO confirm.
 */
enum proto_proxy_mode {
	PROTO_MODE_NONE = 0,
	PROTO_MODE_TCP = 1 << 0, // must not be changed!
	PROTO_MODE_HTTP = 1 << 1, // must not be changed!
	PROTO_MODE_ANY = PROTO_MODE_TCP | PROTO_MODE_HTTP,
};
/* Which side(s) of a proxy a mux protocol may be used on; the FE/BE values
 * deliberately mirror the PR_CAP_FE/PR_CAP_BE capability flags (see notes
 * on each value).
 */
enum proto_proxy_side {
	PROTO_SIDE_NONE = 0,
	PROTO_SIDE_FE = 1, // same as PR_CAP_FE
	PROTO_SIDE_BE = 2, // same as PR_CAP_BE
	PROTO_SIDE_BOTH = PROTO_SIDE_FE | PROTO_SIDE_BE,
};
/* Registration entry associating a protocol token with the mux_ops that
 * implements it, along with the modes and sides it supports. Entries are
 * chained together via <list> (the list head is not visible in this file).
 */
struct mux_proto_list {
	const struct ist token;     /* token name and length. Empty is catch-all */
	enum proto_proxy_mode mode; /* supported proxy modes (PROTO_MODE_*) */
	enum proto_proxy_side side; /* supported sides (PROTO_SIDE_*) */
	const struct mux_ops *mux;  /* mux operations implementing this protocol */
	struct list list;           /* chaining of registered mux protocols */
};
/* proxy protocol v2 definitions: a fixed 12-byte signature, a ver_cmd byte,
 * a fam byte and a 16-bit payload length, followed by a per-family address
 * block (see struct proxy_hdr_v2 below).
 */
#define PP2_SIGNATURE "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"
#define PP2_SIGNATURE_LEN 12
#define PP2_HEADER_LEN 16

/* ver_cmd byte: upper nibble = protocol version, lower nibble = command */
#define PP2_CMD_LOCAL 0x00
#define PP2_CMD_PROXY 0x01
#define PP2_CMD_MASK 0x0F

#define PP2_VERSION 0x20
#define PP2_VERSION_MASK 0xF0

/* fam byte: lower nibble = transport protocol, upper nibble = address family */
#define PP2_TRANS_UNSPEC 0x00
#define PP2_TRANS_STREAM 0x01
#define PP2_TRANS_DGRAM 0x02
#define PP2_TRANS_MASK 0x0F

#define PP2_FAM_UNSPEC 0x00
#define PP2_FAM_INET 0x10
#define PP2_FAM_INET6 0x20
#define PP2_FAM_UNIX 0x30
#define PP2_FAM_MASK 0xF0

/* per-family address block sizes: src addr + dst addr (+ src/dst ports) */
#define PP2_ADDR_LEN_UNSPEC (0)
#define PP2_ADDR_LEN_INET (4 + 4 + 2 + 2)
#define PP2_ADDR_LEN_INET6 (16 + 16 + 2 + 2)
#define PP2_ADDR_LEN_UNIX (108 + 108)

/* total header sizes = fixed header + address block */
#define PP2_HDR_LEN_UNSPEC (PP2_HEADER_LEN + PP2_ADDR_LEN_UNSPEC)
#define PP2_HDR_LEN_INET (PP2_HEADER_LEN + PP2_ADDR_LEN_INET)
#define PP2_HDR_LEN_INET6 (PP2_HEADER_LEN + PP2_ADDR_LEN_INET6)
#define PP2_HDR_LEN_UNIX (PP2_HEADER_LEN + PP2_ADDR_LEN_UNIX)
/* On-the-wire layout of a PROXY protocol v2 header: signature, ver_cmd and
 * fam bytes, payload length, then the address block whose variant depends on
 * the address family announced in <fam>.
 * NOTE(review): multi-byte fields (<len>, ports) are presumably in network
 * byte order as on the wire -- confirm at the read/write sites.
 */
struct proxy_hdr_v2 {
	uint8_t sig[12];  /* hex 0D 0A 0D 0A 00 0D 0A 51 55 49 54 0A */
	uint8_t ver_cmd;  /* protocol version and command */
	uint8_t fam;      /* protocol family and transport */
	uint16_t len;     /* number of following bytes part of the header */
	union {
		struct {  /* for TCP/UDP over IPv4, len = 12 */
			uint32_t src_addr;
			uint32_t dst_addr;
			uint16_t src_port;
			uint16_t dst_port;
		} ip4;
		struct {  /* for TCP/UDP over IPv6, len = 36 */
			uint8_t src_addr[16];
			uint8_t dst_addr[16];
			uint16_t src_port;
			uint16_t dst_port;
		} ip6;
		struct {  /* for AF_UNIX sockets, len = 216 */
			uint8_t src_addr[108];
			uint8_t dst_addr[108];
		} unx;
	} addr;
};
/* TLV type values (PP2_TYPE_*) and SSL sub-type values (PP2_SUBTYPE_SSL_*)
 * carried in the optional TLV area after the PROXY v2 address block.
 */
#define PP2_TYPE_ALPN 0x01
#define PP2_TYPE_AUTHORITY 0x02
#define PP2_TYPE_CRC32C 0x03
#define PP2_TYPE_NOOP 0x04
#define PP2_TYPE_SSL 0x20
#define PP2_SUBTYPE_SSL_VERSION 0x21
#define PP2_SUBTYPE_SSL_CN 0x22
#define PP2_SUBTYPE_SSL_CIPHER 0x23
#define PP2_SUBTYPE_SSL_SIG_ALG 0x24
#define PP2_SUBTYPE_SSL_KEY_ALG 0x25
#define PP2_TYPE_NETNS 0x30
/* A generic TLV element: one type byte plus a 16-bit length split into
 * high/low bytes (big-endian, per the length_hi/length_lo naming), followed
 * by <length> bytes of value. TLV_HEADER_SIZE covers the 3 fixed bytes.
 * The trailing zero-length array is kept (instead of a C99 flexible array
 * member) because struct tlv is embedded as the first member of struct
 * tlv_ssl below, which a flexible array member would not allow.
 */
#define TLV_HEADER_SIZE 3
struct tlv {
	uint8_t type;       /* PP2_TYPE_* */
	uint8_t length_hi;  /* high byte of the value length */
	uint8_t length_lo;  /* low byte of the value length */
	uint8_t value[0];   /* <length> bytes of payload follow */
}__attribute__((packed));

/* The PP2_TYPE_SSL TLV: client flags and a verify status, followed by
 * nested sub-TLVs (PP2_SUBTYPE_SSL_*).
 */
struct tlv_ssl {
	struct tlv tlv;     /* enclosing TLV header (type = PP2_TYPE_SSL) */
	uint8_t client;     /* PP2_CLIENT_* flags */
	uint32_t verify;    /* verify status -- TODO confirm exact semantics at users */
	uint8_t sub_tlv[0]; /* nested sub-TLVs follow */
}__attribute__((packed));
/* flag bits for the <client> field of struct tlv_ssl above */
#define PP2_CLIENT_SSL 0x01
#define PP2_CLIENT_CERT_CONN 0x02
#define PP2_CLIENT_CERT_SESS 0x04

/*
 * Linux seems to be able to send 253 fds per sendmsg(), not sure
 * about the other OSes.
 */
/* Max number of file descriptors we send in one sendmsg() */
#define MAX_SEND_FD 253
#endif /* _TYPES_CONNECTION_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/