haproxy/include/types/connection.h


/*
* include/types/connection.h
* This file describes the connection struct and associated constants.
*
* Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, version 2.1
* exclusively.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TYPES_CONNECTION_H
#define _TYPES_CONNECTION_H
#include <stdlib.h>
#include <sys/socket.h>
#include <common/config.h>
#include <common/ist.h>
#include <types/listener.h>
#include <types/obj_type.h>
#include <types/port_range.h>
#include <types/protocol.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
/* referenced below */
struct connection;
struct conn_stream;
struct cs_info;
struct buffer;
struct proxy;
struct server;
struct session;
struct pipe;
/* socks4 upstream proxy definitions */
struct socks4_request {
uint8_t version; /* SOCKS version number, 1 byte, must be 0x04 for this version */
uint8_t command; /* 0x01 = establish a TCP/IP stream connection */
uint16_t port; /* port number, 2 bytes (in network byte order) */
uint32_t ip; /* IP address, 4 bytes (in network byte order) */
char user_id[8]; /* the user ID string, variable length, terminated with a null (0x00); Using "HAProxy\0" */
};
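/* Illustrative sketch (not part of the original header): filling the request
 * above for a CONNECT through an upstream SOCKS4 proxy. Assumes <string.h>
 * and <netinet/in.h> are available; <dst> already holds the target address
 * and port in network byte order, as the socket API provides them.
 */
static inline void example_socks4_req_init(struct socks4_request *req,
                                           const struct sockaddr_in *dst)
{
	req->version = 0x04;                  /* must be 0x04 for SOCKS4 */
	req->command = 0x01;                  /* establish a TCP/IP stream */
	req->port    = dst->sin_port;         /* already network byte order */
	req->ip      = dst->sin_addr.s_addr;  /* already network byte order */
	memcpy(req->user_id, "HAProxy", 8);   /* copies the trailing '\0' too */
}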
/* Note: subscribing to these events is only valid after the caller has really
* attempted to perform the operation, and failed to proceed or complete.
*/
enum sub_event_type {
SUB_RETRY_RECV = 0x00000001, /* Schedule the tasklet when we can attempt to recv again */
SUB_RETRY_SEND = 0x00000002, /* Schedule the tasklet when we can attempt to send again */
};
/* Describes a set of subscriptions. Multiple events may be registered at the
* same time. The callee should assume everything not pending for completion is
* implicitly possible. It's illegal to change the tasklet if events are still
* registered.
*/
struct wait_event {
struct tasklet *tasklet;
int events; /* set of enum sub_event_type above */
};
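/* Illustrative sketch (not part of the original header): a wait_event is
 * armed once and reused; <events> starts empty and accumulates SUB_* bits as
 * subscriptions are registered through the xprt/mux subscribe callbacks
 * declared further below. The function name is hypothetical.
 */
static inline void example_wev_init(struct wait_event *wev, struct tasklet *t)
{
	wev->tasklet = t; /* woken when a registered event becomes possible */
	wev->events  = 0; /* no SUB_RETRY_* subscription registered yet */
}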
/* A connection handle is how we differentiate two connections on the lower
* layers. It usually is a file descriptor but can be a connection id.
*/
union conn_handle {
int fd; /* file descriptor, for regular sockets */
};
/* conn_stream flags */
enum {
CS_FL_NONE = 0x00000000, /* Just for initialization purposes */
CS_FL_SHRD = 0x00000010, /* read shut, draining extra data */
CS_FL_SHRR = 0x00000020, /* read shut, resetting extra data */
CS_FL_SHR = CS_FL_SHRD | CS_FL_SHRR, /* read shut status */
CS_FL_SHWN = 0x00000040, /* write shut, verbose mode */
CS_FL_SHWS = 0x00000080, /* write shut, silent mode */
CS_FL_SHW = CS_FL_SHWN | CS_FL_SHWS, /* write shut status */
CS_FL_ERROR = 0x00000100, /* a fatal error was reported */
CS_FL_RCV_MORE = 0x00000200, /* We may have more bytes to transfer */
CS_FL_WANT_ROOM = 0x00000400, /* More bytes to transfer, but not enough room */
CS_FL_ERR_PENDING = 0x00000800, /* An error is pending, but there's still data to be read */
CS_FL_EOS = 0x00001000, /* End of stream delivered to data layer */
/* unused: 0x00002000 */
CS_FL_EOI = 0x00004000, /* end-of-input reached */
CS_FL_MAY_SPLICE = 0x00008000, /* caller may use rcv_pipe() only if this flag is set */
CS_FL_WAIT_FOR_HS = 0x00010000, /* This stream is waiting for a handshake */
CS_FL_KILL_CONN = 0x00020000, /* must kill the connection when the CS closes */
/* following flags are supposed to be set by the mux and read/unset by
* the stream-interface :
*/
CS_FL_NOT_FIRST = 0x00100000, /* this stream is not the first one */
CS_FL_READ_PARTIAL = 0x00200000, /* some data were received (not necessarily xferred) */
};
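/* Illustrative sketch (not part of the original header): CS_FL_SHR and
 * CS_FL_SHW are composite masks, so a single test covers both the drain and
 * reset (resp. verbose and silent) variants. <flags> is a conn_stream's
 * flags word.
 */
static inline int example_cs_read_closed(unsigned int flags)
{
	return (flags & CS_FL_SHR) != 0; /* set for either SHRD or SHRR */
}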
/* cs_shutr() modes */
enum cs_shr_mode {
CS_SHR_DRAIN = 0, /* read shutdown, drain any extra stuff */
CS_SHR_RESET = 1, /* read shutdown, reset any extra stuff */
};
/* cs_shutw() modes */
enum cs_shw_mode {
CS_SHW_NORMAL = 0, /* regular write shutdown */
CS_SHW_SILENT = 1, /* imminent close, don't notify peer */
};
/* For each direction, we have a CO_FL_XPRT_<DIR>_ENA flag, which indicates
* whether read or write is desired in that direction for the respective
* layers. The need to poll (i.e. receipt of EAGAIN) is remembered at the
* file descriptor level so that even when the activity is stopped and
* restarted, we still remember whether it was needed to poll before
* attempting the I/O.
*
* The FD state is updated according to CO_FL_XPRT_<DIR>_ENA in
* conn_cond_update_polling().
*/
/* flags for use in connection->flags */
enum {
CO_FL_NONE = 0x00000000, /* Just for initialization purposes */
/* Do not change these values without updating conn_*_poll_changes() ! */
CO_FL_SAFE_LIST = 0x00000001, /* 0 = not in any list, 1 = in safe_list */
CO_FL_IDLE_LIST = 0x00000002, /* 2 = in idle_list, 3 = invalid */
CO_FL_LIST_MASK = 0x00000003, /* Is the connection in any server-managed list ? */
/* unused : 0x00000004, 0x00000008 */
/* unused : 0x00000010 */
/* unused : 0x00000020 */
/* unused : 0x00000040, 0x00000080 */
/* These flags indicate whether the Control and Transport layers are initialized */
CO_FL_CTRL_READY = 0x00000100, /* FD was registered, fd_delete() needed */
CO_FL_XPRT_READY = 0x00000200, /* xprt_init() done, xprt_close() needed */
/* unused : 0x00000400 */
/* This flag is used by data layers to indicate they had to stop
* receiving data because a buffer was full. The connection handler
* clears it before first calling the I/O and data callbacks.
*/
CO_FL_WAIT_ROOM = 0x00000800, /* data sink is full */
/* These flags are used to report whether the from/to addresses are set or not */
CO_FL_ADDR_FROM_SET = 0x00001000, /* addr.from is set */
CO_FL_ADDR_TO_SET = 0x00002000, /* addr.to is set */
CO_FL_EARLY_SSL_HS = 0x00004000, /* We have early data pending, don't start SSL handshake yet */
CO_FL_EARLY_DATA = 0x00008000, /* At least some of the data are early data */
CO_FL_SOCKS4_SEND = 0x00010000, /* handshaking with upstream SOCKS4 proxy, going to send the handshake */
CO_FL_SOCKS4_RECV = 0x00020000, /* handshaking with upstream SOCKS4 proxy, going to check if the handshake succeeded */
/* flags used to remember which shutdowns have been performed/reported */
CO_FL_SOCK_RD_SH = 0x00040000, /* SOCK layer was notified about shutr/read0 */
CO_FL_SOCK_WR_SH = 0x00080000, /* SOCK layer asked for shutw */
/* flags used to report connection errors or other closing conditions */
CO_FL_ERROR = 0x00100000, /* a fatal error was reported */
CO_FL_NOTIFY_DONE = 0x001C0000, /* any of the xprt shut/error flags above needs to be reported */
/* flags used to report connection status updates */
CO_FL_WAIT_L4_CONN = 0x00400000, /* waiting for L4 to be connected */
CO_FL_WAIT_L6_CONN = 0x00800000, /* waiting for L6 to be connected (eg: SSL) */
CO_FL_WAIT_L4L6 = 0x00C00000, /* waiting for L4 and/or L6 to be connected */
/* All the flags below are used for connection handshakes. Any new
* handshake should be added after this point, and CO_FL_HANDSHAKE
* should be updated.
*/
CO_FL_SEND_PROXY = 0x01000000, /* send a valid PROXY protocol header */
CO_FL_ACCEPT_PROXY = 0x02000000, /* receive a valid PROXY protocol header */
CO_FL_ACCEPT_CIP = 0x04000000, /* receive a valid NetScaler Client IP header */
/* below we have all handshake flags grouped into one */
CO_FL_HANDSHAKE = CO_FL_SEND_PROXY | CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP | CO_FL_SOCKS4_SEND | CO_FL_SOCKS4_RECV,
CO_FL_WAIT_XPRT = CO_FL_WAIT_L4_CONN | CO_FL_HANDSHAKE | CO_FL_WAIT_L6_CONN,
CO_FL_SSL_WAIT_HS = 0x08000000, /* wait for an SSL handshake to complete */
/* This connection may not be shared between clients */
CO_FL_PRIVATE = 0x10000000,
/* This flag is used to know that a PROXY protocol header was sent by the client */
CO_FL_RCVD_PROXY = 0x20000000,
/* The connection is unused by its owner */
CO_FL_SESS_IDLE = 0x40000000,
/* This last flag indicates that the transport layer is used (for instance
* by logs) and must not be cleared yet. The last call to conn_xprt_close()
* must be done after clearing this flag.
*/
CO_FL_XPRT_TRACKED = 0x80000000,
/* below we have all SOCKS handshake flags grouped into one */
CO_FL_SOCKS4 = CO_FL_SOCKS4_SEND | CO_FL_SOCKS4_RECV,
};
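/* Illustrative sketch (not part of the original header): CO_FL_WAIT_XPRT
 * groups everything that must complete before the transport is usable (L4
 * connect, handshakes, L6 negotiation), so readiness is a single mask test.
 * <flags> is a connection's flags word.
 */
static inline int example_conn_xprt_ready(unsigned int flags)
{
	return !(flags & CO_FL_WAIT_XPRT); /* no setup step still pending */
}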
/* possible connection error codes */
enum {
CO_ER_NONE, /* no error */
CO_ER_CONF_FDLIM, /* reached process' configured FD limitation */
CO_ER_PROC_FDLIM, /* reached process' FD limitation */
CO_ER_SYS_FDLIM, /* reached system's FD limitation */
CO_ER_SYS_MEMLIM, /* reached system buffers limitation */
CO_ER_NOPROTO, /* protocol not supported */
CO_ER_SOCK_ERR, /* other socket error */
CO_ER_PORT_RANGE, /* source port range exhausted */
CO_ER_CANT_BIND, /* can't bind to source address */
CO_ER_FREE_PORTS, /* no more free ports on the system */
CO_ER_ADDR_INUSE, /* local address already in use */
CO_ER_PRX_EMPTY, /* nothing received in PROXY protocol header */
CO_ER_PRX_ABORT, /* client abort during PROXY protocol header */
CO_ER_PRX_TIMEOUT, /* timeout while waiting for a PROXY header */
CO_ER_PRX_TRUNCATED, /* truncated PROXY protocol header */
CO_ER_PRX_NOT_HDR, /* not a PROXY protocol header */
CO_ER_PRX_BAD_HDR, /* bad PROXY protocol header */
CO_ER_PRX_BAD_PROTO, /* unsupported protocol in PROXY header */
CO_ER_CIP_EMPTY, /* nothing received in NetScaler Client IP header */
CO_ER_CIP_ABORT, /* client abort during NetScaler Client IP header */
CO_ER_CIP_TIMEOUT, /* timeout while waiting for a NetScaler Client IP header */
CO_ER_CIP_TRUNCATED, /* truncated NetScaler Client IP header */
CO_ER_CIP_BAD_MAGIC, /* bad magic number in NetScaler Client IP header */
CO_ER_CIP_BAD_PROTO, /* unsupported protocol in NetScaler Client IP header */
CO_ER_SSL_EMPTY, /* client closed during SSL handshake */
CO_ER_SSL_ABORT, /* client abort during SSL handshake */
CO_ER_SSL_TIMEOUT, /* timeout during SSL handshake */
CO_ER_SSL_TOO_MANY, /* too many SSL connections */
CO_ER_SSL_NO_MEM, /* no more memory to allocate an SSL connection */
CO_ER_SSL_RENEG, /* forbidden client renegotiation */
CO_ER_SSL_CA_FAIL, /* client cert verification failed in the CA chain */
CO_ER_SSL_CRT_FAIL, /* client cert verification failed on the certificate */
CO_ER_SSL_MISMATCH, /* Server presented an SSL certificate different from the configured one */
CO_ER_SSL_MISMATCH_SNI, /* Server presented an SSL certificate different from the expected one */
CO_ER_SSL_HANDSHAKE, /* SSL error during handshake */
CO_ER_SSL_HANDSHAKE_HB, /* SSL error during handshake with heartbeat present */
CO_ER_SSL_KILLED_HB, /* Stopped a TLSv1 heartbeat attack (CVE-2014-0160) */
CO_ER_SSL_NO_TARGET, /* unknown target (not client nor server) */
CO_ER_SSL_EARLY_FAILED, /* Server refused early data */
CO_ER_SOCKS4_SEND, /* SOCKS4 Proxy write error during handshake */
CO_ER_SOCKS4_RECV, /* SOCKS4 Proxy read error during handshake */
CO_ER_SOCKS4_DENY, /* SOCKS4 Proxy denied the request */
CO_ER_SOCKS4_ABORT, /* SOCKS4 Proxy handshake aborted by server */
};
/* source address settings for outgoing connections */
enum {
/* Tproxy exclusive values from 0 to 7 */
CO_SRC_TPROXY_ADDR = 0x0001, /* bind to this non-local address when connecting */
CO_SRC_TPROXY_CIP = 0x0002, /* bind to the client's IP address when connecting */
CO_SRC_TPROXY_CLI = 0x0003, /* bind to the client's IP+port when connecting */
CO_SRC_TPROXY_DYN = 0x0004, /* bind to a dynamically computed non-local address */
CO_SRC_TPROXY_MASK = 0x0007, /* bind to a non-local address when connecting */
CO_SRC_BIND = 0x0008, /* bind to a specific source address when connecting */
};
/* flags that can be passed to xprt->rcv_buf() and mux->rcv_buf() */
enum {
CO_RFL_BUF_WET = 0x0001, /* Buffer still has some output data present */
CO_RFL_BUF_FLUSH = 0x0002, /* Flush mux's buffers but don't read more data */
CO_RFL_READ_ONCE = 0x0004, /* don't loop even if the request/response is small */
};
/* flags that can be passed to xprt->snd_buf() and mux->snd_buf() */
enum {
CO_SFL_MSG_MORE = 0x0001, /* More data to come afterwards */
CO_SFL_STREAMER = 0x0002, /* Producer is continuously streaming data */
};
/* known transport layers (for ease of lookup) */
enum {
XPRT_RAW = 0,
XPRT_SSL = 1,
XPRT_HANDSHAKE = 2,
XPRT_ENTRIES /* must be last one */
};
/* MUX-specific flags */
enum {
MX_FL_NONE = 0x00000000,
MX_FL_CLEAN_ABRT = 0x00000001, /* abort is clearly reported as an error */
MX_FL_HTX = 0x00000002, /* set if it is an HTX multiplexer */
};
/* xprt_ops describes transport-layer operations for a connection. They
* generally run over a socket-based control layer, but not always. Some
* of them are used for data transfer with the upper layer (rcv_*, snd_*)
* and the other ones are used to setup and release the transport layer.
*/
struct xprt_ops {
size_t (*rcv_buf)(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags); /* recv callback */
size_t (*snd_buf)(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags); /* send callback */
int (*rcv_pipe)(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count); /* recv-to-pipe callback */
int (*snd_pipe)(struct connection *conn, void *xprt_ctx, struct pipe *pipe); /* send-to-pipe callback */
void (*shutr)(struct connection *conn, void *xprt_ctx, int); /* shutr function */
void (*shutw)(struct connection *conn, void *xprt_ctx, int); /* shutw function */
void (*close)(struct connection *conn, void *xprt_ctx); /* close the transport layer */
int (*init)(struct connection *conn, void **ctx); /* initialize the transport layer */
int (*prepare_bind_conf)(struct bind_conf *conf); /* prepare a whole bind_conf */
void (*destroy_bind_conf)(struct bind_conf *conf); /* destroy a whole bind_conf */
int (*prepare_srv)(struct server *srv); /* prepare a server context */
void (*destroy_srv)(struct server *srv); /* destroy a server context */
int (*get_alpn)(const struct connection *conn, void *xprt_ctx, const char **str, int *len); /* get application layer name */
char name[8]; /* transport layer name, zero-terminated */
int (*subscribe)(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); /* Subscribe <es> to events, such as "being able to send" */
int (*unsubscribe)(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es); /* Unsubscribe <es> from events */
int (*remove_xprt)(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx); /* Remove an xprt from the connection, used by temporary xprt such as the handshake one */
int (*add_xprt)(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops); /* Add a new XPRT as the new xprt, and return the old one */
};
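/* Illustrative sketch (not part of the original header): the subscription
 * contract described with sub_event_type above. The caller first attempts
 * the operation through xprt_ops, and only subscribes for a retry when the
 * transport could not take everything. <ops>, <ctx> and <wev> stand for the
 * connection's xprt, xprt_ctx and a caller-owned wait_event.
 */
static inline size_t example_xprt_send(const struct xprt_ops *ops,
                                       struct connection *conn, void *ctx,
                                       const struct buffer *buf, size_t count,
                                       struct wait_event *wev)
{
	size_t sent = ops->snd_buf(conn, ctx, buf, count, 0);

	if (sent < count && !(wev->events & SUB_RETRY_SEND))
		ops->subscribe(conn, ctx, SUB_RETRY_SEND, wev); /* wake us later */
	return sent;
}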
enum mux_ctl_type {
MUX_STATUS, /* Expects an int as output, sets it to a combination of MUX_STATUS flags */
};
#define MUX_STATUS_READY (1 << 0)
/* mux_ops describes the mux operations, which are to be performed at the
* connection level after data are exchanged with the transport layer in order
* to propagate them to streams. The <init> function will automatically be
* called once the mux is instantiated by the connection's owner at the end
* of a transport handshake, when it is about to transfer data and the data
* layer is not ready yet.
*/
struct mux_ops {
int (*init)(struct connection *conn, struct proxy *prx, struct session *sess, struct buffer *input); /* early initialization */
int (*wake)(struct connection *conn); /* mux-layer callback to report activity, mandatory */
size_t (*rcv_buf)(struct conn_stream *cs, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to get data */
size_t (*snd_buf)(struct conn_stream *cs, struct buffer *buf, size_t count, int flags); /* Called from the upper layer to send data */
int (*rcv_pipe)(struct conn_stream *cs, struct pipe *pipe, unsigned int count); /* recv-to-pipe callback */
int (*snd_pipe)(struct conn_stream *cs, struct pipe *pipe); /* send-to-pipe callback */
void (*shutr)(struct conn_stream *cs, enum cs_shr_mode); /* shutr function */
void (*shutw)(struct conn_stream *cs, enum cs_shw_mode); /* shutw function */
struct conn_stream *(*attach)(struct connection *, struct session *sess); /* Create and attach a conn_stream to an outgoing connection */
const struct conn_stream *(*get_first_cs)(const struct connection *); /* retrieves any valid conn_stream from this connection */
void (*detach)(struct conn_stream *); /* Detach a conn_stream from an outgoing connection, when the request is done */
void (*show_fd)(struct buffer *, struct connection *); /* append some data about connection into chunk for "show fd" */
int (*subscribe)(struct conn_stream *cs, int event_type, struct wait_event *es); /* Subscribe <es> to events, such as "being able to send" */
int (*unsubscribe)(struct conn_stream *cs, int event_type, struct wait_event *es); /* Unsubscribe <es> from events */
int (*avail_streams)(struct connection *conn); /* Returns the number of streams still available for a connection */
int (*used_streams)(struct connection *conn); /* Returns the number of streams in use on a connection. */
void (*destroy)(void *ctx); /* Let the mux know one of its users left, so it may have to disappear */
void (*reset)(struct connection *conn); /* Reset the mux, because we're re-trying to connect */
const struct cs_info *(*get_cs_info)(struct conn_stream *cs); /* Return info on the specified conn_stream or NULL if not defined */
int (*ctl)(struct connection *conn, enum mux_ctl_type mux_ctl, void *arg); /* Provides information about the mux */
int (*takeover)(struct connection *conn); /* Attempts to migrate the connection to the current thread */
unsigned int flags; /* some flags characterizing the mux's capabilities (MX_FL_*) */
char name[8]; /* mux layer name, zero-terminated */
};
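/* Illustrative sketch (not part of the original header): how an upper layer
 * may honour the CS_FL_MAY_SPLICE contract, preferring the zero-copy
 * rcv_pipe() path only when the mux advertised it. <cs_flags> is passed
 * separately here because struct conn_stream is only defined further below.
 */
static inline int example_mux_recv(const struct mux_ops *mux,
                                   struct conn_stream *cs, unsigned int cs_flags,
                                   struct pipe *pipe, struct buffer *buf,
                                   unsigned int count)
{
	if ((cs_flags & CS_FL_MAY_SPLICE) && pipe)
		return mux->rcv_pipe(cs, pipe, count); /* zero-copy transfer */
	return (int)mux->rcv_buf(cs, buf, count, 0);   /* buffered transfer */
}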
/* data_cb describes the data layer's recv and send callbacks which are called
* when I/O activity is detected once the transport layer is ready. These
* callbacks are supposed to make use of the xprt_ops above to exchange data
* from/to buffers and pipes. The <wake> callback is used to report activity
* at the transport layer, which can be a connection opening/close, or any
* data movement. It may abort a connection by returning < 0.
*/
struct data_cb {
int (*wake)(struct conn_stream *cs); /* data-layer callback to report activity */
char name[8]; /* data layer name, zero-terminated */
};
struct my_tcphdr {
uint16_t source;
uint16_t dest;
};
/* a connection source profile defines all the parameters needed to properly
* bind an outgoing connection for a server or proxy.
*/
struct conn_src {
unsigned int opts; /* CO_SRC_* */
int iface_len; /* bind interface name length */
char *iface_name; /* bind interface name or NULL */
struct port_range *sport_range; /* optional per-server TCP source ports */
struct sockaddr_storage source_addr; /* the address to which we want to bind for connect() */
#if defined(CONFIG_HAP_TRANSPARENT)
struct sockaddr_storage tproxy_addr; /* non-local address we want to bind to for connect() */
char *bind_hdr_name; /* bind to this header name if defined */
int bind_hdr_len; /* length of the name of the header above */
int bind_hdr_occ; /* occurrence number of header above: >0 = from first, <0 = from end, 0=disabled */
#endif
};
/*
* This structure describes the elements of a connection relevant to a stream
*/
struct conn_stream {
enum obj_type obj_type; /* differentiates connection from applet context */
/* 3 bytes hole here */
unsigned int flags; /* CS_FL_* */
struct connection *conn; /* xprt-level connection */
void *data; /* pointer to upper layer's entity (eg: stream interface) */
const struct data_cb *data_cb; /* data layer callbacks. Must be set before xprt->init() */
void *ctx; /* mux-specific context */
};
/*
* This structure describes the info related to a conn_stream known by the mux
* only but useful for the upper layer.
* For now, only some dates and durations are reported. This structure will
* evolve, but for the time being only the bare minimum is referenced.
*/
struct cs_info {
struct timeval create_date; /* Creation date of the conn_stream in user date */
struct timeval tv_create; /* Creation date of the conn_stream in internal date (monotonic) */
long t_handshake; /* handshake duration, -1 if never occurs */
long t_idle; /* idle duration, -1 if never occurs */
};
/* This structure describes a connection with its methods and data.
* A connection may be established to a proxy or server via a local or remote
* socket, and can also be made to an internal applet. It can support
* several transport schemes (raw, ssl, ...) and several connection control
* schemes, generally a protocol for socket-oriented connections, or other
* methods for applets.
*/
struct connection {
/* first cache line */
enum obj_type obj_type; /* differentiates connection from applet context */
unsigned char err_code; /* CO_ER_* */
signed short send_proxy_ofs; /* <0 = offset to (re)send from the end, >0 = send all (reused for SOCKS4) */
unsigned int flags; /* CO_FL_* */
const struct protocol *ctrl; /* operations at the socket layer */
const struct xprt_ops *xprt; /* operations at the transport layer */
const struct mux_ops *mux; /* mux layer operations. Must be set before xprt->init() */
void *xprt_ctx; /* general purpose pointer, initialized to NULL */
void *ctx; /* highest level context (usually the mux), initialized to NULL */
void *owner; /* pointer to the owner session, or NULL */
enum obj_type *target; /* the target to connect to (server, proxy, applet, ...) */
/* second cache line */
struct wait_event *subs; /* Task to wake when awaited events are ready */
struct mt_list list; /* attach point to various connection lists (idle, ...) */
struct list session_list; /* List of attached connections to a session */
union conn_handle handle; /* connection handle at the socket layer */
const struct netns_entry *proxy_netns;
/* third cache line and beyond */
void (*destroy_cb)(struct connection *conn); /* callback to notify of imminent death of the connection */
struct sockaddr_storage *src; /* source address (pool), when known, otherwise NULL */
struct sockaddr_storage *dst; /* destination address (pool), when known, otherwise NULL */
char *proxy_authority; /* Value of authority TLV received via PROXYv2 */
unsigned int idle_time; /* Time the connection was added to the idle list, or 0 if not in the idle list */
uint8_t proxy_authority_len; /* Length of authority TLV received via PROXYv2 */
struct ist proxy_unique_id; /* Value of the unique ID TLV received via PROXYv2 */
};
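/* Illustrative sketch (not part of the original header): the src/dst pointers
 * are pool-allocated and only meaningful once the matching CO_FL_ADDR_*_SET
 * flag is raised, so accesses should be guarded as below.
 */
static inline const struct sockaddr_storage *example_conn_src(const struct connection *conn)
{
	return (conn->flags & CO_FL_ADDR_FROM_SET) ? conn->src : NULL;
}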
/* PROTO token registration */
enum proto_proxy_mode {
PROTO_MODE_NONE = 0,
PROTO_MODE_TCP = 1 << 0, // must not be changed!
PROTO_MODE_HTTP = 1 << 1, // must not be changed!
PROTO_MODE_ANY = PROTO_MODE_TCP | PROTO_MODE_HTTP,
};
enum proto_proxy_side {
PROTO_SIDE_NONE = 0,
PROTO_SIDE_FE = 1, // same as PR_CAP_FE
PROTO_SIDE_BE = 2, // same as PR_CAP_BE
PROTO_SIDE_BOTH = PROTO_SIDE_FE | PROTO_SIDE_BE,
};
struct mux_proto_list {
const struct ist token; /* token name and length. Empty is catch-all */
enum proto_proxy_mode mode;
enum proto_proxy_side side;
const struct mux_ops *mux;
struct list list;
};
/* proxy protocol v2 definitions */
#define PP2_SIGNATURE "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A"
#define PP2_SIGNATURE_LEN 12
#define PP2_HEADER_LEN 16
/* ver_cmd byte */
#define PP2_CMD_LOCAL 0x00
#define PP2_CMD_PROXY 0x01
#define PP2_CMD_MASK 0x0F
#define PP2_VERSION 0x20
#define PP2_VERSION_MASK 0xF0
/* fam byte */
#define PP2_TRANS_UNSPEC 0x00
#define PP2_TRANS_STREAM 0x01
#define PP2_TRANS_DGRAM 0x02
#define PP2_TRANS_MASK 0x0F
#define PP2_FAM_UNSPEC 0x00
#define PP2_FAM_INET 0x10
#define PP2_FAM_INET6 0x20
#define PP2_FAM_UNIX 0x30
#define PP2_FAM_MASK 0xF0
#define PP2_ADDR_LEN_UNSPEC (0)
#define PP2_ADDR_LEN_INET (4 + 4 + 2 + 2)
#define PP2_ADDR_LEN_INET6 (16 + 16 + 2 + 2)
#define PP2_ADDR_LEN_UNIX (108 + 108)
#define PP2_HDR_LEN_UNSPEC (PP2_HEADER_LEN + PP2_ADDR_LEN_UNSPEC)
#define PP2_HDR_LEN_INET (PP2_HEADER_LEN + PP2_ADDR_LEN_INET)
#define PP2_HDR_LEN_INET6 (PP2_HEADER_LEN + PP2_ADDR_LEN_INET6)
#define PP2_HDR_LEN_UNIX (PP2_HEADER_LEN + PP2_ADDR_LEN_UNIX)
struct proxy_hdr_v2 {
uint8_t sig[12]; /* hex 0D 0A 0D 0A 00 0D 0A 51 55 49 54 0A */
uint8_t ver_cmd; /* protocol version and command */
uint8_t fam; /* protocol family and transport */
uint16_t len; /* number of following bytes part of the header */
union {
struct { /* for TCP/UDP over IPv4, len = 12 */
uint32_t src_addr;
uint32_t dst_addr;
uint16_t src_port;
uint16_t dst_port;
} ip4;
struct { /* for TCP/UDP over IPv6, len = 36 */
uint8_t src_addr[16];
uint8_t dst_addr[16];
uint16_t src_port;
uint16_t dst_port;
} ip6;
struct { /* for AF_UNIX sockets, len = 216 */
uint8_t src_addr[108];
uint8_t dst_addr[108];
} unx;
} addr;
};
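/* Illustrative sketch (not part of the original header): emitting a PROXY
 * protocol v2 header for TCP over IPv4 using the constants and layout above.
 * Assumes <string.h> and <arpa/inet.h> (for htons); addresses and ports are
 * already in network byte order as the socket API provides them.
 */
static inline size_t example_pp2_build_inet(struct proxy_hdr_v2 *hdr,
                                            const struct sockaddr_in *src,
                                            const struct sockaddr_in *dst)
{
	memcpy(hdr->sig, PP2_SIGNATURE, PP2_SIGNATURE_LEN);
	hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;     /* 0x21: v2, PROXY */
	hdr->fam     = PP2_FAM_INET | PP2_TRANS_STREAM; /* 0x11: IPv4, TCP */
	hdr->len     = htons(PP2_ADDR_LEN_INET);        /* 12 address bytes follow */
	hdr->addr.ip4.src_addr = src->sin_addr.s_addr;
	hdr->addr.ip4.dst_addr = dst->sin_addr.s_addr;
	hdr->addr.ip4.src_port = src->sin_port;
	hdr->addr.ip4.dst_port = dst->sin_port;
	return PP2_HDR_LEN_INET;                        /* total bytes to send */
}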
#define PP2_TYPE_ALPN 0x01
#define PP2_TYPE_AUTHORITY 0x02
#define PP2_TYPE_CRC32C 0x03
#define PP2_TYPE_NOOP 0x04
#define PP2_TYPE_UNIQUE_ID 0x05
#define PP2_TYPE_SSL 0x20
#define PP2_SUBTYPE_SSL_VERSION 0x21
#define PP2_SUBTYPE_SSL_CN 0x22
#define PP2_SUBTYPE_SSL_CIPHER 0x23
#define PP2_SUBTYPE_SSL_SIG_ALG 0x24
#define PP2_SUBTYPE_SSL_KEY_ALG 0x25
#define PP2_TYPE_NETNS 0x30
#define TLV_HEADER_SIZE 3
struct tlv {
uint8_t type;
uint8_t length_hi;
uint8_t length_lo;
uint8_t value[0];
}__attribute__((packed));
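/* Illustrative sketch (not part of the original header): a TLV's value length
 * is carried big-endian in two bytes, so the full on-wire record occupies
 * TLV_HEADER_SIZE plus the value length computed below.
 */
static inline size_t example_tlv_value_len(const struct tlv *t)
{
	return ((size_t)t->length_hi << 8) | t->length_lo;
}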
struct tlv_ssl {
struct tlv tlv;
uint8_t client;
uint32_t verify;
uint8_t sub_tlv[0];
}__attribute__((packed));
#define PP2_CLIENT_SSL 0x01
#define PP2_CLIENT_CERT_CONN 0x02
#define PP2_CLIENT_CERT_SESS 0x04
/* Max length of the authority TLV */
#define PP2_AUTHORITY_MAX 255
/*
* Linux seems to be able to send 253 fds per sendmsg(), not sure
* about the other OSes.
*/
/* Max number of file descriptors we send in one sendmsg() */
#define MAX_SEND_FD 253
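/* Illustrative sketch (not part of the original header): passing up to
 * MAX_SEND_FD file descriptors in one sendmsg() call over an AF_UNIX socket,
 * using the standard SCM_RIGHTS ancillary data mechanism. Assumes <string.h>
 * (and <sys/uio.h> where <sys/socket.h> does not expose struct iovec).
 */
static inline ssize_t example_send_fds(int sock, const int *fds, int nfd)
{
	char cmsgbuf[CMSG_SPACE(MAX_SEND_FD * sizeof(int))];
	char dummy = 0; /* at least one byte of regular data must be sent */
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;

	if (nfd > MAX_SEND_FD)
		nfd = MAX_SEND_FD; /* larger batches need several messages */

	msg.msg_iov        = &iov;
	msg.msg_iovlen     = 1;
	msg.msg_control    = cmsgbuf;
	msg.msg_controllen = CMSG_SPACE(nfd * sizeof(int));

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type  = SCM_RIGHTS;
	cmsg->cmsg_len   = CMSG_LEN(nfd * sizeof(int));
	memcpy(CMSG_DATA(cmsg), fds, nfd * sizeof(int));

	return sendmsg(sock, &msg, 0);
}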
#define SOCKS4_HS_RSP_LEN 8
#endif /* _TYPES_CONNECTION_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/