/*
 * Connection management functions
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
2012-10-04 18:10:55 -04:00
|
|
|
#include <errno.h>
|
|
|
|
|
|
2012-07-06 08:13:49 -04:00
|
|
|
#include <common/compat.h>
|
|
|
|
|
#include <common/config.h>
|
2014-11-17 09:11:45 -05:00
|
|
|
#include <common/namespace.h>
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2012-08-24 12:12:41 -04:00
|
|
|
#include <proto/connection.h>
|
2012-09-02 16:34:23 -04:00
|
|
|
#include <proto/fd.h>
|
2012-10-04 17:55:57 -04:00
|
|
|
#include <proto/frontend.h>
|
2012-07-23 09:07:23 -04:00
|
|
|
#include <proto/proto_tcp.h>
|
2012-07-06 11:12:34 -04:00
|
|
|
#include <proto/stream_interface.h>
|
2017-01-05 09:11:44 -05:00
|
|
|
#include <proto/sample.h>
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2012-05-18 09:47:34 -04:00
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
#include <proto/ssl_sock.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
/* shared memory pool from which all struct connection objects are allocated */
struct pool_head *pool2_connection;

/* registry of known transport layers, indexed by XPRT_* identifier;
 * slots are NULL until a transport registers itself.
 */
struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };

/* List head of all known muxes for ALPN */
struct alpn_mux_list alpn_mux_list = {
	.list = LIST_HEAD_INIT(alpn_mux_list.list)
};
2012-10-26 14:10:28 -04:00
|
|
|
/* perform minimal intializations, report 0 in case of error, 1 if OK. */
|
|
|
|
|
int init_connection()
|
|
|
|
|
{
|
|
|
|
|
pool2_connection = create_pool("connection", sizeof (struct connection), MEM_F_SHARED);
|
|
|
|
|
return pool2_connection != NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2012-07-06 08:13:49 -04:00
|
|
|
/* I/O callback for fd-based connections. It calls the read/write handlers
|
2016-04-14 05:13:20 -04:00
|
|
|
* provided by the connection's sock_ops, which must be valid.
|
2012-07-06 08:13:49 -04:00
|
|
|
*/
|
2016-04-14 05:13:20 -04:00
|
|
|
void conn_fd_handler(int fd)
|
2012-07-06 08:13:49 -04:00
|
|
|
{
|
2012-07-06 08:54:49 -04:00
|
|
|
struct connection *conn = fdtab[fd].owner;
|
2012-10-03 15:04:48 -04:00
|
|
|
unsigned int flags;
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2012-07-12 09:32:13 -04:00
|
|
|
if (unlikely(!conn))
|
2016-04-14 05:13:20 -04:00
|
|
|
return;
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2012-12-16 13:19:13 -05:00
|
|
|
conn_refresh_polling_flags(conn);
|
2017-10-25 03:22:43 -04:00
|
|
|
conn->flags |= CO_FL_WILL_UPDATE;
|
|
|
|
|
|
2012-12-16 13:19:13 -05:00
|
|
|
flags = conn->flags & ~CO_FL_ERROR; /* ensure to call the wake handler upon error */
|
2012-12-10 10:33:38 -05:00
|
|
|
|
2012-07-12 09:32:13 -04:00
|
|
|
process_handshake:
|
2012-08-17 11:33:53 -04:00
|
|
|
/* The handshake callbacks are called in sequence. If either of them is
|
|
|
|
|
* missing something, it must enable the required polling at the socket
|
|
|
|
|
* layer of the connection. Polling state is not guaranteed when entering
|
|
|
|
|
* these handlers, so any handshake handler which does not complete its
|
2013-11-25 02:41:15 -05:00
|
|
|
* work must explicitly disable events it's not interested in. Error
|
|
|
|
|
* handling is also performed here in order to reduce the number of tests
|
|
|
|
|
* around.
|
2012-08-17 11:33:53 -04:00
|
|
|
*/
|
2013-11-25 02:41:15 -05:00
|
|
|
while (unlikely(conn->flags & (CO_FL_HANDSHAKE | CO_FL_ERROR))) {
|
2014-01-22 13:46:33 -05:00
|
|
|
if (unlikely(conn->flags & CO_FL_ERROR))
|
2012-07-06 11:12:34 -04:00
|
|
|
goto leave;
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2016-06-04 10:11:10 -04:00
|
|
|
if (conn->flags & CO_FL_ACCEPT_CIP)
|
|
|
|
|
if (!conn_recv_netscaler_cip(conn, CO_FL_ACCEPT_CIP))
|
|
|
|
|
goto leave;
|
|
|
|
|
|
2012-08-31 11:43:29 -04:00
|
|
|
if (conn->flags & CO_FL_ACCEPT_PROXY)
|
|
|
|
|
if (!conn_recv_proxy(conn, CO_FL_ACCEPT_PROXY))
|
|
|
|
|
goto leave;
|
|
|
|
|
|
2013-10-24 16:01:26 -04:00
|
|
|
if (conn->flags & CO_FL_SEND_PROXY)
|
|
|
|
|
if (!conn_si_send_proxy(conn, CO_FL_SEND_PROXY))
|
2012-10-04 17:55:57 -04:00
|
|
|
goto leave;
|
2012-05-18 09:47:34 -04:00
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
if (conn->flags & CO_FL_SSL_WAIT_HS)
|
|
|
|
|
if (!ssl_sock_handshake(conn, CO_FL_SSL_WAIT_HS))
|
|
|
|
|
goto leave;
|
|
|
|
|
#endif
|
2012-07-12 09:32:13 -04:00
|
|
|
}
|
|
|
|
|
|
2012-08-17 11:33:53 -04:00
|
|
|
/* Once we're purely in the data phase, we disable handshake polling */
|
|
|
|
|
if (!(conn->flags & CO_FL_POLL_SOCK))
|
|
|
|
|
__conn_sock_stop_both(conn);
|
2012-07-12 09:32:13 -04:00
|
|
|
|
2017-08-28 09:46:01 -04:00
|
|
|
/* The connection owner might want to be notified about an end of
|
|
|
|
|
* handshake indicating the connection is ready, before we proceed with
|
|
|
|
|
* any data exchange. The callback may fail and cause the connection to
|
|
|
|
|
* be destroyed, thus we must not use it anymore and should immediately
|
|
|
|
|
* leave instead. The caller must immediately unregister itself once
|
|
|
|
|
* called.
|
2012-08-31 10:01:23 -04:00
|
|
|
*/
|
2017-08-28 09:46:01 -04:00
|
|
|
if (conn->xprt_done_cb && conn->xprt_done_cb(conn) < 0)
|
2016-04-14 05:13:20 -04:00
|
|
|
return;
|
2012-08-31 10:01:23 -04:00
|
|
|
|
2017-04-11 13:59:33 -04:00
|
|
|
if (conn->xprt && fd_send_ready(fd) &&
|
2017-09-13 12:30:23 -04:00
|
|
|
((conn->flags & (CO_FL_XPRT_WR_ENA|CO_FL_ERROR|CO_FL_HANDSHAKE)) == CO_FL_XPRT_WR_ENA)) {
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
/* force reporting of activity by clearing the previous flags :
|
|
|
|
|
* we'll have at least ERROR or CONNECTED at the end of an I/O,
|
|
|
|
|
* both of which will be detected below.
|
2012-10-03 15:04:48 -04:00
|
|
|
*/
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
flags = 0;
|
MEDIUM: connection: start to introduce a mux layer between xprt and data
For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside
a connection. After quite a long brainstorming, it appears that the
connection interface to the existing streams is appropriate just like
the connection interface to the lower layers. In fact we need to have
the mux layer in the middle of the connection, between the transport
and the data layer.
A mux can exist on two directions/sides. On the inbound direction, it
instanciates new streams from incoming connections, while on the outbound
direction it muxes streams into outgoing connections. The difference is
visible on the mux->init() call : in one case, an upper context is already
known (outgoing connection), and in the other case, the upper context is
not yet known (incoming connection) and will have to be allocated by the
mux. The session doesn't have to create the new streams anymore, as this
is performed by the mux itself.
This patch introduces this and creates a pass-through mux called
"mux_pt" which is used for all new connections and which only
calls the data layer's recv,send,wake() calls. One incoming stream
is immediately created when init() is called on the inbound direction.
There should not be any visible impact.
Note that the connection's mux is purposely not set until the session
is completed so that we don't accidently run with the wrong mux. This
must not cause any issue as the xprt_done_cb function is always called
prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
|
|
|
conn->mux->send(conn);
|
2012-10-03 15:04:48 -04:00
|
|
|
}
|
2012-07-06 08:13:49 -04:00
|
|
|
|
2017-04-11 13:59:33 -04:00
|
|
|
/* The data transfer starts here and stops on error and handshakes. Note
|
|
|
|
|
* that we must absolutely test conn->xprt at each step in case it suddenly
|
|
|
|
|
* changes due to a quick unexpected close().
|
|
|
|
|
*/
|
|
|
|
|
if (conn->xprt && fd_recv_ready(fd) &&
|
2017-09-13 12:30:23 -04:00
|
|
|
((conn->flags & (CO_FL_XPRT_RD_ENA|CO_FL_WAIT_ROOM|CO_FL_ERROR|CO_FL_HANDSHAKE)) == CO_FL_XPRT_RD_ENA)) {
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
/* force reporting of activity by clearing the previous flags :
|
|
|
|
|
* we'll have at least ERROR or CONNECTED at the end of an I/O,
|
|
|
|
|
* both of which will be detected below.
|
2012-10-03 15:04:48 -04:00
|
|
|
*/
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
flags = 0;
|
MEDIUM: connection: start to introduce a mux layer between xprt and data
For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside
a connection. After quite a long brainstorming, it appears that the
connection interface to the existing streams is appropriate just like
the connection interface to the lower layers. In fact we need to have
the mux layer in the middle of the connection, between the transport
and the data layer.
A mux can exist on two directions/sides. On the inbound direction, it
instanciates new streams from incoming connections, while on the outbound
direction it muxes streams into outgoing connections. The difference is
visible on the mux->init() call : in one case, an upper context is already
known (outgoing connection), and in the other case, the upper context is
not yet known (incoming connection) and will have to be allocated by the
mux. The session doesn't have to create the new streams anymore, as this
is performed by the mux itself.
This patch introduces this and creates a pass-through mux called
"mux_pt" which is used for all new connections and which only
calls the data layer's recv,send,wake() calls. One incoming stream
is immediately created when init() is called on the inbound direction.
There should not be any visible impact.
Note that the connection's mux is purposely not set until the session
is completed so that we don't accidently run with the wrong mux. This
must not cause any issue as the xprt_done_cb function is always called
prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
|
|
|
conn->mux->recv(conn);
|
2012-10-03 15:04:48 -04:00
|
|
|
}
|
2012-07-23 09:07:23 -04:00
|
|
|
|
2012-07-12 09:32:13 -04:00
|
|
|
/* It may happen during the data phase that a handshake is
|
|
|
|
|
* enabled again (eg: SSL)
|
|
|
|
|
*/
|
2013-11-25 02:41:15 -05:00
|
|
|
if (unlikely(conn->flags & (CO_FL_HANDSHAKE | CO_FL_ERROR)))
|
2012-07-12 09:32:13 -04:00
|
|
|
goto process_handshake;
|
|
|
|
|
|
2014-01-20 09:13:07 -05:00
|
|
|
if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN)) {
|
2012-09-01 11:59:22 -04:00
|
|
|
/* still waiting for a connection to establish and nothing was
|
|
|
|
|
* attempted yet to probe the connection. Then let's retry the
|
|
|
|
|
* connect().
|
2012-07-23 09:07:23 -04:00
|
|
|
*/
|
2012-07-23 12:53:03 -04:00
|
|
|
if (!tcp_connect_probe(conn))
|
2012-08-09 08:45:22 -04:00
|
|
|
goto leave;
|
2012-07-23 09:07:23 -04:00
|
|
|
}
|
2012-07-06 11:12:34 -04:00
|
|
|
leave:
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
/* Verify if the connection just established. */
|
BUG/MAJOR: connection: update CO_FL_CONNECTED before calling the data layer
Matthias Fechner reported a regression in 1.7.3 brought by the backport
of commit 819efbf ("BUG/MEDIUM: tcp: don't poll for write when connect()
succeeds"), causing some connections to fail to establish once in a while.
While this commit itself was a fix for a bad sequencing of connection
events, it in fact unveiled a much deeper bug going back to the connection
rework era in v1.5-dev12 : 8f8c92f ("MAJOR: connection: add a new
CO_FL_CONNECTED flag").
It's worth noting that in a lab reproducing a similar environment as
Matthias' about only 1 every 19000 connections exhibit this behaviour,
making the issue not so easy to observe. A trick to make the problem
more observable consists in disabling non-blocking mode on the socket
before calling connect() and re-enabling it later, so that connect()
always succeeds. Then it becomes 100% reproducible.
The problem is that this CO_FL_CONNECTED flag is tested after deciding to
call the data layer (typically the stream interface but might be a health
check as well), and that the decision to call the data layer relies on a
change of one of the flags covered by the CO_FL_CONN_STATE set, which is
made of CO_FL_CONNECTED among others.
Before the fix above, this bug couldn't appear with TCP but it could
appear with Unix sockets. Indeed, connect() was always considered
blocking so the CO_FL_WAIT_L4_CONN connection flag was always set, and
polling for write events was always enabled. This used to guarantee that
the conn_fd_handler() could detect a change among the CO_FL_CONN_STATE
flags.
Now with the fix above, if a connect() immediately succeeds for non-ssl
connection with send-proxy enabled, and no data in the buffer (thus TCP
mode only), the CO_FL_WAIT_L4_CONN flag is not set, the lack of data in
the buffer doesn't enable polling flags for the data layer, the
CO_FL_CONNECTED flag is not set due to send-proxy still being pending,
and once send-proxy is done, its completion doesn't cause the data layer
to be woken up due to the fact that CO_FL_CONNECT is still not present
and that the CO_FL_SEND_PROXY flag is not watched in CO_FL_CONN_STATE.
Then no progress is made when data are received from the client (and
attempted to be forwarded), because a CF_WRITE_NULL (or CF_WRITE_PARTIAL)
flag is needed for the stream-interface state to turn from SI_ST_CON to
SI_ST_EST, allowing ->chk_snd() to be called when new data arrive. And
the only way to set this flag is to call the data layer of course.
After the connect timeout, the connection gets killed and if in the mean
time some data have accumulated in the buffer, the retry will succeed.
This patch fixes this situation by simply placing the update of
CO_FL_CONNECTED where it should have been, before the check for a flag
change needed to wake up the data layer and not after.
This fix must be backported to 1.7, 1.6 and 1.5. Versions not having
the patch above are still affected for unix sockets.
Special thanks to Matthias Fechner who provided a very detailed bug
report with a bisection designating the faulty patch, and to Olivier
Houchard for providing full access to a pretty similar environment where
the issue could first be reproduced.
2017-03-14 15:19:29 -04:00
|
|
|
if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED))))
|
|
|
|
|
conn->flags |= CO_FL_CONNECTED;
|
|
|
|
|
|
2017-08-28 09:46:01 -04:00
|
|
|
/* The connection owner might want to be notified about failures to
|
|
|
|
|
* complete the handshake. The callback may fail and cause the
|
|
|
|
|
* connection to be destroyed, thus we must not use it anymore and
|
|
|
|
|
* should immediately leave instead. The caller must immediately
|
|
|
|
|
* unregister itself once called.
|
|
|
|
|
*/
|
|
|
|
|
if (((conn->flags ^ flags) & CO_FL_NOTIFY_DONE) &&
|
|
|
|
|
conn->xprt_done_cb && conn->xprt_done_cb(conn) < 0)
|
|
|
|
|
return;
|
|
|
|
|
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegotiation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consisting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
/* The wake callback is normally used to notify the data layer about
|
|
|
|
|
* data layer activity (successful send/recv), connection establishment,
|
|
|
|
|
* shutdown and fatal errors. We need to consider the following
|
|
|
|
|
* situations to wake up the data layer :
|
|
|
|
|
* - change among the CO_FL_NOTIFY_DATA flags :
|
|
|
|
|
* {DATA,SOCK}_{RD,WR}_SH, ERROR,
|
|
|
|
|
* - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
|
|
|
|
|
* end of handshake and transition to CONNECTED
|
|
|
|
|
* - raise of CONNECTED with HANDSHAKE down
|
|
|
|
|
* - end of HANDSHAKE with CONNECTED set
|
|
|
|
|
* - regular data layer activity
|
|
|
|
|
*
|
|
|
|
|
* Note that the wake callback is allowed to release the connection and
|
|
|
|
|
* the fd (and return < 0 in this case).
|
2012-10-03 15:12:16 -04:00
|
|
|
*/
|
2017-03-18 10:39:57 -04:00
|
|
|
if ((((conn->flags ^ flags) & CO_FL_NOTIFY_DATA) ||
|
BUG/MEDIUM: connection: ensure to always report the end of handshakes
Despite the previous commit working fine on all tests, it's still not
sufficient to completely address the problem. If the connection handler
is called with an event validating an L4 connection but some handshakes
remain (eg: accept-proxy), it will still wake the function up, which
will not report the activity, and will not detect a change once the
handshake it complete so it will not notify the ->wake() handler.
In fact the only reason why the ->wake() handler is still called here
is because after dropping the last handshake, we try to call ->recv()
and ->send() in turn and change the flags in order to detect a data
activity. But if for any reason the data layer is not interested in
reading nor writing, it will not get these events.
A cleaner way to address this is to call the ->wake() handler only
on definitive status changes (shut, error), on real data activity,
and on a complete connection setup, measured as CONNECTED with no
more handshake pending.
It could be argued that the handshake flags have to be made part of
the condition to set CO_FL_CONNECTED but that would currently break
a part of the health checks. Also a handshake could appear at any
moment even after a connection is established so we'd lose the
ability to detect a second end of handshake.
For now the situation around CO_FL_CONNECTED is not clean :
- session_accept() only sets CO_FL_CONNECTED if there's no pending
handshake ;
- conn_fd_handler() will set it once L4 and L6 are complete, which
will do what session_accept() above refrained from doing even if
an accept_proxy handshake is still pending ;
- ssl_sock_infocbk() and ssl_sock_handshake() consider that a
handshake performed with CO_FL_CONNECTED set is a renegociation ;
=> they should instead filter on CO_FL_WAIT_L6_CONN
- all ssl_fc_* sample fetch functions wait for CO_FL_CONNECTED before
accepting to fetch information
=> they should also get rid of any pending handshake
- smp_fetch_fc_rcvd_proxy() uses !CO_FL_CONNECTED instead of
CO_FL_ACCEPT_PROXY
- health checks (standard and tcp-checks) don't check for HANDSHAKE
and may report a successful check based on CO_FL_CONNECTED while
not yet done (eg: send buffer full on send_proxy).
This patch aims at solving some of these side effects in a backportable
way before this is reworked in depth :
- we need to call ->wake() to report connection success, measure
connection time, notify that the data layer is ready and update
the data layer after activity ; this has to be done either if
we switch from pending {L4,L6}_CONN to nothing with no handshakes
left, or if we notice some handshakes were pending and are now
done.
- we document that CO_FL_CONNECTED exactly means "L4 connection
setup confirmed at least once, L6 connection setup confirmed
at least once or not necessary, all this regardless of any
possibly remaining handshakes or future L6 negociations".
This patch also renames CO_FL_CONN_STATUS to the more explicit
CO_FL_NOTIFY_DATA, and works around the previous flags trick consiting
in setting an impossible combination of flags to notify the data layer,
by simply clearing the current flags.
This fix should be backported to 1.7, 1.6 and 1.5.
2017-03-19 02:54:28 -04:00
|
|
|
((flags & (CO_FL_CONNECTED|CO_FL_HANDSHAKE)) != CO_FL_CONNECTED &&
|
|
|
|
|
(conn->flags & (CO_FL_CONNECTED|CO_FL_HANDSHAKE)) == CO_FL_CONNECTED)) &&
|
MEDIUM: connection: start to introduce a mux layer between xprt and data
For HTTP/2 and QUIC, we'll need to deal with multiplexed streams inside
a connection. After quite a long brainstorming, it appears that the
connection interface to the existing streams is appropriate just like
the connection interface to the lower layers. In fact we need to have
the mux layer in the middle of the connection, between the transport
and the data layer.
A mux can exist on two directions/sides. On the inbound direction, it
instantiates new streams from incoming connections, while on the outbound
direction it muxes streams into outgoing connections. The difference is
visible on the mux->init() call : in one case, an upper context is already
known (outgoing connection), and in the other case, the upper context is
not yet known (incoming connection) and will have to be allocated by the
mux. The session doesn't have to create the new streams anymore, as this
is performed by the mux itself.
This patch introduces this and creates a pass-through mux called
"mux_pt" which is used for all new connections and which only
calls the data layer's recv,send,wake() calls. One incoming stream
is immediately created when init() is called on the inbound direction.
There should not be any visible impact.
Note that the connection's mux is purposely not set until the session
is completed so that we don't accidently run with the wrong mux. This
must not cause any issue as the xprt_done_cb function is always called
prior to using mux's recv/send functions.
2017-08-28 04:53:00 -04:00
|
|
|
conn->mux->wake(conn) < 0)
|
2016-04-14 05:13:20 -04:00
|
|
|
return;
|
2012-07-23 12:24:25 -04:00
|
|
|
|
2012-07-23 06:14:26 -04:00
|
|
|
/* remove the events before leaving */
|
2012-12-06 18:09:43 -05:00
|
|
|
fdtab[fd].ev &= FD_POLL_STICKY;
|
2012-08-17 11:33:53 -04:00
|
|
|
|
|
|
|
|
/* commit polling changes */
|
2017-10-25 03:22:43 -04:00
|
|
|
conn->flags &= ~CO_FL_WILL_UPDATE;
|
2012-08-17 11:33:53 -04:00
|
|
|
conn_cond_update_polling(conn);
|
2016-04-14 05:13:20 -04:00
|
|
|
return;
|
2012-07-06 08:13:49 -04:00
|
|
|
}
|
2012-08-17 05:55:04 -04:00
|
|
|
|
2012-09-01 11:26:16 -04:00
|
|
|
/* Update polling on connection <c>'s file descriptor depending on its current
|
|
|
|
|
* state as reported in the connection's CO_FL_CURR_* flags, reports of EAGAIN
|
2017-09-13 12:30:23 -04:00
|
|
|
* in CO_FL_WAIT_*, and the data layer expectations indicated by CO_FL_XPRT_*.
|
2012-09-01 11:26:16 -04:00
|
|
|
* The connection flags are updated with the new flags at the end of the
|
2012-10-04 16:21:15 -04:00
|
|
|
* operation. Polling is totally disabled if an error was reported.
|
2012-08-17 05:55:04 -04:00
|
|
|
*/
|
2017-09-13 12:30:23 -04:00
|
|
|
void conn_update_xprt_polling(struct connection *c)
|
2012-08-17 05:55:04 -04:00
|
|
|
{
|
2012-09-01 11:26:16 -04:00
|
|
|
unsigned int f = c->flags;
|
2012-08-17 05:55:04 -04:00
|
|
|
|
2014-01-23 07:50:42 -05:00
|
|
|
if (!conn_ctrl_ready(c))
|
MAJOR: connection: add two new flags to indicate readiness of control/transport
Currently the control and transport layers of a connection are supposed
to be initialized when their respective pointers are not NULL. This will
not work anymore when we plan to reuse connections, because there is an
asymmetry between the accept() side and the connect() side :
- on accept() side, the fd is set first, then the ctrl layer then the
transport layer ; upon error, they must be undone in the reverse order,
then the FD must be closed. The FD must not be deleted if the control
layer was not yet initialized ;
- on the connect() side, the fd is set last and there is no reliable way
to know if it has been initialized or not. In practice it's initialized
to -1 first but this is hackish and supposes that local FDs only will
be used forever. Also, there are even less solutions for keeping trace
of the transport layer's state.
Also it is possible to support delayed close() when something (eg: logs)
tracks some information requiring the transport and/or control layers,
making it even more difficult to clean them.
So the proposed solution is to add two flags to the connection :
- CO_FL_CTRL_READY is set when the control layer is initialized (fd_insert)
and cleared after it's released (fd_delete).
- CO_FL_XPRT_READY is set when the control layer is initialized (xprt->init)
and cleared after it's released (xprt->close).
The functions have been adapted to rely on this and not on the pointers
anymore. conn_xprt_close() was unused and dangerous : it did not close
the control layer (eg: the socket itself) but still marks the transport
layer as closed, preventing any future call to conn_full_close() from
finishing the job.
The problem comes from conn_full_close() in fact. It needs to close the
xprt and ctrl layers independantly. After that we're still having an issue :
we don't know based on ->ctrl alone whether the fd was registered or not.
For this we use the two new flags CO_FL_XPRT_READY and CO_FL_CTRL_READY. We
now rely on this and not on conn->xprt nor conn->ctrl anymore to decide what
remains to be done on the connection.
In order not to miss some flag assignments, we introduce conn_ctrl_init()
to initialize the control layer, register the fd using fd_insert() and set
the flag, and conn_ctrl_close() which unregisters the fd and removes the
flag, but only if the transport layer was closed.
Similarly, at the transport layer, conn_xprt_init() calls ->init and sets
the flag, while conn_xprt_close() checks the flag, calls ->close and clears
the flag, regardless xprt_ctx or xprt_st. This also ensures that the ->init
and the ->close functions are called only once each and in the correct order.
Note that conn_xprt_close() does nothing if the transport layer is still
tracked.
conn_full_close() now simply calls conn_xprt_close() then conn_full_close()
in turn, which do nothing if CO_FL_XPRT_TRACKED is set.
In order to handle the error path, we also provide conn_force_close() which
ignores CO_FL_XPRT_TRACKED and closes the transport and the control layers
in turns. All relevant instances of fd_delete() have been replaced with
conn_force_close(). Now we always know what state the connection is in and
we can expect to split its initialization.
2013-10-21 10:30:56 -04:00
|
|
|
return;
|
|
|
|
|
|
2012-08-17 05:55:04 -04:00
|
|
|
/* update read status if needed */
|
2017-09-13 12:30:23 -04:00
|
|
|
if (unlikely((f & (CO_FL_CURR_RD_ENA|CO_FL_XPRT_RD_ENA)) == CO_FL_XPRT_RD_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_want_recv(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f |= CO_FL_CURR_RD_ENA;
|
|
|
|
|
}
|
2017-09-13 12:30:23 -04:00
|
|
|
else if (unlikely((f & (CO_FL_CURR_RD_ENA|CO_FL_XPRT_RD_ENA)) == CO_FL_CURR_RD_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_stop_recv(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f &= ~CO_FL_CURR_RD_ENA;
|
2012-09-01 11:26:16 -04:00
|
|
|
}
|
2012-08-17 05:55:04 -04:00
|
|
|
|
|
|
|
|
/* update write status if needed */
|
2017-09-13 12:30:23 -04:00
|
|
|
if (unlikely((f & (CO_FL_CURR_WR_ENA|CO_FL_XPRT_WR_ENA)) == CO_FL_XPRT_WR_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_want_send(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f |= CO_FL_CURR_WR_ENA;
|
|
|
|
|
}
|
2017-09-13 12:30:23 -04:00
|
|
|
else if (unlikely((f & (CO_FL_CURR_WR_ENA|CO_FL_XPRT_WR_ENA)) == CO_FL_CURR_WR_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_stop_send(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f &= ~CO_FL_CURR_WR_ENA;
|
2012-09-01 11:26:16 -04:00
|
|
|
}
|
2014-01-22 13:46:33 -05:00
|
|
|
c->flags = f;
|
2012-09-01 11:26:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Update polling on connection <c>'s file descriptor depending on its current
|
|
|
|
|
* state as reported in the connection's CO_FL_CURR_* flags, reports of EAGAIN
|
|
|
|
|
* in CO_FL_WAIT_*, and the sock layer expectations indicated by CO_FL_SOCK_*.
|
|
|
|
|
* The connection flags are updated with the new flags at the end of the
|
2012-10-04 16:21:15 -04:00
|
|
|
* operation. Polling is totally disabled if an error was reported.
|
2012-09-01 11:26:16 -04:00
|
|
|
*/
|
|
|
|
|
void conn_update_sock_polling(struct connection *c)
|
|
|
|
|
{
|
|
|
|
|
unsigned int f = c->flags;
|
2012-08-17 05:55:04 -04:00
|
|
|
|
2014-01-23 07:50:42 -05:00
|
|
|
if (!conn_ctrl_ready(c))
|
MAJOR: connection: add two new flags to indicate readiness of control/transport
Currently the control and transport layers of a connection are supposed
to be initialized when their respective pointers are not NULL. This will
not work anymore when we plan to reuse connections, because there is an
asymmetry between the accept() side and the connect() side :
- on accept() side, the fd is set first, then the ctrl layer then the
transport layer ; upon error, they must be undone in the reverse order,
then the FD must be closed. The FD must not be deleted if the control
layer was not yet initialized ;
- on the connect() side, the fd is set last and there is no reliable way
to know if it has been initialized or not. In practice it's initialized
to -1 first but this is hackish and supposes that local FDs only will
be used forever. Also, there are even less solutions for keeping trace
of the transport layer's state.
Also it is possible to support delayed close() when something (eg: logs)
tracks some information requiring the transport and/or control layers,
making it even more difficult to clean them.
So the proposed solution is to add two flags to the connection :
- CO_FL_CTRL_READY is set when the control layer is initialized (fd_insert)
and cleared after it's released (fd_delete).
- CO_FL_XPRT_READY is set when the control layer is initialized (xprt->init)
and cleared after it's released (xprt->close).
The functions have been adapted to rely on this and not on the pointers
anymore. conn_xprt_close() was unused and dangerous : it did not close
the control layer (eg: the socket itself) but still marks the transport
layer as closed, preventing any future call to conn_full_close() from
finishing the job.
The problem comes from conn_full_close() in fact. It needs to close the
xprt and ctrl layers independantly. After that we're still having an issue :
we don't know based on ->ctrl alone whether the fd was registered or not.
For this we use the two new flags CO_FL_XPRT_READY and CO_FL_CTRL_READY. We
now rely on this and not on conn->xprt nor conn->ctrl anymore to decide what
remains to be done on the connection.
In order not to miss some flag assignments, we introduce conn_ctrl_init()
to initialize the control layer, register the fd using fd_insert() and set
the flag, and conn_ctrl_close() which unregisters the fd and removes the
flag, but only if the transport layer was closed.
Similarly, at the transport layer, conn_xprt_init() calls ->init and sets
the flag, while conn_xprt_close() checks the flag, calls ->close and clears
the flag, regardless xprt_ctx or xprt_st. This also ensures that the ->init
and the ->close functions are called only once each and in the correct order.
Note that conn_xprt_close() does nothing if the transport layer is still
tracked.
conn_full_close() now simply calls conn_xprt_close() then conn_full_close()
in turn, which do nothing if CO_FL_XPRT_TRACKED is set.
In order to handle the error path, we also provide conn_force_close() which
ignores CO_FL_XPRT_TRACKED and closes the transport and the control layers
in turns. All relevant instances of fd_delete() have been replaced with
conn_force_close(). Now we always know what state the connection is in and
we can expect to split its initialization.
2013-10-21 10:30:56 -04:00
|
|
|
return;
|
|
|
|
|
|
2012-09-01 11:26:16 -04:00
|
|
|
/* update read status if needed */
|
2014-01-22 13:46:33 -05:00
|
|
|
if (unlikely((f & (CO_FL_CURR_RD_ENA|CO_FL_SOCK_RD_ENA)) == CO_FL_SOCK_RD_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_want_recv(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f |= CO_FL_CURR_RD_ENA;
|
|
|
|
|
}
|
|
|
|
|
else if (unlikely((f & (CO_FL_CURR_RD_ENA|CO_FL_SOCK_RD_ENA)) == CO_FL_CURR_RD_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_stop_recv(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f &= ~CO_FL_CURR_RD_ENA;
|
2012-09-01 11:26:16 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* update write status if needed */
|
2014-01-22 13:46:33 -05:00
|
|
|
if (unlikely((f & (CO_FL_CURR_WR_ENA|CO_FL_SOCK_WR_ENA)) == CO_FL_SOCK_WR_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_want_send(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f |= CO_FL_CURR_WR_ENA;
|
|
|
|
|
}
|
|
|
|
|
else if (unlikely((f & (CO_FL_CURR_WR_ENA|CO_FL_SOCK_WR_ENA)) == CO_FL_CURR_WR_ENA)) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_stop_send(c->handle.fd);
|
2012-11-05 11:52:26 -05:00
|
|
|
f &= ~CO_FL_CURR_WR_ENA;
|
2012-09-01 11:26:16 -04:00
|
|
|
}
|
2014-01-22 13:46:33 -05:00
|
|
|
c->flags = f;
|
2012-08-17 05:55:04 -04:00
|
|
|
}
|
2012-10-04 18:10:55 -04:00
|
|
|
|
2015-03-12 18:56:52 -04:00
|
|
|
/* Send a message over an established connection. It makes use of send() and
 * returns the same return code and errno. If the socket layer is not ready yet
 * then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
 * as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. It returns
 * EMSGSIZE if called with a zero length message. The purpose is to simplify
 * some rare attempts to directly write on the socket from above the connection
 * (typically send_proxy). In case of EAGAIN, the fd is marked as "cant_send".
 * It automatically retries on EINTR. Other errors cause the connection to be
 * marked as in error state. It takes similar arguments as send() except the
 * first one which is the connection instead of the file descriptor. Note,
 * MSG_DONTWAIT and MSG_NOSIGNAL are forced on the flags.
 */
int conn_sock_send(struct connection *conn, const void *buf, int len, int flags)
{
	int ret;

	/* default outcome: stage the "socket layer not usable" error so
	 * that any early jump to <fail> reports -1/ENOTSOCK.
	 */
	ret = -1;
	errno = ENOTSOCK;

	/* write side already shut: no point trying to send */
	if (conn->flags & CO_FL_SOCK_WR_SH)
		goto fail;

	/* control layer (fd) not initialized yet */
	if (!conn_ctrl_ready(conn))
		goto fail;

	/* zero-length sends are rejected explicitly; re-stage errno for
	 * this failure class before the check.
	 */
	errno = EMSGSIZE;
	if (!len)
		goto fail;

	/* fd not reported writable: subscribe via the <wait> path (returns 0) */
	if (!fd_send_ready(conn->handle.fd))
		goto wait;

	/* transparently retry interrupted sends; non-blocking and no SIGPIPE */
	do {
		ret = send(conn->handle.fd, buf, len, flags | MSG_DONTWAIT | MSG_NOSIGNAL);
	} while (ret < 0 && errno == EINTR);


	if (ret > 0)
		return ret;

	/* ret == 0, or a transient condition (EAGAIN/ENOTCONN): mark the fd
	 * as unable to send and report "nothing done".
	 */
	if (ret == 0 || errno == EAGAIN || errno == ENOTCONN) {
	wait:
		fd_cant_send(conn->handle.fd);
		return 0;
	}
 fail:
	/* any other failure is fatal for the connection: shut both
	 * directions and flag the error; errno was set above or by send().
	 */
	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
	return ret;
}
|
|
|
|
|
|
2015-03-12 19:40:28 -04:00
|
|
|
/* Drains possibly pending incoming data on the file descriptor attached to the
 * connection and update the connection's flags accordingly. This is used to
 * know whether we need to disable lingering on close. Returns non-zero if it
 * is safe to close without disabling lingering, otherwise zero. The SOCK_RD_SH
 * flag may also be updated if the incoming shutdown was reported by the drain()
 * function.
 */
int conn_sock_drain(struct connection *conn)
{
	/* no fd registered: nothing can be pending, safe to close */
	if (!conn_ctrl_ready(conn))
		return 1;

	/* an error or an already-seen read shutdown means there is nothing
	 * left worth draining.
	 */
	if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
		return 1;

	if (fdtab[conn->handle.fd].ev & (FD_POLL_ERR|FD_POLL_HUP)) {
		/* the poller already reported error/hangup: no lingering risk */
		fdtab[conn->handle.fd].linger_risk = 0;
	}
	else {
		/* nothing readable yet, cannot conclude */
		if (!fd_recv_ready(conn->handle.fd))
			return 0;

		/* disable draining if we were called and have no drain function */
		if (!conn->ctrl->drain) {
			__conn_xprt_stop_recv(conn);
			return 0;
		}

		/* protocol-specific drain; <= 0 means not fully drained yet */
		if (conn->ctrl->drain(conn->handle.fd) <= 0)
			return 0;
	}

	/* input side is known to be closed/drained: record it and allow
	 * a plain close without disabling lingering.
	 */
	conn->flags |= CO_FL_SOCK_RD_SH;
	return 1;
}
|
|
|
|
|
|
2014-11-17 09:11:45 -05:00
|
|
|
/*
|
|
|
|
|
* Get data length from tlv
|
|
|
|
|
*/
|
|
|
|
|
static int get_tlv_length(const struct tlv *src)
|
|
|
|
|
{
|
|
|
|
|
return (src->length_hi << 8) | src->length_lo;
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-04 18:10:55 -04:00
|
|
|
/* This handshake handler waits a PROXY protocol header at the beginning of the
|
|
|
|
|
* raw data stream. The header looks like this :
|
|
|
|
|
*
|
|
|
|
|
* "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> <DST4> "\r\n"
|
|
|
|
|
*
|
|
|
|
|
* There must be exactly one space between each field. Fields are :
|
|
|
|
|
* - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
|
|
|
|
|
* - SRC3 : layer 3 (eg: IP) source address in standard text form
|
|
|
|
|
* - DST3 : layer 3 (eg: IP) destination address in standard text form
|
|
|
|
|
* - SRC4 : layer 4 (eg: TCP port) source address in standard text form
|
|
|
|
|
* - DST4 : layer 4 (eg: TCP port) destination address in standard text form
|
|
|
|
|
*
|
|
|
|
|
* This line MUST be at the beginning of the buffer and MUST NOT wrap.
|
|
|
|
|
*
|
|
|
|
|
* The header line is small and in all cases smaller than the smallest normal
|
|
|
|
|
* TCP MSS. So it MUST always be delivered as one segment, which ensures we
|
|
|
|
|
* can safely use MSG_PEEK and avoid buffering.
|
|
|
|
|
*
|
|
|
|
|
* Once the data is fetched, the values are set in the connection's address
|
|
|
|
|
* fields, and data are removed from the socket's buffer. The function returns
|
|
|
|
|
* zero if it needs to wait for more data or if it fails, or 1 if it completed
|
|
|
|
|
* and removed itself.
|
|
|
|
|
*/
|
|
|
|
|
int conn_recv_proxy(struct connection *conn, int flag)
|
|
|
|
|
{
|
|
|
|
|
char *line, *end;
|
2014-06-14 05:06:17 -04:00
|
|
|
struct proxy_hdr_v2 *hdr_v2;
|
|
|
|
|
const char v2sig[] = PP2_SIGNATURE;
|
2014-11-17 09:11:45 -05:00
|
|
|
int tlv_length = 0;
|
2015-07-03 08:09:10 -04:00
|
|
|
int tlv_offset = 0;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
/* we might have been called just after an asynchronous shutr */
|
|
|
|
|
if (conn->flags & CO_FL_SOCK_RD_SH)
|
|
|
|
|
goto fail;
|
|
|
|
|
|
2014-01-23 07:50:42 -05:00
|
|
|
if (!conn_ctrl_ready(conn))
|
MAJOR: connection: add two new flags to indicate readiness of control/transport
Currently the control and transport layers of a connection are supposed
to be initialized when their respective pointers are not NULL. This will
not work anymore when we plan to reuse connections, because there is an
asymmetry between the accept() side and the connect() side :
- on accept() side, the fd is set first, then the ctrl layer then the
transport layer ; upon error, they must be undone in the reverse order,
then the FD must be closed. The FD must not be deleted if the control
layer was not yet initialized ;
- on the connect() side, the fd is set last and there is no reliable way
to know if it has been initialized or not. In practice it's initialized
to -1 first but this is hackish and supposes that local FDs only will
be used forever. Also, there are even less solutions for keeping trace
of the transport layer's state.
Also it is possible to support delayed close() when something (eg: logs)
tracks some information requiring the transport and/or control layers,
making it even more difficult to clean them.
So the proposed solution is to add two flags to the connection :
- CO_FL_CTRL_READY is set when the control layer is initialized (fd_insert)
and cleared after it's released (fd_delete).
- CO_FL_XPRT_READY is set when the control layer is initialized (xprt->init)
and cleared after it's released (xprt->close).
The functions have been adapted to rely on this and not on the pointers
anymore. conn_xprt_close() was unused and dangerous : it did not close
the control layer (eg: the socket itself) but still marks the transport
layer as closed, preventing any future call to conn_full_close() from
finishing the job.
The problem comes from conn_full_close() in fact. It needs to close the
xprt and ctrl layers independantly. After that we're still having an issue :
we don't know based on ->ctrl alone whether the fd was registered or not.
For this we use the two new flags CO_FL_XPRT_READY and CO_FL_CTRL_READY. We
now rely on this and not on conn->xprt nor conn->ctrl anymore to decide what
remains to be done on the connection.
In order not to miss some flag assignments, we introduce conn_ctrl_init()
to initialize the control layer, register the fd using fd_insert() and set
the flag, and conn_ctrl_close() which unregisters the fd and removes the
flag, but only if the transport layer was closed.
Similarly, at the transport layer, conn_xprt_init() calls ->init and sets
the flag, while conn_xprt_close() checks the flag, calls ->close and clears
the flag, regardless xprt_ctx or xprt_st. This also ensures that the ->init
and the ->close functions are called only once each and in the correct order.
Note that conn_xprt_close() does nothing if the transport layer is still
tracked.
conn_full_close() now simply calls conn_xprt_close() then conn_full_close()
in turn, which do nothing if CO_FL_XPRT_TRACKED is set.
In order to handle the error path, we also provide conn_force_close() which
ignores CO_FL_XPRT_TRACKED and closes the transport and the control layers
in turns. All relevant instances of fd_delete() have been replaced with
conn_force_close(). Now we always know what state the connection is in and
we can expect to split its initialization.
2013-10-21 10:30:56 -04:00
|
|
|
goto fail;
|
|
|
|
|
|
2017-08-24 08:31:19 -04:00
|
|
|
if (!fd_recv_ready(conn->handle.fd))
|
2014-01-20 09:13:07 -05:00
|
|
|
return 0;
|
|
|
|
|
|
2012-10-04 18:10:55 -04:00
|
|
|
do {
|
2017-08-24 08:31:19 -04:00
|
|
|
trash.len = recv(conn->handle.fd, trash.str, trash.size, MSG_PEEK);
|
2012-10-29 11:51:55 -04:00
|
|
|
if (trash.len < 0) {
|
2012-10-04 18:10:55 -04:00
|
|
|
if (errno == EINTR)
|
|
|
|
|
continue;
|
|
|
|
|
if (errno == EAGAIN) {
|
2017-08-24 08:31:19 -04:00
|
|
|
fd_cant_recv(conn->handle.fd);
|
2012-10-04 18:10:55 -04:00
|
|
|
return 0;
|
|
|
|
|
}
|
2012-12-03 09:41:18 -05:00
|
|
|
goto recv_abort;
|
2012-10-04 18:10:55 -04:00
|
|
|
}
|
|
|
|
|
} while (0);
|
|
|
|
|
|
2012-12-03 09:41:18 -05:00
|
|
|
if (!trash.len) {
|
|
|
|
|
/* client shutdown */
|
|
|
|
|
conn->err_code = CO_ER_PRX_EMPTY;
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-29 11:51:55 -04:00
|
|
|
if (trash.len < 6)
|
2012-10-04 18:10:55 -04:00
|
|
|
goto missing;
|
|
|
|
|
|
2012-10-29 11:51:55 -04:00
|
|
|
line = trash.str;
|
|
|
|
|
end = trash.str + trash.len;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
/* Decode a possible proxy request, fail early if it does not match */
|
2014-06-14 05:06:17 -04:00
|
|
|
if (strncmp(line, "PROXY ", 6) != 0)
|
|
|
|
|
goto not_v1;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
line += 6;
|
2014-06-14 05:41:36 -04:00
|
|
|
if (trash.len < 9) /* shortest possible line */
|
2012-10-04 18:10:55 -04:00
|
|
|
goto missing;
|
|
|
|
|
|
2016-03-24 05:22:36 -04:00
|
|
|
if (memcmp(line, "TCP4 ", 5) == 0) {
|
2012-10-04 18:10:55 -04:00
|
|
|
u32 src3, dst3, sport, dport;
|
|
|
|
|
|
|
|
|
|
line += 5;
|
|
|
|
|
|
|
|
|
|
src3 = inetaddr_host_lim_ret(line, end, &line);
|
|
|
|
|
if (line == end)
|
|
|
|
|
goto missing;
|
|
|
|
|
if (*line++ != ' ')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
dst3 = inetaddr_host_lim_ret(line, end, &line);
|
|
|
|
|
if (line == end)
|
|
|
|
|
goto missing;
|
|
|
|
|
if (*line++ != ' ')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
sport = read_uint((const char **)&line, end);
|
|
|
|
|
if (line == end)
|
|
|
|
|
goto missing;
|
|
|
|
|
if (*line++ != ' ')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
dport = read_uint((const char **)&line, end);
|
|
|
|
|
if (line > end - 2)
|
|
|
|
|
goto missing;
|
|
|
|
|
if (*line++ != '\r')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
if (*line++ != '\n')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
/* update the session's addresses and mark them set */
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_family = AF_INET;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_addr.s_addr = htonl(src3);
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_port = htons(sport);
|
|
|
|
|
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_family = AF_INET;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr = htonl(dst3);
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_port = htons(dport);
|
|
|
|
|
conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
|
|
|
|
|
}
|
2016-03-24 05:22:36 -04:00
|
|
|
else if (memcmp(line, "TCP6 ", 5) == 0) {
|
2012-10-04 18:10:55 -04:00
|
|
|
u32 sport, dport;
|
|
|
|
|
char *src_s;
|
|
|
|
|
char *dst_s, *sport_s, *dport_s;
|
|
|
|
|
struct in6_addr src3, dst3;
|
|
|
|
|
|
|
|
|
|
line += 5;
|
|
|
|
|
|
|
|
|
|
src_s = line;
|
|
|
|
|
dst_s = sport_s = dport_s = NULL;
|
|
|
|
|
while (1) {
|
|
|
|
|
if (line > end - 2) {
|
|
|
|
|
goto missing;
|
|
|
|
|
}
|
|
|
|
|
else if (*line == '\r') {
|
|
|
|
|
*line = 0;
|
|
|
|
|
line++;
|
|
|
|
|
if (*line++ != '\n')
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*line == ' ') {
|
|
|
|
|
*line = 0;
|
|
|
|
|
if (!dst_s)
|
|
|
|
|
dst_s = line + 1;
|
|
|
|
|
else if (!sport_s)
|
|
|
|
|
sport_s = line + 1;
|
|
|
|
|
else if (!dport_s)
|
|
|
|
|
dport_s = line + 1;
|
|
|
|
|
}
|
|
|
|
|
line++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!dst_s || !sport_s || !dport_s)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
sport = read_uint((const char **)&sport_s,dport_s - 1);
|
|
|
|
|
if (*sport_s != 0)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
dport = read_uint((const char **)&dport_s,line - 2);
|
|
|
|
|
if (*dport_s != 0)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto bad_header;
|
2012-10-04 18:10:55 -04:00
|
|
|
|
|
|
|
|
/* update the session's addresses and mark them set */
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.from)->sin6_family = AF_INET6;
|
|
|
|
|
memcpy(&((struct sockaddr_in6 *)&conn->addr.from)->sin6_addr, &src3, sizeof(struct in6_addr));
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.from)->sin6_port = htons(sport);
|
|
|
|
|
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.to)->sin6_family = AF_INET6;
|
|
|
|
|
memcpy(&((struct sockaddr_in6 *)&conn->addr.to)->sin6_addr, &dst3, sizeof(struct in6_addr));
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.to)->sin6_port = htons(dport);
|
|
|
|
|
conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
|
|
|
|
|
}
|
2014-06-14 05:41:36 -04:00
|
|
|
else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
|
|
|
|
|
/* This can be a UNIX socket forwarded by an haproxy upstream */
|
|
|
|
|
line += 9;
|
|
|
|
|
}
|
2012-10-04 18:10:55 -04:00
|
|
|
else {
|
2014-06-14 05:41:36 -04:00
|
|
|
/* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
|
2012-12-03 09:41:18 -05:00
|
|
|
conn->err_code = CO_ER_PRX_BAD_PROTO;
|
2012-10-04 18:10:55 -04:00
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-14 05:06:17 -04:00
|
|
|
trash.len = line - trash.str;
|
|
|
|
|
goto eat_header;
|
|
|
|
|
|
|
|
|
|
not_v1:
|
|
|
|
|
/* try PPv2 */
|
|
|
|
|
if (trash.len < PP2_HEADER_LEN)
|
|
|
|
|
goto missing;
|
|
|
|
|
|
|
|
|
|
hdr_v2 = (struct proxy_hdr_v2 *)trash.str;
|
|
|
|
|
|
|
|
|
|
if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
|
|
|
|
|
(hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
|
|
|
|
|
conn->err_code = CO_ER_PRX_NOT_HDR;
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (trash.len < PP2_HEADER_LEN + ntohs(hdr_v2->len))
|
|
|
|
|
goto missing;
|
|
|
|
|
|
|
|
|
|
switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
|
|
|
|
|
case 0x01: /* PROXY command */
|
|
|
|
|
switch (hdr_v2->fam) {
|
|
|
|
|
case 0x11: /* TCPv4 */
|
2014-11-19 04:53:20 -05:00
|
|
|
if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
|
|
|
|
|
goto bad_header;
|
|
|
|
|
|
2014-06-14 05:06:17 -04:00
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_family = AF_INET;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.from)->sin_port = hdr_v2->addr.ip4.src_port;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_family = AF_INET;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
|
|
|
|
|
((struct sockaddr_in *)&conn->addr.to)->sin_port = hdr_v2->addr.ip4.dst_port;
|
|
|
|
|
conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
|
2015-07-03 08:09:10 -04:00
|
|
|
tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
|
2014-11-17 09:11:45 -05:00
|
|
|
tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET;
|
2014-06-14 05:06:17 -04:00
|
|
|
break;
|
|
|
|
|
case 0x21: /* TCPv6 */
|
2014-11-19 04:53:20 -05:00
|
|
|
if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
|
|
|
|
|
goto bad_header;
|
|
|
|
|
|
2014-06-14 05:06:17 -04:00
|
|
|
((struct sockaddr_in6 *)&conn->addr.from)->sin6_family = AF_INET6;
|
|
|
|
|
memcpy(&((struct sockaddr_in6 *)&conn->addr.from)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.from)->sin6_port = hdr_v2->addr.ip6.src_port;
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.to)->sin6_family = AF_INET6;
|
|
|
|
|
memcpy(&((struct sockaddr_in6 *)&conn->addr.to)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
|
|
|
|
|
((struct sockaddr_in6 *)&conn->addr.to)->sin6_port = hdr_v2->addr.ip6.dst_port;
|
|
|
|
|
conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
|
2015-07-03 08:09:10 -04:00
|
|
|
tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
|
2014-11-17 09:11:45 -05:00
|
|
|
tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET6;
|
2014-06-14 05:06:17 -04:00
|
|
|
break;
|
|
|
|
|
}
|
2014-11-17 09:11:45 -05:00
|
|
|
|
|
|
|
|
/* TLV parsing */
|
|
|
|
|
if (tlv_length > 0) {
|
|
|
|
|
while (tlv_offset + TLV_HEADER_SIZE <= trash.len) {
|
|
|
|
|
const struct tlv *tlv_packet = (struct tlv *) &trash.str[tlv_offset];
|
|
|
|
|
const int tlv_len = get_tlv_length(tlv_packet);
|
|
|
|
|
tlv_offset += tlv_len + TLV_HEADER_SIZE;
|
|
|
|
|
|
|
|
|
|
switch (tlv_packet->type) {
|
|
|
|
|
#ifdef CONFIG_HAP_NS
|
|
|
|
|
case PP2_TYPE_NETNS: {
|
|
|
|
|
const struct netns_entry *ns;
|
|
|
|
|
ns = netns_store_lookup((char*)tlv_packet->value, tlv_len);
|
|
|
|
|
if (ns)
|
|
|
|
|
conn->proxy_netns = ns;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-14 05:06:17 -04:00
|
|
|
/* unsupported protocol, keep local connection address */
|
|
|
|
|
break;
|
|
|
|
|
case 0x00: /* LOCAL command */
|
|
|
|
|
/* keep local connection address for LOCAL */
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
goto bad_header; /* not a supported command */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
trash.len = PP2_HEADER_LEN + ntohs(hdr_v2->len);
|
|
|
|
|
goto eat_header;
|
|
|
|
|
|
|
|
|
|
eat_header:
|
2012-10-04 18:10:55 -04:00
|
|
|
/* remove the PROXY line from the request. For this we re-read the
|
|
|
|
|
* exact line at once. If we don't get the exact same result, we
|
|
|
|
|
* fail.
|
|
|
|
|
*/
|
|
|
|
|
do {
|
2017-08-24 08:31:19 -04:00
|
|
|
int len2 = recv(conn->handle.fd, trash.str, trash.len, 0);
|
2012-10-04 18:10:55 -04:00
|
|
|
if (len2 < 0 && errno == EINTR)
|
|
|
|
|
continue;
|
2012-10-29 11:51:55 -04:00
|
|
|
if (len2 != trash.len)
|
2012-12-03 09:41:18 -05:00
|
|
|
goto recv_abort;
|
2012-10-04 18:10:55 -04:00
|
|
|
} while (0);
|
|
|
|
|
|
|
|
|
|
conn->flags &= ~flag;
|
2017-01-05 09:11:44 -05:00
|
|
|
conn->flags |= CO_FL_RCVD_PROXY;
|
2012-10-04 18:10:55 -04:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
|
|
missing:
|
|
|
|
|
/* Missing data. Since we're using MSG_PEEK, we can only poll again if
|
|
|
|
|
* we have not read anything. Otherwise we need to fail because we won't
|
|
|
|
|
* be able to poll anymore.
|
|
|
|
|
*/
|
2012-12-03 09:41:18 -05:00
|
|
|
conn->err_code = CO_ER_PRX_TRUNCATED;
|
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
|
|
bad_header:
|
|
|
|
|
/* This is not a valid proxy protocol header */
|
|
|
|
|
conn->err_code = CO_ER_PRX_BAD_HDR;
|
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
|
|
recv_abort:
|
|
|
|
|
conn->err_code = CO_ER_PRX_ABORT;
|
2013-12-04 17:44:10 -05:00
|
|
|
conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
|
2012-12-03 09:41:18 -05:00
|
|
|
goto fail;
|
|
|
|
|
|
2012-10-04 18:10:55 -04:00
|
|
|
fail:
|
2012-12-10 11:03:52 -05:00
|
|
|
__conn_sock_stop_both(conn);
|
2012-10-04 18:10:55 -04:00
|
|
|
conn->flags |= CO_FL_ERROR;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-04 10:11:10 -04:00
|
|
|
/* This handshake handler waits a NetScaler Client IP insertion header
|
|
|
|
|
* at the beginning of the raw data stream. The header looks like this:
|
|
|
|
|
*
|
|
|
|
|
* 4 bytes: CIP magic number
|
|
|
|
|
* 4 bytes: Header length
|
|
|
|
|
* 20+ bytes: Header of the last IP packet sent by the client during
|
|
|
|
|
* TCP handshake.
|
|
|
|
|
* 20+ bytes: Header of the last TCP packet sent by the client during
|
|
|
|
|
* TCP handshake.
|
|
|
|
|
*
|
|
|
|
|
* This line MUST be at the beginning of the buffer and MUST NOT be
|
|
|
|
|
* fragmented.
|
|
|
|
|
*
|
|
|
|
|
* The header line is small and in all cases smaller than the smallest normal
|
|
|
|
|
* TCP MSS. So it MUST always be delivered as one segment, which ensures we
|
|
|
|
|
* can safely use MSG_PEEK and avoid buffering.
|
|
|
|
|
*
|
|
|
|
|
* Once the data is fetched, the values are set in the connection's address
|
|
|
|
|
* fields, and data are removed from the socket's buffer. The function returns
|
|
|
|
|
* zero if it needs to wait for more data or if it fails, or 1 if it completed
|
|
|
|
|
* and removed itself.
|
|
|
|
|
*/
|
|
|
|
|
/* Parses a NetScaler Client IP (CIP) block peeked from the socket, fills
 * conn->addr.{from,to} from the IP and TCP headers it embeds, then strips the
 * whole CIP block from the socket buffer. Returns 1 on success, 0 when more
 * data is needed or on failure (CO_FL_ERROR is set and conn->err_code holds
 * the reason). <flag> is the connection flag cleared once the handshake is
 * completely parsed and consumed.
 */
int conn_recv_netscaler_cip(struct connection *conn, int flag)
{
	char *line;
	uint32_t cip_magic;
	uint32_t cip_len;
	uint8_t ip_v;

	/* we might have been called just after an asynchronous shutr */
	if (conn->flags & CO_FL_SOCK_RD_SH)
		goto fail;

	if (!conn_ctrl_ready(conn))
		goto fail;

	if (!fd_recv_ready(conn->handle.fd))
		return 0;

	/* peek without consuming so we can poll again if the block is not
	 * fully there yet; EINTR restarts the recv() once.
	 */
	do {
		trash.len = recv(conn->handle.fd, trash.str, trash.size, MSG_PEEK);
		if (trash.len < 0) {
			if (errno == EINTR)
				continue;
			if (errno == EAGAIN) {
				fd_cant_recv(conn->handle.fd);
				return 0;
			}
			goto recv_abort;
		}
	} while (0);

	if (!trash.len) {
		/* client shutdown */
		conn->err_code = CO_ER_CIP_EMPTY;
		goto fail;
	}

	/* Fail if buffer length is not large enough to contain
	 * CIP magic, CIP length */
	if (trash.len < 8)
		goto missing;

	line = trash.str;

	/* NOTE(review): these reads assume trash.str is suitably aligned for
	 * uint32_t access — confirm for the supported platforms.
	 */
	cip_magic = ntohl(*(uint32_t *)line);
	cip_len = ntohl(*(uint32_t *)(line+4));

	/* Decode a possible NetScaler Client IP request, fail early if
	 * it does not match */
	if (cip_magic != objt_listener(conn->target)->bind_conf->ns_cip_magic)
		goto bad_magic;

	/* Fail if buffer length is not large enough to contain
	 * CIP magic, CIP length, minimal IP header */
	if (trash.len < 28)
		goto missing;

	line += 8;

	/* Get IP version from the first four bits */
	ip_v = (*line & 0xf0) >> 4;

	if (ip_v == 4) {
		struct ip *hdr_ip4;
		struct my_tcphdr *hdr_tcp;

		hdr_ip4 = (struct ip *)line;

		if (trash.len < (8 + ntohs(hdr_ip4->ip_len))) {
			/* Fail if buffer length is not large enough to contain
			 * CIP magic, CIP length, IPv4 header */
			goto missing;
		} else if (hdr_ip4->ip_p != IPPROTO_TCP) {
			/* The protocol does not include a TCP header */
			conn->err_code = CO_ER_CIP_BAD_PROTO;
			goto fail;
		} else if (trash.len < (28 + ntohs(hdr_ip4->ip_len))) {
			/* Fail if buffer length is not large enough to contain
			 * CIP magic, CIP length, IPv4 header, TCP header */
			goto missing;
		}

		/* TCP header follows the IPv4 header; ip_hl counts 32-bit words */
		hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));

		/* update the session's addresses and mark them set */
		((struct sockaddr_in *)&conn->addr.from)->sin_family = AF_INET;
		((struct sockaddr_in *)&conn->addr.from)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
		((struct sockaddr_in *)&conn->addr.from)->sin_port = hdr_tcp->source;

		((struct sockaddr_in *)&conn->addr.to)->sin_family = AF_INET;
		((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
		((struct sockaddr_in *)&conn->addr.to)->sin_port = hdr_tcp->dest;

		conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
	}
	else if (ip_v == 6) {
		struct ip6_hdr *hdr_ip6;
		struct my_tcphdr *hdr_tcp;

		hdr_ip6 = (struct ip6_hdr *)line;

		if (trash.len < 28) {
			/* Fail if buffer length is not large enough to contain
			 * CIP magic, CIP length, IPv6 header */
			goto missing;
		} else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
			/* The protocol does not include a TCP header */
			conn->err_code = CO_ER_CIP_BAD_PROTO;
			goto fail;
		} else if (trash.len < 48) {
			/* Fail if buffer length is not large enough to contain
			 * CIP magic, CIP length, IPv6 header, TCP header */
			goto missing;
		}

		/* IPv6 has a fixed-size base header, no options to skip here */
		hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));

		/* update the session's addresses and mark them set */
		((struct sockaddr_in6 *)&conn->addr.from)->sin6_family = AF_INET6;
		((struct sockaddr_in6 *)&conn->addr.from)->sin6_addr = hdr_ip6->ip6_src;
		((struct sockaddr_in6 *)&conn->addr.from)->sin6_port = hdr_tcp->source;

		((struct sockaddr_in6 *)&conn->addr.to)->sin6_family = AF_INET6;
		((struct sockaddr_in6 *)&conn->addr.to)->sin6_addr = hdr_ip6->ip6_dst;
		((struct sockaddr_in6 *)&conn->addr.to)->sin6_port = hdr_tcp->dest;

		conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
	}
	else {
		/* The protocol does not match something known (IPv4/IPv6) */
		conn->err_code = CO_ER_CIP_BAD_PROTO;
		goto fail;
	}

	/* NOTE(review): cip_len comes straight from the wire and is not
	 * bounded against trash.len before being added to <line>; confirm an
	 * oversized value cannot make the consuming recv() below read more
	 * than what was actually peeked.
	 */
	line += cip_len;
	trash.len = line - trash.str;

	/* remove the NetScaler Client IP header from the request. For this
	 * we re-read the exact line at once. If we don't get the exact same
	 * result, we fail.
	 */
	do {
		int len2 = recv(conn->handle.fd, trash.str, trash.len, 0);
		if (len2 < 0 && errno == EINTR)
			continue;
		if (len2 != trash.len)
			goto recv_abort;
	} while (0);

	conn->flags &= ~flag;
	return 1;

 missing:
	/* Missing data. Since we're using MSG_PEEK, we can only poll again if
	 * we have not read anything. Otherwise we need to fail because we won't
	 * be able to poll anymore.
	 */
	conn->err_code = CO_ER_CIP_TRUNCATED;
	goto fail;

 bad_magic:
	conn->err_code = CO_ER_CIP_BAD_MAGIC;
	goto fail;

 recv_abort:
	conn->err_code = CO_ER_CIP_ABORT;
	conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
	goto fail;

 fail:
	__conn_sock_stop_both(conn);
	conn->flags |= CO_FL_ERROR;
	return 0;
}
|
|
|
|
|
|
2014-05-08 23:42:08 -04:00
|
|
|
int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote)
|
|
|
|
|
{
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
|
if (srv && (srv->pp_opts & SRV_PP_V2)) {
|
|
|
|
|
ret = make_proxy_line_v2(buf, buf_len, srv, remote);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (remote)
|
|
|
|
|
ret = make_proxy_line_v1(buf, buf_len, &remote->addr.from, &remote->addr.to);
|
|
|
|
|
else
|
|
|
|
|
ret = make_proxy_line_v1(buf, buf_len, NULL, NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-04 18:10:55 -04:00
|
|
|
/* Makes a PROXY protocol line from the two addresses. The output is sent to
|
|
|
|
|
* buffer <buf> for a maximum size of <buf_len> (including the trailing zero).
|
|
|
|
|
* It returns the number of bytes composing this line (including the trailing
|
|
|
|
|
* LF), or zero in case of failure (eg: not enough space). It supports TCP4,
|
2013-10-01 05:41:55 -04:00
|
|
|
* TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is null, UNKNOWN is
|
|
|
|
|
* emitted as well.
|
2012-10-04 18:10:55 -04:00
|
|
|
*/
|
2014-05-08 23:42:08 -04:00
|
|
|
/* Makes a PROXY protocol v1 line from the two addresses. The output is sent
 * to buffer <buf> for a maximum size of <buf_len> (including the trailing
 * zero). It returns the number of bytes composing this line (including the
 * trailing LF), or zero in case of failure (eg: not enough space). It
 * supports TCP4, TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is
 * null, UNKNOWN is emitted as well.
 */
int make_proxy_line_v1(char *buf, int buf_len, struct sockaddr_storage *src, struct sockaddr_storage *dst)
{
	int pos;

	if (src && dst && src->ss_family == dst->ss_family && src->ss_family == AF_INET) {
		struct sockaddr_in *s4 = (struct sockaddr_in *)src;
		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;

		pos = snprintf(buf, buf_len, "PROXY TCP4 ");
		if (pos >= buf_len)
			return 0;

		/* source address */
		if (!inet_ntop(AF_INET, &s4->sin_addr, buf + pos, buf_len - pos))
			return 0;
		pos += strlen(buf + pos);
		if (pos >= buf_len)
			return 0;
		buf[pos++] = ' ';

		/* destination address */
		if (!inet_ntop(AF_INET, &d4->sin_addr, buf + pos, buf_len - pos))
			return 0;
		pos += strlen(buf + pos);
		if (pos >= buf_len)
			return 0;

		/* source and destination ports, then CRLF */
		pos += snprintf(buf + pos, buf_len - pos, " %u %u\r\n",
		                ntohs(s4->sin_port), ntohs(d4->sin_port));
		if (pos >= buf_len)
			return 0;
		return pos;
	}

	if (src && dst && src->ss_family == dst->ss_family && src->ss_family == AF_INET6) {
		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src;
		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;

		pos = snprintf(buf, buf_len, "PROXY TCP6 ");
		if (pos >= buf_len)
			return 0;

		/* source address */
		if (!inet_ntop(AF_INET6, &s6->sin6_addr, buf + pos, buf_len - pos))
			return 0;
		pos += strlen(buf + pos);
		if (pos >= buf_len)
			return 0;
		buf[pos++] = ' ';

		/* destination address */
		if (!inet_ntop(AF_INET6, &d6->sin6_addr, buf + pos, buf_len - pos))
			return 0;
		pos += strlen(buf + pos);
		if (pos >= buf_len)
			return 0;

		/* source and destination ports, then CRLF */
		pos += snprintf(buf + pos, buf_len - pos, " %u %u\r\n",
		                ntohs(s6->sin6_port), ntohs(d6->sin6_port));
		if (pos >= buf_len)
			return 0;
		return pos;
	}

	/* unknown family combination (or missing address) */
	pos = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
	if (pos >= buf_len)
		return 0;
	return pos;
}
|
2014-05-08 23:42:08 -04:00
|
|
|
|
2014-11-17 09:11:45 -05:00
|
|
|
static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
|
2014-05-08 23:42:08 -04:00
|
|
|
{
|
|
|
|
|
struct tlv *tlv;
|
|
|
|
|
|
|
|
|
|
if (!dest || (length + sizeof(*tlv) > dest_len))
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
tlv = (struct tlv *)dest;
|
|
|
|
|
|
|
|
|
|
tlv->type = type;
|
|
|
|
|
tlv->length_hi = length >> 8;
|
|
|
|
|
tlv->length_lo = length & 0x00ff;
|
|
|
|
|
memcpy(tlv->value, value, length);
|
|
|
|
|
return length + sizeof(*tlv);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote)
|
|
|
|
|
{
|
2014-06-14 02:28:06 -04:00
|
|
|
const char pp2_signature[] = PP2_SIGNATURE;
|
2014-05-08 23:42:08 -04:00
|
|
|
int ret = 0;
|
2014-06-14 02:28:06 -04:00
|
|
|
struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
|
2016-05-18 10:17:44 -04:00
|
|
|
struct sockaddr_storage null_addr = { .ss_family = 0 };
|
2014-05-08 23:42:08 -04:00
|
|
|
struct sockaddr_storage *src = &null_addr;
|
|
|
|
|
struct sockaddr_storage *dst = &null_addr;
|
2017-10-24 04:55:14 -04:00
|
|
|
const char *value;
|
|
|
|
|
int value_len;
|
2014-05-08 23:42:08 -04:00
|
|
|
|
|
|
|
|
if (buf_len < PP2_HEADER_LEN)
|
|
|
|
|
return 0;
|
2014-06-14 02:28:06 -04:00
|
|
|
memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);
|
2014-05-08 23:42:08 -04:00
|
|
|
|
|
|
|
|
if (remote) {
|
|
|
|
|
src = &remote->addr.from;
|
|
|
|
|
dst = &remote->addr.to;
|
|
|
|
|
}
|
2014-11-17 09:11:45 -05:00
|
|
|
|
2014-05-08 23:42:08 -04:00
|
|
|
if (src && dst && src->ss_family == dst->ss_family && src->ss_family == AF_INET) {
|
|
|
|
|
if (buf_len < PP2_HDR_LEN_INET)
|
|
|
|
|
return 0;
|
2014-06-14 02:28:06 -04:00
|
|
|
hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
|
|
|
|
|
hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
|
|
|
|
|
hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
|
|
|
|
|
hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
|
|
|
|
|
hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
|
|
|
|
|
hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
|
2014-05-08 23:42:08 -04:00
|
|
|
ret = PP2_HDR_LEN_INET;
|
|
|
|
|
}
|
|
|
|
|
else if (src && dst && src->ss_family == dst->ss_family && src->ss_family == AF_INET6) {
|
|
|
|
|
if (buf_len < PP2_HDR_LEN_INET6)
|
|
|
|
|
return 0;
|
2014-06-14 02:28:06 -04:00
|
|
|
hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
|
|
|
|
|
hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
|
|
|
|
|
memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
|
|
|
|
|
memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
|
|
|
|
|
hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
|
|
|
|
|
hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
|
2014-05-08 23:42:08 -04:00
|
|
|
ret = PP2_HDR_LEN_INET6;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (buf_len < PP2_HDR_LEN_UNSPEC)
|
|
|
|
|
return 0;
|
2014-06-14 02:28:06 -04:00
|
|
|
hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
|
|
|
|
|
hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
|
2014-05-08 23:42:08 -04:00
|
|
|
ret = PP2_HDR_LEN_UNSPEC;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-24 04:55:14 -04:00
|
|
|
if (conn_get_alpn(remote, &value, &value_len)) {
|
|
|
|
|
if ((buf_len - ret) < sizeof(struct tlv))
|
|
|
|
|
return 0;
|
|
|
|
|
ret += make_tlv(&buf[ret], buf_len, PP2_TYPE_ALPN, value_len, value);
|
|
|
|
|
}
|
|
|
|
|
|
2014-05-08 23:42:08 -04:00
|
|
|
#ifdef USE_OPENSSL
|
|
|
|
|
if (srv->pp_opts & SRV_PP_V2_SSL) {
|
2017-10-24 04:55:14 -04:00
|
|
|
struct tlv_ssl *tlv;
|
|
|
|
|
int ssl_tlv_len = 0;
|
2014-05-08 23:42:08 -04:00
|
|
|
if ((buf_len - ret) < sizeof(struct tlv_ssl))
|
|
|
|
|
return 0;
|
|
|
|
|
tlv = (struct tlv_ssl *)&buf[ret];
|
|
|
|
|
memset(tlv, 0, sizeof(struct tlv_ssl));
|
|
|
|
|
ssl_tlv_len += sizeof(struct tlv_ssl);
|
|
|
|
|
tlv->tlv.type = PP2_TYPE_SSL;
|
|
|
|
|
if (ssl_sock_is_ssl(remote)) {
|
|
|
|
|
tlv->client |= PP2_CLIENT_SSL;
|
2017-10-13 10:59:49 -04:00
|
|
|
value = ssl_sock_get_proto_version(remote);
|
2014-05-08 23:42:08 -04:00
|
|
|
if (value) {
|
2017-10-13 06:15:28 -04:00
|
|
|
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
|
2014-05-08 23:42:08 -04:00
|
|
|
}
|
2014-07-30 10:39:13 -04:00
|
|
|
if (ssl_sock_get_cert_used_sess(remote)) {
|
|
|
|
|
tlv->client |= PP2_CLIENT_CERT_SESS;
|
2014-05-08 23:42:08 -04:00
|
|
|
tlv->verify = htonl(ssl_sock_get_verify_result(remote));
|
2014-07-30 10:39:13 -04:00
|
|
|
if (ssl_sock_get_cert_used_conn(remote))
|
|
|
|
|
tlv->client |= PP2_CLIENT_CERT_CONN;
|
2014-05-08 23:42:08 -04:00
|
|
|
}
|
|
|
|
|
if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
|
2017-10-24 04:55:14 -04:00
|
|
|
struct chunk *cn_trash = get_trash_chunk();
|
2014-07-19 00:37:33 -04:00
|
|
|
if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
|
2017-10-13 06:15:28 -04:00
|
|
|
ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN, cn_trash->len, cn_trash->str);
|
2014-05-08 23:42:08 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
|
|
|
|
|
tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
|
|
|
|
|
ret += ssl_tlv_len;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2014-11-17 09:11:45 -05:00
|
|
|
#ifdef CONFIG_HAP_NS
|
|
|
|
|
if (remote && (remote->proxy_netns)) {
|
|
|
|
|
if ((buf_len - ret) < sizeof(struct tlv))
|
|
|
|
|
return 0;
|
|
|
|
|
ret += make_tlv(&buf[ret], buf_len, PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2014-06-14 02:28:06 -04:00
|
|
|
hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
|
2014-05-08 23:42:08 -04:00
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
2017-01-05 09:11:44 -05:00
|
|
|
|
|
|
|
|
/* fetch if the received connection used a PROXY protocol header */
|
|
|
|
|
int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
|
|
|
{
|
|
|
|
|
struct connection *conn;
|
|
|
|
|
|
|
|
|
|
conn = objt_conn(smp->sess->origin);
|
|
|
|
|
if (!conn)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if (!(conn->flags & CO_FL_CONNECTED)) {
|
|
|
|
|
smp->flags |= SMP_F_MAY_CHANGE;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
smp->flags = 0;
|
|
|
|
|
smp->data.type = SMP_T_BOOL;
|
|
|
|
|
smp->data.u.sint = (conn->flags & CO_FL_RCVD_PROXY) ? 1 : 0;
|
|
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Note: must not be declared <const> as its list will be overwritten.
|
|
|
|
|
* Note: fetches that may return multiple types must be declared as the lowest
|
|
|
|
|
* common denominator, the type that can be casted into all other ones. For
|
|
|
|
|
* instance v4/v6 must be declared v4.
|
|
|
|
|
*/
|
|
|
|
|
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
	/* boolean: whether the frontend connection used a PROXY protocol header */
	{ "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ /* END */ },
}};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Runs automatically before main() (constructor attribute): registers the
 * sample fetch keywords declared above with the sample expression engine.
 */
__attribute__((constructor))
static void __connection_init(void)
{
	sample_register_fetches(&sample_fetch_keywords);
}
|