From c6f13f12cd862ffae071e56ee7e1fa9998fc23c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 18 Jan 2024 17:24:22 +0100 Subject: [PATCH] Throttle reading from TCP if the sends are not getting through When TCP client would not read the DNS message sent to them, the TCP sends inside named would accumulate and cause degradation of the service. Throttle the reading from the TCP socket when we accumulate enough DNS data to be sent. Currently this is limited in a way that a single largest possible DNS message can fit into the buffer. (cherry picked from commit 26006f7b44474819fac2a76dc6cd6f69f0d76828) --- lib/isc/netmgr/netmgr-int.h | 5 ++-- lib/isc/netmgr/netmgr.c | 1 + lib/isc/netmgr/tcp.c | 58 +++++++++++++++++++++++++++++++++++++ lib/isc/netmgr/tcpdns.c | 54 ++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 2 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f95a51173e..6d2bd9ad9f 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -62,9 +62,10 @@ #endif /* - * The TCP receive buffer can fit one maximum sized DNS message plus its size, - * the receive buffer here affects TCP, DoT and DoH. + * The TCP send and receive buffers can fit one maximum sized DNS message plus + * its size, the receive buffer here affects TCP, DoT and DoH. */ +#define ISC_NETMGR_TCP_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) /* Pick the larger buffer */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 820099553d..7b09639440 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -2086,6 +2086,7 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) { sock = req->sock; + isc__nm_start_reading(sock); isc__nmsocket_reset(sock); } diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 16b53cc579..016a9e9059 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -905,6 +905,31 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { /* The readcb could have paused the reading */ if (atomic_load(&sock->reading)) { + if (!sock->client) { + /* + * Stop reading if we have accumulated enough bytes in + * the send queue; this means that the TCP client is not + * reading back the data we sending to it, and there's + * no reason to continue processing more incoming DNS + * messages, if the client is not reading back the + * responses. + */ + size_t write_queue_size = + uv_stream_get_write_queue_size( + &sock->uv_handle.stream); + + if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, + ISC_LOG_DEBUG(3), + "throttling TCP connection, " + "the other side is " + "not reading the data (%zu)", + write_queue_size); + isc__nm_stop_reading(sock); + } + } + /* The timer will be updated */ isc__nmsocket_timer_restart(sock); } @@ -1095,6 +1120,33 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, return; } +static void +tcp_maybe_restart_reading(isc_nmsocket_t *sock) { + if (!sock->client && sock->reading && + !uv_is_active(&sock->uv_handle.handle)) + { + /* + * Restart reading if we have less data in the send queue than + * the send buffer size, this means that the TCP client has + * started reading some data again. Starting reading when we go + * under the limit instead of waiting for all data has been + * flushed allows faster recovery (in case there was a + * congestion and now there isn't). + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write( + isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "resuming TCP connection, the other side " + "is reading the data again (%zu)", + write_queue_size); + isc__nm_start_reading(sock); + } + } +} + static void tcp_send_cb(uv_write_t *req, int status) { isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; @@ -1112,10 +1164,16 @@ tcp_send_cb(uv_write_t *req, int status) { isc__nm_incstats(sock, STATID_SENDFAIL); isc__nm_failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + + if (!sock->client && sock->reading) { + isc__nm_start_reading(sock); + isc__nmsocket_reset(sock); + } return; } isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); + tcp_maybe_restart_reading(sock); } /* diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index dd3e9ec3fe..00ecb0f3d2 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -913,6 +913,27 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, result = isc__nm_process_sock_buffer(sock); if (result != ISC_R_SUCCESS) { isc__nm_failed_read_cb(sock, result, true); + } else if (!sock->client) { + /* + * Stop reading if we have accumulated enough bytes in + * the send queue; this means that the TCP client is not + * reading back the data we sending to it, and there's + * no reason to continue processing more incoming DNS + * messages, if the client is not reading back the + * responses. + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + + if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "throttling TCP connection, " + "the other side is " + "not reading the data (%zu)", + write_queue_size); + isc__nm_stop_reading(sock); + } } free: if (nread < 0) { @@ -1135,6 +1156,33 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, return; } +static void +tcpdns_maybe_restart_reading(isc_nmsocket_t *sock) { + if (!sock->client && sock->reading && + !uv_is_active(&sock->uv_handle.handle)) + { + /* + * Restart reading if we have less data in the send queue than + * the send buffer size, this means that the TCP client has + * started reading some data again. Starting reading when we go + * under the limit instead of waiting for all data has been + * flushed allows faster recovery (in case there was a + * congestion and now there isn't). + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write( + isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "resuming TCP connection, the other side " + "is reading the data again (%zu)", + write_queue_size); + isc__nm_start_reading(sock); + } + } +} + static void tcpdns_send_cb(uv_write_t *req, int status) { isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; @@ -1152,10 +1200,16 @@ tcpdns_send_cb(uv_write_t *req, int status) { isc__nm_incstats(sock, STATID_SENDFAIL); isc__nm_failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + + if (!sock->client && sock->reading) { + isc__nm_start_reading(sock); + isc__nmsocket_reset(sock); + } return; } isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); + tcpdns_maybe_restart_reading(sock); } /*