diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f95a51173e..6d2bd9ad9f 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -62,9 +62,10 @@ #endif /* - * The TCP receive buffer can fit one maximum sized DNS message plus its size, - * the receive buffer here affects TCP, DoT and DoH. + * The TCP send and receive buffers can fit one maximum sized DNS message plus + * its size, the receive buffer here affects TCP, DoT and DoH. */ +#define ISC_NETMGR_TCP_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) /* Pick the larger buffer */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 820099553d..7b09639440 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -2086,6 +2086,7 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) { sock = req->sock; + isc__nm_start_reading(sock); isc__nmsocket_reset(sock); } diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 16b53cc579..016a9e9059 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -905,6 +905,31 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { /* The readcb could have paused the reading */ if (atomic_load(&sock->reading)) { + if (!sock->client) { + /* + * Stop reading if we have accumulated enough bytes in + * the send queue; this means that the TCP client is not + * reading back the data we sending to it, and there's + * no reason to continue processing more incoming DNS + * messages, if the client is not reading back the + * responses. + */ + size_t write_queue_size = + uv_stream_get_write_queue_size( + &sock->uv_handle.stream); + + if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, + ISC_LOG_DEBUG(3), + "throttling TCP connection, " + "the other side is " + "not reading the data (%zu)", + write_queue_size); + isc__nm_stop_reading(sock); + } + } + /* The timer will be updated */ isc__nmsocket_timer_restart(sock); } @@ -1095,6 +1120,33 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region, return; } +static void +tcp_maybe_restart_reading(isc_nmsocket_t *sock) { + if (!sock->client && sock->reading && + !uv_is_active(&sock->uv_handle.handle)) + { + /* + * Restart reading if we have less data in the send queue than + * the send buffer size, this means that the TCP client has + * started reading some data again. Starting reading when we go + * under the limit instead of waiting for all data has been + * flushed allows faster recovery (in case there was a + * congestion and now there isn't). + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write( + isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "resuming TCP connection, the other side " + "is reading the data again (%zu)", + write_queue_size); + isc__nm_start_reading(sock); + } + } +} + static void tcp_send_cb(uv_write_t *req, int status) { isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; @@ -1112,10 +1164,16 @@ tcp_send_cb(uv_write_t *req, int status) { isc__nm_incstats(sock, STATID_SENDFAIL); isc__nm_failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + + if (!sock->client && sock->reading) { + isc__nm_start_reading(sock); + isc__nmsocket_reset(sock); + } return; } isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); + tcp_maybe_restart_reading(sock); } /* diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index dd3e9ec3fe..00ecb0f3d2 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -913,6 +913,27 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, result = isc__nm_process_sock_buffer(sock); if (result != ISC_R_SUCCESS) { isc__nm_failed_read_cb(sock, result, true); + } else if (!sock->client) { + /* + * Stop reading if we have accumulated enough bytes in + * the send queue; this means that the TCP client is not + * reading back the data we sending to it, and there's + * no reason to continue processing more incoming DNS + * messages, if the client is not reading back the + * responses. + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + + if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "throttling TCP connection, " + "the other side is " + "not reading the data (%zu)", + write_queue_size); + isc__nm_stop_reading(sock); + } } free: if (nread < 0) { @@ -1135,6 +1156,33 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, return; } +static void +tcpdns_maybe_restart_reading(isc_nmsocket_t *sock) { + if (!sock->client && sock->reading && + !uv_is_active(&sock->uv_handle.handle)) + { + /* + * Restart reading if we have less data in the send queue than + * the send buffer size, this means that the TCP client has + * started reading some data again. Starting reading when we go + * under the limit instead of waiting for all data has been + * flushed allows faster recovery (in case there was a + * congestion and now there isn't). + */ + size_t write_queue_size = + uv_stream_get_write_queue_size(&sock->uv_handle.stream); + if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) { + isc_log_write( + isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3), + "resuming TCP connection, the other side " + "is reading the data again (%zu)", + write_queue_size); + isc__nm_start_reading(sock); + } + } +} + static void tcpdns_send_cb(uv_write_t *req, int status) { isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data; @@ -1152,10 +1200,16 @@ tcpdns_send_cb(uv_write_t *req, int status) { isc__nm_incstats(sock, STATID_SENDFAIL); isc__nm_failed_send_cb(sock, uvreq, isc__nm_uverr2result(status)); + + if (!sock->client && sock->reading) { + isc__nm_start_reading(sock); + isc__nmsocket_reset(sock); + } return; } isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false); + tcpdns_maybe_restart_reading(sock); } /*