Throttle reading from TCP if the sends are not getting through

When TCP client would not read the DNS message sent to them, the TCP sends inside named would accumulate and cause degradation of the service. Throttle the reading from the TCP socket when we accumulate enough DNS data to be sent. Currently this is limited in a way that a single largest possible DNS message can fit into the buffer. (cherry picked from commit 26006f7b44474819fac2a76dc6cd6f69f0d76828)
2026-06-04 16:12:04 -04:00 · 2024-01-18 17:24:22 +01:00 · 2024-01-18 17:24:22 +01:00 · c6f13f12cd
commit c6f13f12cd
parent 139ff18da9
4 changed files with 116 additions and 2 deletions
--- a/lib/isc/netmgr/netmgr-int.h
+++ b/lib/isc/netmgr/netmgr-int.h
@ -62,9 +62,10 @@
 #endif

 /*
- * The TCP receive buffer can fit one maximum sized DNS message plus its size,
- * the receive buffer here affects TCP, DoT and DoH.
+ * The TCP send and receive buffers can fit one maximum sized DNS message plus
+ * its size, the receive buffer here affects TCP, DoT and DoH.
 */
+#define ISC_NETMGR_TCP_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
 #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)

 /* Pick the larger buffer */
--- a/lib/isc/netmgr/netmgr.c
+++ b/lib/isc/netmgr/netmgr.c
@ -2086,6 +2086,7 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) {

 	sock = req->sock;

+	isc__nm_start_reading(sock);
 	isc__nmsocket_reset(sock);
 }

--- a/lib/isc/netmgr/tcp.c
+++ b/lib/isc/netmgr/tcp.c
@ -905,6 +905,31 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {

 	/* The readcb could have paused the reading */
 	if (atomic_load(&sock->reading)) {
+		if (!sock->client) {
+			/*
+			 * Stop reading if we have accumulated enough bytes in
+			 * the send queue; this means that the TCP client is not
+			 * reading back the data we sending to it, and there's
+			 * no reason to continue processing more incoming DNS
+			 * messages, if the client is not reading back the
+			 * responses.
+			 */
+			size_t write_queue_size =
+				uv_stream_get_write_queue_size(
+					&sock->uv_handle.stream);
+
+			if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
+				isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+					      ISC_LOGMODULE_NETMGR,
+					      ISC_LOG_DEBUG(3),
+					      "throttling TCP connection, "
+					      "the other side is "
+					      "not reading the data (%zu)",
+					      write_queue_size);
+				isc__nm_stop_reading(sock);
+			}
+		}
+
 		/* The timer will be updated */
 		isc__nmsocket_timer_restart(sock);
 	}
@ -1095,6 +1120,33 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
 	return;
 }

+static void
+tcp_maybe_restart_reading(isc_nmsocket_t *sock) {
+	if (!sock->client && sock->reading &&
+	    !uv_is_active(&sock->uv_handle.handle))
+	{
+		/*
+		 * Restart reading if we have less data in the send queue than
+		 * the send buffer size, this means that the TCP client has
+		 * started reading some data again.  Starting reading when we go
+		 * under the limit instead of waiting for all data has been
+		 * flushed allows faster recovery (in case there was a
+		 * congestion and now there isn't).
+		 */
+		size_t write_queue_size =
+			uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+		if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
+			isc_log_write(
+				isc_lctx, ISC_LOGCATEGORY_GENERAL,
+				ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+				"resuming TCP connection, the other side  "
+				"is reading the data again (%zu)",
+				write_queue_size);
+			isc__nm_start_reading(sock);
+		}
+	}
+}
+
 static void
 tcp_send_cb(uv_write_t *req, int status) {
 	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
@ -1112,10 +1164,16 @@ tcp_send_cb(uv_write_t *req, int status) {
 		isc__nm_incstats(sock, STATID_SENDFAIL);
 		isc__nm_failed_send_cb(sock, uvreq,
 				       isc__nm_uverr2result(status));
+
+		if (!sock->client && sock->reading) {
+			isc__nm_start_reading(sock);
+			isc__nmsocket_reset(sock);
+		}
 		return;
 	}

 	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+	tcp_maybe_restart_reading(sock);
 }

 /*
--- a/lib/isc/netmgr/tcpdns.c
+++ b/lib/isc/netmgr/tcpdns.c
@ -913,6 +913,27 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
 	result = isc__nm_process_sock_buffer(sock);
 	if (result != ISC_R_SUCCESS) {
 		isc__nm_failed_read_cb(sock, result, true);
+	} else if (!sock->client) {
+		/*
+		 * Stop reading if we have accumulated enough bytes in
+		 * the send queue; this means that the TCP client is not
+		 * reading back the data we sending to it, and there's
+		 * no reason to continue processing more incoming DNS
+		 * messages, if the client is not reading back the
+		 * responses.
+		 */
+		size_t write_queue_size =
+			uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+
+		if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
+			isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+				      ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+				      "throttling TCP connection, "
+				      "the other side is "
+				      "not reading the data (%zu)",
+				      write_queue_size);
+			isc__nm_stop_reading(sock);
+		}
 	}
 free:
 	if (nread < 0) {
@ -1135,6 +1156,33 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
 	return;
 }

+static void
+tcpdns_maybe_restart_reading(isc_nmsocket_t *sock) {
+	if (!sock->client && sock->reading &&
+	    !uv_is_active(&sock->uv_handle.handle))
+	{
+		/*
+		 * Restart reading if we have less data in the send queue than
+		 * the send buffer size, this means that the TCP client has
+		 * started reading some data again.  Starting reading when we go
+		 * under the limit instead of waiting for all data has been
+		 * flushed allows faster recovery (in case there was a
+		 * congestion and now there isn't).
+		 */
+		size_t write_queue_size =
+			uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+		if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
+			isc_log_write(
+				isc_lctx, ISC_LOGCATEGORY_GENERAL,
+				ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+				"resuming TCP connection, the other side  "
+				"is reading the data again (%zu)",
+				write_queue_size);
+			isc__nm_start_reading(sock);
+		}
+	}
+}
+
 static void
 tcpdns_send_cb(uv_write_t *req, int status) {
 	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
@ -1152,10 +1200,16 @@ tcpdns_send_cb(uv_write_t *req, int status) {
 		isc__nm_incstats(sock, STATID_SENDFAIL);
 		isc__nm_failed_send_cb(sock, uvreq,
 				       isc__nm_uverr2result(status));
+
+		if (!sock->client && sock->reading) {
+			isc__nm_start_reading(sock);
+			isc__nmsocket_reset(sock);
+		}
 		return;
 	}

 	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+	tcpdns_maybe_restart_reading(sock);
 }

 /*