From 64265f1c0eeb816c0bc394ec00c735646de0957c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 1 Apr 2022 14:43:14 +0200 Subject: [PATCH] Add option to configure load balance sockets Previously, the option to enable kernel load balancing of the sockets was always enabled when supported by the operating system (SO_REUSEPORT on Linux and SO_REUSEPORT_LB on FreeBSD). It was reported that in scenarios where the networking threads are also responsible for processing long-running tasks (like RPZ processing, CATZ processing or large zone transfers), this could lead to intermittent brownouts for some clients, because the thread assigned by the operating system might be busy. In such scenarios, the overall performance would be better served by threads competing over the sockets because the idle threads can pick up the incoming traffic. Add new configuration option (`load-balance-sockets`) to allow enabling or disabling the load balancing of the sockets. (cherry picked from commit 85c6e797aa84dfeecb096e7ca3eafb85a5a45f3f) --- bin/named/config.c | 27 +++++++++----- bin/named/named.conf.rst | 1 + bin/named/server.c | 23 ++++++++++++ doc/arm/reference.rst | 23 ++++++++++++ doc/man/named.conf.5in | 1 + doc/misc/options | 1 + doc/misc/options.active | 1 + doc/misc/options.grammar.rst | 1 + lib/isc/include/isc/netmgr.h | 11 ++++++ lib/isc/netmgr/netmgr-int.h | 2 ++ lib/isc/netmgr/netmgr.c | 16 +++++++++ lib/isc/netmgr/tcp.c | 67 ++++++++++++++++++---------------- lib/isc/netmgr/tcpdns.c | 67 ++++++++++++++++++---------------- lib/isc/netmgr/tlsdns.c | 67 ++++++++++++++++++---------------- lib/isc/netmgr/udp.c | 70 +++++++++++++++++++----------------- lib/isccfg/namedconf.c | 1 + 16 files changed, 245 insertions(+), 134 deletions(-) diff --git a/bin/named/config.c b/bin/named/config.c index 9421c612c0..798f745581 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -59,18 +59,26 @@ options {\n\ edns-udp-size 1232;\n\ files unlimited;\n" #if 
defined(HAVE_GEOIP2) - " geoip-directory \"" MAXMINDDB_PREFIX "/share/" - "GeoIP\";" - "\n" + "\ + geoip-directory \"" MAXMINDDB_PREFIX "/share/GeoIP\";\n" #elif defined(HAVE_GEOIP2) - " geoip-directory \".\";\n" + "\ + geoip-directory \".\";\n" #endif /* if defined(HAVE_GEOIP2) */ "\ heartbeat-interval 60;\n\ interface-interval 60;\n\ # keep-response-order {none;};\n\ listen-on {any;};\n\ - listen-on-v6 {any;};\n\ + listen-on-v6 {any;};\n" +#if HAVE_SO_REUSEPORT_LB + "\ + load-balance-sockets yes;\n" +#else + "\ + load-balance-sockets no;\n" +#endif + "\ # lock-file \"" NAMED_LOCALSTATEDIR "/run/named/named.lock\";\n\ match-mapped-addresses no;\n\ max-ixfr-ratio 100%;\n\ @@ -85,10 +93,11 @@ options {\n\ port 53;\n\ tls-port 853;\n" #if HAVE_LIBNGHTTP2 - "http-port 80;\n" - "https-port 443;\n" - "http-listener-clients 300;\n" - "http-streams-per-connection 100;\n" + "\ + http-port 80;\n\ + https-port 443;\n\ + http-listener-clients 300;\n\ + http-streams-per-connection 100;\n" #endif "\ prefetch 2 9;\n\ diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index b2e007bc4a..523f3f2fd4 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -301,6 +301,7 @@ OPTIONS string ] { address_match_element; ... 
}; lmdb-mapsize sizeval; + load-balance-sockets boolean; lock-file ( quoted_string | none ); managed-keys-directory quoted_string; masterfile-format ( raw | text ); diff --git a/bin/named/server.c b/bin/named/server.c index 0187adbe93..0909887f27 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8465,6 +8465,7 @@ load_configuration(const char *filename, named_server_t *server, uint32_t softquota = 0; uint32_t max; uint64_t initial, idle, keepalive, advertised; + bool loadbalancesockets; dns_aclenv_t *env = ns_interfacemgr_getaclenv(named_g_server->interfacemgr); @@ -8972,6 +8973,28 @@ load_configuration(const char *filename, named_server_t *server, } ns_interfacemgr_setbacklog(server->interfacemgr, backlog); + obj = NULL; + result = named_config_get(maps, "load-balance-sockets", &obj); + INSIST(result == ISC_R_SUCCESS); + loadbalancesockets = cfg_obj_asboolean(obj); +#if HAVE_SO_REUSEPORT_LB + if (first_time) { + isc_nm_setloadbalancesockets(named_g_netmgr, + cfg_obj_asboolean(obj)); + } else if (loadbalancesockets != + isc_nm_getloadbalancesockets(named_g_netmgr)) { + cfg_obj_log(obj, named_g_lctx, ISC_LOG_WARNING, + "changing load-balance-sockets value requires " + "server restart"); + } +#else + if (loadbalancesockets) { + cfg_obj_log( + obj, named_g_lctx, ISC_LOG_WARNING, + "load-balance-sockets has no effect on this system"); + } +#endif + /* * Configure the interface manager according to the "listen-on" * statement. diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 9a98de4b00..060ae48af3 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -1726,6 +1726,29 @@ Boolean Options If ``yes``, respond to root key sentinel probes as described in draft-ietf-dnsop-kskroll-sentinel-08. The default is ``yes``. +``load-balance-sockets`` + + This option enables kernel load-balancing of sockets on systems which support + it, including Linux and FreeBSD. 
This instructs the kernel to distribute + incoming socket connections among the networking threads based on a hashing + scheme. For more information, see the receive network flow classification + options (``rx-flow-hash``) section in the ``ethtool`` manual page. The + default is ``yes``. + + Enabling ``load-balance-sockets`` significantly increases general throughput + when incoming traffic is distributed uniformly onto the threads by the + operating system. However, in cases where a worker thread is busy with a + long-lasting operation, such as processing a Response Policy Zone (RPZ) or + Catalog Zone update or an unusually large zone transfer, incoming traffic + that hashes onto that thread may be delayed. On servers where these events + occur frequently, it may be preferable to disable socket load-balancing so + that other threads can pick up the traffic that would have been sent to the + busy thread. + + Note: this option can only be set when ``named`` first starts. + Changes will not take effect during reconfiguration; the server + must be restarted. + ``message-compression`` If ``yes``, DNS name compression is used in responses to regular queries (not including AXFR or IXFR, which always use compression). diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 656b382d05..a33b5fb736 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -351,6 +351,7 @@ options { string ] { address_match_element; ... }; lmdb\-mapsize sizeval; + load\-balance\-sockets boolean; lock\-file ( quoted_string | none ); managed\-keys\-directory quoted_string; masterfile\-format ( raw | text ); diff --git a/doc/misc/options b/doc/misc/options index ca4beab2b8..731f52c79c 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -217,6 +217,7 @@ options { ] { ; ... 
}; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.active b/doc/misc/options.active index 9a6a705f7c..ed897fb225 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -216,6 +216,7 @@ options { ] { ; ... }; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index fa861b8806..d6fe2aa3e4 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -153,6 +153,7 @@ ] { ; ... }; lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 16eedb708f..8a29bce9ec 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -437,6 +437,17 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, * \li 'mgr' is a valid netmgr. */ +bool +isc_nm_getloadbalancesockets(isc_nm_t *mgr); +void +isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled); +/*%< + * Get and set value of load balancing of the sockets. + * + * Requires: + * \li 'mgr' is a valid netmgr. 
+ */ + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised); diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index c90170911c..40aebd15b4 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -714,6 +714,8 @@ struct isc_nm { atomic_uint_fast32_t workers_paused; atomic_uint_fast32_t maxudp; + bool load_balance_sockets; + atomic_bool paused; /* diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index dd7147a55e..4cec777bd8 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -233,6 +233,11 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { atomic_init(&mgr->send_tcp_buffer_size, 0); atomic_init(&mgr->recv_udp_buffer_size, 0); atomic_init(&mgr->send_udp_buffer_size, 0); +#if HAVE_SO_REUSEPORT_LB + mgr->load_balance_sockets = true; +#else + mgr->load_balance_sockets = false; +#endif #ifdef NETMGR_TRACE ISC_LIST_INIT(mgr->active_sockets); @@ -575,6 +580,17 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, atomic_store(&mgr->send_udp_buffer_size, send_udp); } +void +isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled) { + REQUIRE(VALID_NM(mgr)); + +#if HAVE_SO_REUSEPORT_LB + mgr->load_balance_sockets = enabled; +#else + UNUSED(enabled); +#endif +} + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised) { diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 9fc80c4d20..ad74cb05cb 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -362,7 +362,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, } static uv_os_sock_t -isc__nm_tcp_lb_socket(sa_family_t sa_family) { +isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -377,10 +377,10 @@ isc__nm_tcp_lb_socket(sa_family_t sa_family) { result = 
isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -405,12 +405,13 @@ start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcplisten(mgr, csock); @@ -458,9 +459,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -475,9 +476,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -509,6 +510,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -516,6 +518,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; 
+ mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpsocket); REQUIRE(sock->parent != NULL); @@ -549,28 +552,30 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index 53c2f30ff2..18ce2d300f 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -325,7 +325,7 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, } static uv_os_sock_t -isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { +isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -340,10 +340,10 @@ isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - 
RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -378,12 +378,13 @@ start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcpdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock); @@ -424,9 +425,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -441,9 +442,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -476,6 +477,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -483,6 +485,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpdnssocket); REQUIRE(sock->parent 
!= NULL); @@ -515,28 +518,30 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index 4864fcc477..a20398789e 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -391,7 +391,7 @@ failure: } static uv_os_sock_t -isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { +isc__nm_tlsdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -406,10 +406,10 @@ isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == 
ISC_R_SUCCESS); + } return (sock); } @@ -438,12 +438,13 @@ start_tlsdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tlsdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tlsdnslisten(mgr, csock); @@ -495,9 +496,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tlsdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -512,9 +513,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tlsdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -547,6 +548,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -554,6 +556,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tlsdnssocket); REQUIRE(sock->parent != NULL); @@ -586,28 +589,30 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if 
HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 3a43cd3b84..673c04157c 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -85,7 +85,7 @@ static void stop_udp_child(isc_nmsocket_t *sock); static uv_os_sock_t -isc__nm_udp_lb_socket(sa_family_t sa_family) { +isc__nm_udp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -99,10 +99,10 @@ isc__nm_udp_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -124,12 +124,13 @@ start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t 
*sock, csock->extrahandlesize = sock->extrahandlesize; csock->tid = tid; -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_udp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_udplisten(mgr, csock); @@ -175,9 +176,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_udp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -192,9 +193,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, start_udp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -420,6 +421,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int uv_init_flags = 0; sa_family_t sa_family; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -427,6 +429,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->parent != NULL); @@ -461,16 +464,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { uv_bind_flags |= UV_UDP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_udp_freebind(&sock->uv_handle.udp, - &sock->parent->iface.type.sa, uv_bind_flags); - if (r < 0) { - 
isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { - /* This thread is first, bind the socket */ + if (mgr->load_balance_sockets) { r = isc_uv_udp_freebind(&sock->uv_handle.udp, &sock->parent->iface.type.sa, uv_bind_flags); @@ -478,13 +472,25 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.udp.flags = sock->uv_handle.udp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.udp.flags = sock->parent->uv_handle.udp.flags; + if (sock->parent->fd == -1) { + /* This thread is first, bind the socket */ + r = isc_uv_udp_freebind(&sock->uv_handle.udp, + &sock->parent->iface.type.sa, + uv_bind_flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.udp.flags = + sock->uv_handle.udp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.udp.flags = + sock->parent->uv_handle.udp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 04c555a5ac..e1a0f626b1 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1278,6 +1278,7 @@ static cfg_clausedef_t options_clauses[] = { { "keep-response-order", &cfg_type_bracketed_aml, 0 }, { "listen-on", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, { "listen-on-v6", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, + { "load-balance-sockets", &cfg_type_boolean, 0 }, { "lock-file", &cfg_type_qstringornone, 0 }, { "managed-keys-directory", &cfg_type_qstring, 0 }, { "match-mapped-addresses", &cfg_type_boolean, 0 },