diff --git a/bin/named/config.c b/bin/named/config.c index 9421c612c0..798f745581 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -59,18 +59,26 @@ options {\n\ edns-udp-size 1232;\n\ files unlimited;\n" #if defined(HAVE_GEOIP2) - " geoip-directory \"" MAXMINDDB_PREFIX "/share/" - "GeoIP\";" - "\n" + "\ + geoip-directory \"" MAXMINDDB_PREFIX "/share/GeoIP\";\n" #elif defined(HAVE_GEOIP2) - " geoip-directory \".\";\n" + "\ + geoip-directory \".\";\n" #endif /* if defined(HAVE_GEOIP2) */ "\ heartbeat-interval 60;\n\ interface-interval 60;\n\ # keep-response-order {none;};\n\ listen-on {any;};\n\ - listen-on-v6 {any;};\n\ + listen-on-v6 {any;};\n" +#if HAVE_SO_REUSEPORT_LB + "\ + load-balance-sockets yes;\n" +#else + "\ + load-balance-sockets no;\n" +#endif + "\ # lock-file \"" NAMED_LOCALSTATEDIR "/run/named/named.lock\";\n\ match-mapped-addresses no;\n\ max-ixfr-ratio 100%;\n\ @@ -85,10 +93,11 @@ options {\n\ port 53;\n\ tls-port 853;\n" #if HAVE_LIBNGHTTP2 - "http-port 80;\n" - "https-port 443;\n" - "http-listener-clients 300;\n" - "http-streams-per-connection 100;\n" + "\ + http-port 80;\n\ + https-port 443;\n\ + http-listener-clients 300;\n\ + http-streams-per-connection 100;\n" #endif "\ prefetch 2 9;\n\ diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index b2e007bc4a..523f3f2fd4 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -301,6 +301,7 @@ OPTIONS string ] { address_match_element; ... 
}; lmdb-mapsize sizeval; + load-balance-sockets boolean; lock-file ( quoted_string | none ); managed-keys-directory quoted_string; masterfile-format ( raw | text ); diff --git a/bin/named/server.c b/bin/named/server.c index 0187adbe93..0909887f27 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8465,6 +8465,7 @@ load_configuration(const char *filename, named_server_t *server, uint32_t softquota = 0; uint32_t max; uint64_t initial, idle, keepalive, advertised; + bool loadbalancesockets; dns_aclenv_t *env = ns_interfacemgr_getaclenv(named_g_server->interfacemgr); @@ -8972,6 +8973,28 @@ load_configuration(const char *filename, named_server_t *server, } ns_interfacemgr_setbacklog(server->interfacemgr, backlog); + obj = NULL; + result = named_config_get(maps, "load-balance-sockets", &obj); + INSIST(result == ISC_R_SUCCESS); + loadbalancesockets = cfg_obj_asboolean(obj); +#if HAVE_SO_REUSEPORT_LB + if (first_time) { + isc_nm_setloadbalancesockets(named_g_netmgr, + cfg_obj_asboolean(obj)); + } else if (loadbalancesockets != + isc_nm_getloadbalancesockets(named_g_netmgr)) { + cfg_obj_log(obj, named_g_lctx, ISC_LOG_WARNING, + "changing load-balance-sockets value requires " + "server restart"); + } +#else + if (loadbalancesockets) { + cfg_obj_log( + obj, named_g_lctx, ISC_LOG_WARNING, + "load-balance-sockets has no effect on this system"); + } +#endif + /* * Configure the interface manager according to the "listen-on" * statement. diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 9a98de4b00..060ae48af3 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -1726,6 +1726,29 @@ Boolean Options If ``yes``, respond to root key sentinel probes as described in draft-ietf-dnsop-kskroll-sentinel-08. The default is ``yes``. +``load-balance-sockets`` + + This option enables kernel load-balancing of sockets on systems which support + it, including Linux and FreeBSD. 
This instructs the kernel to distribute + incoming socket connections among the networking threads based on a hashing + scheme. For more information, see the receive network flow classification + options (``rx-flow-hash``) section in the ``ethtool`` manual page. The + default is ``yes``. + + Enabling ``load-balance-sockets`` significantly increases general throughput + when incoming traffic is distributed uniformly onto the threads by the + operating system. However, in cases where a worker thread is busy with a + long-lasting operation, such as processing a Response Policy Zone (RPZ) or + Catalog Zone update or an unusually large zone transfer, incoming traffic + that hashes onto that thread may be delayed. On servers where these events + occur frequently, it may be preferable to disable socket load-balancing so + that other threads can pick up the traffic that would have been sent to the + busy thread. + + Note: this option can only be set when ``named`` first starts. + Changes will not take effect during reconfiguration; the server + must be restarted. + ``message-compression`` If ``yes``, DNS name compression is used in responses to regular queries (not including AXFR or IXFR, which always use compression). diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 656b382d05..a33b5fb736 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -351,6 +351,7 @@ options { string ] { address_match_element; ... }; lmdb\-mapsize sizeval; + load\-balance\-sockets boolean; lock\-file ( quoted_string | none ); managed\-keys\-directory quoted_string; masterfile\-format ( raw | text ); diff --git a/doc/misc/options b/doc/misc/options index ca4beab2b8..731f52c79c 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -217,6 +217,7 @@ options { ] { ; ... 
}; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.active b/doc/misc/options.active index 9a6a705f7c..ed897fb225 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -216,6 +216,7 @@ options { ] { ; ... }; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index fa861b8806..d6fe2aa3e4 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -153,6 +153,7 @@ ] { ; ... }; lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 16eedb708f..8a29bce9ec 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -437,6 +437,17 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, * \li 'mgr' is a valid netmgr. */ +bool +isc_nm_getloadbalancesockets(isc_nm_t *mgr); +void +isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled); +/*%< + * Get and set value of load balancing of the sockets. + * + * Requires: + * \li 'mgr' is a valid netmgr. 
+ */
+
 void
 isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
 		   uint32_t *keepalive, uint32_t *advertised);
diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h
index c90170911c..40aebd15b4 100644
--- a/lib/isc/netmgr/netmgr-int.h
+++ b/lib/isc/netmgr/netmgr-int.h
@@ -714,6 +714,8 @@ struct isc_nm {
 	atomic_uint_fast32_t workers_paused;
 	atomic_uint_fast32_t maxudp;
 
+	bool load_balance_sockets;
+
 	atomic_bool paused;
 
 	/*
diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c
index dd7147a55e..4cec777bd8 100644
--- a/lib/isc/netmgr/netmgr.c
+++ b/lib/isc/netmgr/netmgr.c
@@ -233,6 +233,11 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) {
 	atomic_init(&mgr->send_tcp_buffer_size, 0);
 	atomic_init(&mgr->recv_udp_buffer_size, 0);
 	atomic_init(&mgr->send_udp_buffer_size, 0);
+#if HAVE_SO_REUSEPORT_LB
+	mgr->load_balance_sockets = true;
+#else
+	mgr->load_balance_sockets = false;
+#endif
 
 #ifdef NETMGR_TRACE
 	ISC_LIST_INIT(mgr->active_sockets);
@@ -575,6 +580,31 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp,
 	atomic_store(&mgr->send_udp_buffer_size, send_udp);
 }
 
+/*
+ * Return the current value of the 'load_balance_sockets' flag of 'mgr'.
+ */
+bool
+isc_nm_getloadbalancesockets(isc_nm_t *mgr) {
+	REQUIRE(VALID_NM(mgr));
+
+	return (mgr->load_balance_sockets);
+}
+
+void
+isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled) {
+	REQUIRE(VALID_NM(mgr));
+
+#if HAVE_SO_REUSEPORT_LB
+	mgr->load_balance_sockets = enabled;
+#else
+	/*
+	 * Without HAVE_SO_REUSEPORT_LB the flag keeps the 'false' assigned
+	 * in isc__netmgr_create(), so the requested value is ignored.
+	 */
+	UNUSED(enabled);
+#endif
+}
+
 void
 isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
 		   uint32_t *keepalive, uint32_t *advertised) {
diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c
index 9fc80c4d20..ad74cb05cb 100644
--- a/lib/isc/netmgr/tcp.c
+++ b/lib/isc/netmgr/tcp.c
@@ -362,7 +362,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
 }
 
 static uv_os_sock_t
-isc__nm_tcp_lb_socket(sa_family_t sa_family) {
+isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
 	isc_result_t result;
 	uv_os_sock_t sock;
 
@@ -377,10 +377,10 @@ isc__nm_tcp_lb_socket(sa_family_t sa_family) {
 	result =
isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -405,12 +405,13 @@ start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcplisten(mgr, csock); @@ -458,9 +459,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -475,9 +476,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -509,6 +510,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -516,6 +518,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; 
+ mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpsocket); REQUIRE(sock->parent != NULL); @@ -549,28 +552,30 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index 53c2f30ff2..18ce2d300f 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -325,7 +325,7 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, } static uv_os_sock_t -isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { +isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -340,10 +340,10 @@ isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - 
RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -378,12 +378,13 @@ start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcpdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock); @@ -424,9 +425,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -441,9 +442,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -476,6 +477,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -483,6 +485,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpdnssocket); REQUIRE(sock->parent 
!= NULL); @@ -515,28 +518,30 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index 4864fcc477..a20398789e 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -391,7 +391,7 @@ failure: } static uv_os_sock_t -isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { +isc__nm_tlsdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -406,10 +406,10 @@ isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == 
ISC_R_SUCCESS); + } return (sock); } @@ -438,12 +438,13 @@ start_tlsdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tlsdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tlsdnslisten(mgr, csock); @@ -495,9 +496,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tlsdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -512,9 +513,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tlsdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -547,6 +548,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -554,6 +556,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tlsdnssocket); REQUIRE(sock->parent != NULL); @@ -586,28 +589,30 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if 
HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 3a43cd3b84..673c04157c 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -85,7 +85,7 @@ static void stop_udp_child(isc_nmsocket_t *sock); static uv_os_sock_t -isc__nm_udp_lb_socket(sa_family_t sa_family) { +isc__nm_udp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -99,10 +99,10 @@ isc__nm_udp_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -124,12 +124,13 @@ start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t 
*sock, csock->extrahandlesize = sock->extrahandlesize; csock->tid = tid; -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_udp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_udplisten(mgr, csock); @@ -175,9 +176,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_udp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -192,9 +193,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, start_udp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -420,6 +421,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int uv_init_flags = 0; sa_family_t sa_family; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -427,6 +429,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->parent != NULL); @@ -461,16 +464,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { uv_bind_flags |= UV_UDP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_udp_freebind(&sock->uv_handle.udp, - &sock->parent->iface.type.sa, uv_bind_flags); - if (r < 0) { - 
isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { - /* This thread is first, bind the socket */ + if (mgr->load_balance_sockets) { r = isc_uv_udp_freebind(&sock->uv_handle.udp, &sock->parent->iface.type.sa, uv_bind_flags); @@ -478,13 +472,25 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.udp.flags = sock->uv_handle.udp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.udp.flags = sock->parent->uv_handle.udp.flags; + if (sock->parent->fd == -1) { + /* This thread is first, bind the socket */ + r = isc_uv_udp_freebind(&sock->uv_handle.udp, + &sock->parent->iface.type.sa, + uv_bind_flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.udp.flags = + sock->uv_handle.udp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.udp.flags = + sock->parent->uv_handle.udp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 04c555a5ac..e1a0f626b1 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1278,6 +1278,7 @@ static cfg_clausedef_t options_clauses[] = { { "keep-response-order", &cfg_type_bracketed_aml, 0 }, { "listen-on", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, { "listen-on-v6", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, + { "load-balance-sockets", &cfg_type_boolean, 0 }, { "lock-file", &cfg_type_qstringornone, 0 }, { "managed-keys-directory", &cfg_type_qstring, 0 }, { "match-mapped-addresses", &cfg_type_boolean, 0 },