From c6c0a9fdba9ffb0a55f328f39ef847cd0fa7a340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Tue, 7 Jan 2020 11:00:15 +0100 Subject: [PATCH 1/5] Add isc_uv_export()/isc_uv_import() functions to libuv compatibility layer. These functions can be used to pass a uv handle between threads in a safe manner. The other option is to use uv_pipe and pass the uv_handle via IPC, which is way more complex. uv_export() and uv_import() functions existed in libuv at some point but were removed later. This code is based on the original removed code. The Windows version of the code uses two functions internal to libuv; a patch for libuv is attached for exporting these functions. --- lib/isc/Makefile.in | 2 +- lib/isc/netmgr/Makefile.in | 4 +- lib/isc/netmgr/uv-compat.c | 168 ++++++++++++++++++++++++++++++++ lib/isc/netmgr/uv-compat.h | 49 ++++++++-- lib/isc/win32/libisc.def.in | 2 + lib/isc/win32/libisc.vcxproj.in | 1 + util/copyrights | 2 + win32utils/libuv.diff | 27 +++++ 8 files changed, 246 insertions(+), 9 deletions(-) create mode 100644 lib/isc/netmgr/uv-compat.c create mode 100644 win32utils/libuv.diff diff --git a/lib/isc/Makefile.in b/lib/isc/Makefile.in index 906ebd3e69..a8986c5a45 100644 --- a/lib/isc/Makefile.in +++ b/lib/isc/Makefile.in @@ -55,7 +55,7 @@ OBJS = pk11.@O@ pk11_result.@O@ \ lex.@O@ lfsr.@O@ lib.@O@ log.@O@ \ md.@O@ mem.@O@ mutexblock.@O@ \ netmgr/netmgr.@O@ netmgr/tcp.@O@ netmgr/udp.@O@ \ - netmgr/tcpdns.@O@ netmgr/uverr2result.@O@ \ + netmgr/tcpdns.@O@ netmgr/uverr2result.@O@ netmgr/uv-compat.@O@ \ netaddr.@O@ netscope.@O@ nonce.@O@ openssl_shim.@O@ pool.@O@ \ parseint.@O@ portset.@O@ queue.@O@ quota.@O@ \ radix.@O@ random.@O@ ratelimiter.@O@ \ diff --git a/lib/isc/netmgr/Makefile.in b/lib/isc/netmgr/Makefile.in index 74f0439521..f58caf5071 100644 --- a/lib/isc/netmgr/Makefile.in +++ b/lib/isc/netmgr/Makefile.in @@ -25,10 +25,10 @@ CDEFINES = CWARNINGS = # Alphabetically -OBJS = netmgr.@O@ tcp.@O@ udp.@O@ tcpdns.@O@ uverr2result.@O@ +OBJS = netmgr.@O@ tcp.@O@ udp.@O@ tcpdns.@O@ uverr2result.@O@ uv-compat.@O@ # Alphabetically -SRCS = netmgr.c tcp.c udp.c tcpdns.c uverr2result.c +SRCS = netmgr.c tcp.c udp.c tcpdns.c uverr2result.c uv-compat.c TARGETS = ${OBJS} diff --git a/lib/isc/netmgr/uv-compat.c b/lib/isc/netmgr/uv-compat.c new file mode 100644 index 0000000000..101c3c7201 --- /dev/null +++ b/lib/isc/netmgr/uv-compat.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include "uv-compat.h" + +/* + * XXXWPK: This code goes into libuv internals and it's platform dependent. + * It's ugly, we shouldn't do it, but the alternative with passing sockets + * over IPC sockets is even worse, and causes all kind of different + * problems. We should try to push these things upstream. + */ + +#ifdef WIN32 +/* This code is adapted from libuv/src/win/internal.h */ +typedef enum { + UV__IPC_SOCKET_XFER_NONE = 0, + UV__IPC_SOCKET_XFER_TCP_CONNECTION, + UV__IPC_SOCKET_XFER_TCP_SERVER +} uv__ipc_socket_xfer_type_t; + +typedef struct { + WSAPROTOCOL_INFOW socket_info; + uint32_t delayed_error; +} uv__ipc_socket_xfer_info_t; + +int +uv__tcp_xfer_import(uv_tcp_t *tcp, uv__ipc_socket_xfer_type_t xfer_type, + uv__ipc_socket_xfer_info_t *xfer_info); +int +uv__tcp_xfer_export(uv_tcp_t *handle, int pid, + uv__ipc_socket_xfer_info_t *xfer_info); + +int +isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info) { + if (stream->type != UV_TCP) { + return (-1); + } + if (uv__tcp_xfer_export((uv_tcp_t *) stream, GetCurrentProcessId(), + &info->socket_info) == -1) { + return (-1); + } + + info->type = UV_TCP; +} + +int +isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info) { + uv__ipc_socket_xfer_info_t xfer_info; + + if (stream->type != UV_TCP || info->type != UV_TCP) { + return (-1); + } + xfer_info.socket_info = info->socket_info; + + return (uv__tcp_xfer_import((uv_tcp_t *) stream, + UV__IPC_SOCKET_XFER_TCP_SERVER, + &xfer_info)); +} +#else /* WIN32 */ +/* Adapted from libuv/src/unix/internal.h */ +#include +#include + +static int +isc_uv__cloexec(int fd, int set) { + int r; + + /* + * This #ifdef is taken directly from the libuv sources. + * We use FIOCLEX and FIONCLEX ioctl() calls when possible, + * but on some platforms are not implemented, or defined but + * not implemented correctly. On those, we use the FD_CLOEXEC + * fcntl() call, which adds extra system call overhead, but + * works. + */ +#if defined(_AIX) || \ + defined(__APPLE__) || \ + defined(__DragonFly__) || \ + defined(__FreeBSD__) || \ + defined(__FreeBSD_kernel__) || \ + defined(__linux__) || \ + defined(__OpenBSD__) || \ + defined(__NetBSD__) + do { + r = ioctl(fd, set ? FIOCLEX : FIONCLEX); + } while (r == -1 && errno == EINTR); +#else /* FIOCLEX/FIONCLEX unsupported */ + int flags; + + do { + r = fcntl(fd, F_GETFD); + } while (r == -1 && errno == EINTR); + + if (r == -1) { + return (-1); + } + + if (!!(r & FD_CLOEXEC) == !!set) { + return (0); + } + + if (set) { + flags = r | FD_CLOEXEC; + } else { + flags = r & ~FD_CLOEXEC; + } + + do { + r = fcntl(fd, F_SETFD, flags); + } while (r == -1 && errno == EINTR); +#endif /* FIOCLEX/FIONCLEX unsupported */ + + if (r != 0) { + return (-1); + } + + return (0); +} + +int +isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info) { + int oldfd, fd; + int err; + + if (stream->type != UV_TCP) { + return (-1); + } + err = uv_fileno((uv_handle_t *) stream, (uv_os_fd_t *) &oldfd); + + if (err != 0) { + return (err); + } + + fd = dup(oldfd); + if (fd == -1) { + return (-1); + } + + err = isc_uv__cloexec(fd, 1); + if (err != 0) { + close(fd); + return (err); + } + + info->type = stream->type; + info->fd = fd; + return (0); +} + +int +isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info) { + if (info->type != UV_TCP) { + return (-1); + } + + uv_tcp_t *tcp = (uv_tcp_t *) stream; + return (uv_tcp_open(tcp, info->fd)); +} +#endif diff --git a/lib/isc/netmgr/uv-compat.h b/lib/isc/netmgr/uv-compat.h index 6ec2f3237b..092c87976c 100644 --- a/lib/isc/netmgr/uv-compat.h +++ b/lib/isc/netmgr/uv-compat.h @@ -13,22 +13,59 @@ #include /* - * Those functions were introduced in newer libuv, we still + * These functions were introduced in newer libuv, but we still * want BIND9 compile on older ones so we emulate them. * They're inline to avoid conflicts when running with a newer * library version. */ #ifndef HAVE_UV_HANDLE_GET_DATA -static inline void* -uv_handle_get_data(const uv_handle_t* handle) { - return (handle->data); +static inline void * +uv_handle_get_data(const uv_handle_t *handle) { + return (handle->data); } #endif #ifndef HAVE_UV_HANDLE_SET_DATA static inline void -uv_handle_set_data(uv_handle_t* handle, void* data) { - handle->data = data; +uv_handle_set_data(uv_handle_t *handle, void *data) { + handle->data = data; }; #endif + +/* + * These functions are not available in libuv, but they're very internal + * to libuv. We should try to get them merged upstream. + */ + +/* + * A sane way to pass listening TCP socket to child threads, without using + * IPC (as the libuv example shows) but a version of the uv_export() and + * uv_import() functions that were unfortunately removed from libuv. + * This is based on the original libuv code. + */ + +typedef struct isc_uv_stream_info_s isc_uv_stream_info_t; + +struct isc_uv_stream_info_s { + uv_handle_type type; +#ifdef WIN32 + WSAPROTOCOL_INFOW socket_info; +#else + int fd; +#endif +}; + +int +isc_uv_export(uv_stream_t *stream, isc_uv_stream_info_t *info); +/*%< + * Exports uv_stream_t as isc_uv_stream_info_t value, which could + * be used to initialize shared streams within the same process. + */ + +int +isc_uv_import(uv_stream_t *stream, isc_uv_stream_info_t *info); +/*%< + * Imports uv_stream_info_t value into uv_stream_t to initialize a + * shared stream. + */ diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index fa20fe059d..618737983a 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -683,6 +683,8 @@ isc_timermgr_destroy isc_timermgr_poke isc_tm_timegm isc_tm_strptime +isc_uv_export +isc_uv_import isc_win32os_versioncheck openlog @IF PKCS11 diff --git a/lib/isc/win32/libisc.vcxproj.in b/lib/isc/win32/libisc.vcxproj.in index e41b625e3a..9fd2d149fe 100644 --- a/lib/isc/win32/libisc.vcxproj.in +++ b/lib/isc/win32/libisc.vcxproj.in @@ -446,6 +446,7 @@ copy InstallFiles ..\Build\Release\ + diff --git a/util/copyrights b/util/copyrights index a675fce20b..d15bd6424f 100644 --- a/util/copyrights +++ b/util/copyrights @@ -2257,6 +2257,7 @@ ./lib/isc/netmgr/tcp.c C 2019,2020 ./lib/isc/netmgr/tcpdns.c C 2019,2020 ./lib/isc/netmgr/udp.c C 2019,2020 +./lib/isc/netmgr/uv-compat.c C 2020 ./lib/isc/netmgr/uv-compat.h C 2019,2020 ./lib/isc/netmgr/uverr2result.c C 2019,2020 ./lib/isc/netscope.c C 2002,2004,2005,2006,2007,2016,2018,2019,2020 @@ -2604,3 +2605,4 @@ ./win32utils/GeoIP.diff X 2013,2018,2019,2020 ./win32utils/bind9.sln.in X 2013,2014,2015,2016,2017,2018,2019,2020 ./win32utils/index.html HTML 2006,2007,2008,2012,2013,2014,2015,2016,2018,2019,2020 +./win32utils/libuv.diff X 2020 diff --git a/win32utils/libuv.diff b/win32utils/libuv.diff new file mode 100644 index 0000000000..41f3796ccc --- /dev/null +++ b/win32utils/libuv.diff @@ -0,0 +1,27 @@ +To make TCP listening properly multithreaded, we need to have the +uv_export() and uv_import() functions that were removed from libuv. +The alternative is passing sockets over IPC, which is complicated and +error prone. + +To make it simple, we export two internal functions from libuv; they will +be used in lib/isc/netmgr/uv-compat.c by our versions of the uv_export() +and uv_import() functions. + +diff --git a/src/win/internal.h b/src/win/internal.h +index 058ddb8e..a9dc4168 100644 +--- a/src/win/internal.h ++++ b/src/win/internal.h +@@ -92,11 +92,11 @@ void uv_process_tcp_connect_req(uv_loop_t* loop, uv_tcp_t* handle, + void uv_tcp_close(uv_loop_t* loop, uv_tcp_t* tcp); + void uv_tcp_endgame(uv_loop_t* loop, uv_tcp_t* handle); + +-int uv__tcp_xfer_export(uv_tcp_t* handle, ++UV_EXTERN int uv__tcp_xfer_export(uv_tcp_t* handle, + int pid, + uv__ipc_socket_xfer_type_t* xfer_type, + uv__ipc_socket_xfer_info_t* xfer_info); +-int uv__tcp_xfer_import(uv_tcp_t* tcp, ++UV_EXTERN int uv__tcp_xfer_import(uv_tcp_t* tcp, + uv__ipc_socket_xfer_type_t xfer_type, + uv__ipc_socket_xfer_info_t* xfer_info); + From 67c1ca9a790b7fb9eb18181b754592582a20c68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Tue, 7 Jan 2020 12:02:41 +0100 Subject: [PATCH 2/5] Use isc_uv_export() to pass bound TCP listening socket to child listeners. For multithreaded TCP listening we need to pass a bound socket to all listening threads. Instead of using uv_pipe handle passing method which is quite complex (lots of callbacks, each of them with its own error handling) we now use isc_uv_export() to export the socket, pass it as a member of the isc__netievent_tcpchildlisten_t structure, and then isc_uv_import() it in the child thread, simplifying the process significantly. --- lib/isc/netmgr/netmgr-int.h | 16 +-- lib/isc/netmgr/netmgr.c | 16 --- lib/isc/netmgr/tcp.c | 203 ++++-------------------------------- 3 files changed, 33 insertions(+), 202 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 235fbd2288..68f3f399b8 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -29,6 +29,7 @@ #include #include #include +#include "uv-compat.h" #define ISC_NETMGR_TID_UNKNOWN -1 @@ -208,9 +209,17 @@ typedef struct isc__netievent__socket_req { typedef isc__netievent__socket_req_t isc__netievent_tcpconnect_t; typedef isc__netievent__socket_req_t isc__netievent_tcplisten_t; -typedef isc__netievent__socket_req_t isc__netievent_tcpchildlisten_t; typedef isc__netievent__socket_req_t isc__netievent_tcpsend_t; +typedef struct isc__netievent__socket_streaminfo { + isc__netievent_type type; + isc_nmsocket_t *sock; + isc_uv_stream_info_t streaminfo; +} isc__netievent__socket_streaminfo_t; + +typedef isc__netievent__socket_streaminfo_t isc__netievent_tcpchildlisten_t; + + typedef struct isc__netievent__socket_handle { isc__netievent_type type; isc_nmsocket_t *sock; @@ -348,11 +357,6 @@ struct isc_nmsocket { isc_nmiface_t *iface; isc_nmhandle_t *tcphandle; - /*% Used to transfer listening TCP sockets to children */ - uv_pipe_t ipc; - char ipc_pipe_name[64]; - atomic_int_fast32_t schildren; - /*% Extra data allocated at the end of each isc_nmhandle_t */ size_t extrahandlesize; diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index fe1f898cb2..90a2015bc4 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -43,12 +43,6 @@ ISC_THREAD_LOCAL int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN; -#ifdef WIN32 -#define NAMED_PIPE_PATTERN "\\\\.\\pipe\\named-%d-%u.pipe" -#else -#define NAMED_PIPE_PATTERN "/tmp/named-%d-%u.pipe" -#endif - static void nmsocket_maybe_destroy(isc_nmsocket_t *sock); static void @@ -856,16 +850,6 @@ isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, sock->ah_handles[i] = NULL; } - /* - * Use a random number in the named pipe name. Also add getpid() - * to the name to make sure we don't get a conflict between - * different unit tests running at the same time, where the PRNG - * is initialized to a constant seed. - */ - snprintf(sock->ipc_pipe_name, sizeof(sock->ipc_pipe_name), - NAMED_PIPE_PATTERN, getpid(), isc_random32()); - sock->ipc_pipe_name[sizeof(sock->ipc_pipe_name) - 1] = '\0'; - isc_mutex_init(&sock->lock); isc_condition_init(&sock->cond); isc_refcount_init(&sock->references, 1); diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index d6ba3f4a99..46c612ab83 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -52,16 +52,6 @@ read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); static void tcp_close_cb(uv_handle_t *uvhandle); -static void -ipc_connection_cb(uv_stream_t *stream, int status); -static void -ipc_write_cb(uv_write_t *uvreq, int status); -static void -ipc_close_cb(uv_handle_t *handle); -static void -childlisten_ipc_connect_cb(uv_connect_t *uvreq, int status); -static void -childlisten_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf); static void stoplistening(isc_nmsocket_t *sock); static void @@ -206,25 +196,6 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, } } -#ifndef WIN32 -/* - * Run fsync() on the directory containing a socket's IPC pipe, to - * ensure that all threads can see the pipe. - */ -static void -syncdir(const isc_nmsocket_t *sock) { - char *pipe = isc_mem_strdup(sock->mgr->mctx, sock->ipc_pipe_name); - int fd = open(dirname(pipe), O_RDONLY); - - RUNTIME_CHECK(fd >= 0); - - isc_mem_free(sock->mgr->mctx, pipe); - - fsync(fd); - close(fd); -} -#endif - /* * For multi-threaded TCP listening, we create a single "parent" socket, * bind to it, and then pass its uv_handle to a set of child sockets, one @@ -241,7 +212,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tcplisten_t *ievent = (isc__netievent_tcplisten_t *) ev0; isc_nmsocket_t *sock = ievent->sock; - int r; + struct sockaddr_storage sname; + int r, snamelen = sizeof(sname); REQUIRE(isc__nm_in_netthread()); REQUIRE(sock->type == isc_nm_tcplistener); @@ -278,45 +250,25 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock->result = isc__nm_uverr2result(r); goto done; } + + /* + * By doing this now, we can find out immediately whether bind() + * failed, and quit if so. (uv_bind() uses a delayed error, + * initially returning success even if bind() fails, and this + * could cause a deadlock later if we didn't check first.) + */ + r = uv_tcp_getsockname(&sock->uv_handle.tcp, &sname, &snamelen); + if (r != 0) { + uv_close(&sock->uv_handle.handle, tcp_close_cb); + sock->result = isc__nm_uverr2result(r); + goto done; + } + uv_handle_set_data(&sock->uv_handle.handle, sock); /* - * uv_pipe_init() is incorrectly documented in libuv, and the - * example in benchmark-multi-accept.c is also wrong. - * - * The third parameter ('ipc') indicates that the pipe will be - * used to *send* a handle to other threads. Therefore, it must - * must be set to 0 for a 'listening' IPC socket, and 1 - * only for sockets that are really passing FDs between - * threads. - */ - r = uv_pipe_init(&worker->loop, &sock->ipc, 0); - RUNTIME_CHECK(r == 0); - - uv_handle_set_data((uv_handle_t *)&sock->ipc, sock); - r = uv_pipe_bind(&sock->ipc, sock->ipc_pipe_name); - RUNTIME_CHECK(r == 0); - - r = uv_listen((uv_stream_t *) &sock->ipc, sock->nchildren, - ipc_connection_cb); - RUNTIME_CHECK(r == 0); - -#ifndef WIN32 - /* - * On Unices a child thread might not see the pipe yet; - * that happened quite often in unit tests on FreeBSD. - * Syncing the directory ensures that the pipe is visible - * to everyone. - * This isn't done on Windows because named pipes exist - * within a different namespace, not on VFS. - */ - syncdir(sock); -#endif - - /* - * For each worker we send a 'tcpchildlisten' event. The child - * listener will then receive its version of the socket - * uv_handle via IPC in isc__nm_async_tcpchildlisten(). + * For each worker, we send a 'tcpchildlisten' event with + * the exported socket. */ for (int i = 0; i < sock->nchildren; i++) { isc_nmsocket_t *csock = &sock->children[i]; @@ -324,6 +276,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { event = isc__nm_get_ievent(csock->mgr, netievent_tcpchildlisten); + isc_uv_export(&sock->uv_handle.stream, &event->streaminfo); event->sock = csock; if (csock->tid == isc_nm_tid()) { isc__nm_async_tcpchildlisten(&sock->mgr->workers[i], @@ -344,67 +297,6 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { return; } -/* - * The parent received an IPC connection from a child and can now send - * the uv_handle to the child for listening. - */ -static void -ipc_connection_cb(uv_stream_t *stream, int status) { - isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *) stream); - isc__networker_t *worker = &sock->mgr->workers[isc_nm_tid()]; - isc__nm_uvreq_t *nreq = isc__nm_uvreq_get(sock->mgr, sock); - int r; - - REQUIRE(status == 0); - - /* - * The buffer can be anything, it will be ignored, but it has to - * be something that won't disappear. - */ - nreq->uvbuf = uv_buf_init((char *)nreq, 1); - uv_pipe_init(&worker->loop, &nreq->ipc, 1); - uv_handle_set_data((uv_handle_t *)&nreq->ipc, nreq); - - r = uv_accept((uv_stream_t *) &sock->ipc, (uv_stream_t *) &nreq->ipc); - RUNTIME_CHECK(r == 0); - - r = uv_write2(&nreq->uv_req.write, - (uv_stream_t *) &nreq->ipc, &nreq->uvbuf, 1, - (uv_stream_t *) &sock->uv_handle.stream, ipc_write_cb); - RUNTIME_CHECK(r == 0); -} - -/* - * The call to send a socket uv_handle is complete; we may be able - * to close the IPC connection. - */ -static void -ipc_write_cb(uv_write_t *uvreq, int status) { - isc__nm_uvreq_t *req = uvreq->data; - - UNUSED(status); - - /* - * We want all children to get the socket. Once that's done, - * we can stop listening on the IPC socket. - */ - if (atomic_fetch_add(&req->sock->schildren, 1) == - req->sock->nchildren - 1) - { - uv_close((uv_handle_t *) &req->sock->ipc, NULL); - } - uv_close((uv_handle_t *) &req->ipc, ipc_close_cb); -} - -/* - * The IPC socket is closed: free its resources. - */ -static void -ipc_close_cb(uv_handle_t *handle) { - isc__nm_uvreq_t *req = uv_handle_get_data(handle); - isc__nm_uvreq_put(&req, req->sock); -} - /* * Connect to the parent socket and be ready to receive the uv_handle * for the socket we'll be listening on. @@ -414,73 +306,24 @@ isc__nm_async_tcpchildlisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__netievent_tcpchildlisten_t *ievent = (isc__netievent_tcpchildlisten_t *) ev0; isc_nmsocket_t *sock = ievent->sock; - isc__nm_uvreq_t *req = NULL; int r; REQUIRE(isc__nm_in_netthread()); REQUIRE(sock->type == isc_nm_tcpchildlistener); - r = uv_pipe_init(&worker->loop, &sock->ipc, 1); - RUNTIME_CHECK(r == 0); - - uv_handle_set_data((uv_handle_t *) &sock->ipc, sock); - - req = isc__nm_uvreq_get(sock->mgr, sock); - uv_pipe_connect(&req->uv_req.connect, &sock->ipc, - sock->parent->ipc_pipe_name, - childlisten_ipc_connect_cb); -} - -/* Child is now connected to parent via IPC and can begin reading. */ -static void -childlisten_ipc_connect_cb(uv_connect_t *uvreq, int status) { - isc__nm_uvreq_t *req = uvreq->data; - isc_nmsocket_t *sock = req->sock; - int r; - - UNUSED(status); - - isc__nm_uvreq_put(&req, sock); - - r = uv_read_start((uv_stream_t *) &sock->ipc, isc__nm_alloc_cb, - childlisten_read_cb); - RUNTIME_CHECK(r == 0); -} - -/* - * Child has received the socket uv_handle via IPC, and can now begin - * listening for connections and can close the IPC socket. - */ -static void -childlisten_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { - isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *) stream); - isc__networker_t *worker = NULL; - uv_pipe_t *ipc = NULL; - uv_handle_type type; - int r; - - UNUSED(nread); - - REQUIRE(VALID_NMSOCK(sock)); - REQUIRE(buf != NULL); - - ipc = (uv_pipe_t *) stream; - type = uv_pipe_pending_type(ipc); - INSIST(type == UV_TCP); - - isc__nm_free_uvbuf(sock, buf); worker = &sock->mgr->workers[isc_nm_tid()]; + uv_tcp_init(&worker->loop, (uv_tcp_t *) &sock->uv_handle.tcp); uv_handle_set_data(&sock->uv_handle.handle, sock); + isc_uv_import(&sock->uv_handle.stream, &ievent->streaminfo); - uv_accept(stream, &sock->uv_handle.stream); r = uv_listen((uv_stream_t *) &sock->uv_handle.tcp, sock->backlog, tcp_connection_cb); - uv_close((uv_handle_t *) ipc, NULL); + if (r != 0) { isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, - "IPC socket close failed: %s", + "uv_listen failed: %s", isc_result_totext(isc__nm_uverr2result(r))); return; } From e38004457c504a40b9662644b569fdebde16e220 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 10 Jan 2020 14:25:30 -0800 Subject: [PATCH 3/5] netmgr fixes: - use UV_{TC,UD}P_IPV6ONLY for IPv6 sockets, keeping the pre-netmgr behaviour. - add a new listening_error bool flag which is set if the child listener fails to start listening. This fixes a bug where named would hang if, e.g., we failed to bind to a TCP socket. --- lib/isc/netmgr/netmgr-int.h | 1 + lib/isc/netmgr/tcp.c | 17 +++++++++++++---- lib/isc/netmgr/udp.c | 6 +++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 68f3f399b8..a451012e16 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -390,6 +390,7 @@ struct isc_nmsocket { */ atomic_bool closed; atomic_bool listening; + atomic_bool listen_error; isc_refcount_t references; /*% diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 46c612ab83..bab631533f 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -180,7 +180,8 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, (isc__netievent_t *) ievent); LOCK(&nsock->lock); - while (!atomic_load(&nsock->listening)) { + while (!atomic_load(&nsock->listening) && + !atomic_load(&nsock->listen_error)) { WAIT(&nsock->cond, &nsock->lock); } UNLOCK(&nsock->lock); @@ -213,7 +214,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { (isc__netievent_tcplisten_t *) ev0; isc_nmsocket_t *sock = ievent->sock; struct sockaddr_storage sname; - int r, snamelen = sizeof(sname); + int r, flags = 0, snamelen = sizeof(sname); REQUIRE(isc__nm_in_netthread()); REQUIRE(sock->type == isc_nm_tcplistener); @@ -241,13 +242,19 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { /* It was never opened */ atomic_store(&sock->closed, true); sock->result = isc__nm_uverr2result(r); + atomic_store(&sock->listen_error, true); goto done; } + if (sock->iface->addr.type.sa.sa_family == AF_INET6) { + flags = UV_TCP_IPV6ONLY; + } - r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, 0); + r = uv_tcp_bind(&sock->uv_handle.tcp, + &sock->iface->addr.type.sa, flags); if (r != 0) { uv_close(&sock->uv_handle.handle, tcp_close_cb); sock->result = isc__nm_uverr2result(r); + atomic_store(&sock->listen_error, true); goto done; } @@ -257,10 +264,12 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { * initially returning success even if bind() fails, and this * could cause a deadlock later if we didn't check first.) */ - r = uv_tcp_getsockname(&sock->uv_handle.tcp, &sname, &snamelen); + r = uv_tcp_getsockname(&sock->uv_handle.tcp, + (struct sockaddr*) &sname, &snamelen); if (r != 0) { uv_close(&sock->uv_handle.handle, tcp_close_cb); sock->result = isc__nm_uverr2result(r); + atomic_store(&sock->listen_error, true); goto done; } diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index abd950a279..2731ce2497 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -130,8 +130,12 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { (isc_nmsocket_t **)&sock->uv_handle.udp.data); uv_udp_open(&sock->uv_handle.udp, sock->fd); + int flags = 0; + if (sock->iface->addr.type.sa.sa_family == AF_INET6) { + flags = UV_UDP_IPV6ONLY; + } uv_udp_bind(&sock->uv_handle.udp, - &sock->parent->iface->addr.type.sa, 0); + &sock->parent->iface->addr.type.sa, flags); uv_recv_buffer_size(&sock->uv_handle.handle, &(int){16 * 1024 * 1024}); uv_send_buffer_size(&sock->uv_handle.handle, From 20c077afc58386354630fa983d43eb90b7f890f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Mon, 13 Jan 2020 20:22:23 +0100 Subject: [PATCH 4/5] Disable pktinfo for ipv6 on all unices If pktinfo were supported then we could listen on :: for ipv6 and get the information about the destination address from pktinfo structure passed in recvmsg but this method is not portable and libuv doesn't support it - so we need to listen on all interfaces. We should verify that this doesn't impact performance (we already do it for ipv4) and either remove all the ipv6pktinfo detection code or think of fixing libuv. --- lib/isc/unix/net.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/isc/unix/net.c b/lib/isc/unix/net.c index c2573bd381..8613b827da 100644 --- a/lib/isc/unix/net.c +++ b/lib/isc/unix/net.c @@ -90,7 +90,9 @@ #endif /* HAVE_SYSCTLBYNAME */ static isc_once_t once_ipv6only = ISC_ONCE_INIT; +#ifdef __notyet__ static isc_once_t once_ipv6pktinfo = ISC_ONCE_INIT; +#endif #ifndef ISC_CMSG_IP_TOS #ifdef __APPLE__ @@ -284,6 +286,7 @@ initialize_ipv6only(void) { try_ipv6only) == ISC_R_SUCCESS); } +#ifdef __notyet__ static void try_ipv6pktinfo(void) { int s, on; @@ -331,6 +334,7 @@ initialize_ipv6pktinfo(void) { RUNTIME_CHECK(isc_once_do(&once_ipv6pktinfo, try_ipv6pktinfo) == ISC_R_SUCCESS); } +#endif isc_result_t isc_net_probe_ipv6only(void) { @@ -340,7 +344,18 @@ isc_net_probe_ipv6only(void) { isc_result_t isc_net_probe_ipv6pktinfo(void) { +/* + * XXXWPK if pktinfo were supported then we could listen on :: for ipv6 and get + * the information about the destination address from pktinfo structure passed + * in recvmsg but this method is not portable and libuv doesn't support it - so + * we need to listen on all interfaces. + * We should verify that this doesn't impact performance (we already do it for + * ipv4) and either remove all the ipv6pktinfo detection code from above + * or think of fixing libuv. + */ +#ifdef __notyet__ initialize_ipv6pktinfo(); +#endif return (ipv6pktinfo_result); } From 7a7b09fee6c32fc61e7528a6bd1b9bb76be3402c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 10 Jan 2020 09:11:41 +0100 Subject: [PATCH 5/5] CHANGES note --- CHANGES | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES b/CHANGES index 23e465e6af..74ff154cde 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,14 @@ +5342. [bug] Disable pktinfo for ipv6 - bind to all interfaces + explicitly as libuv doesn't pass us pktinfo structure, + [GL #1558] + +5341. [func] Simplify passing the bound TCP socket to child + threads by using isc_uv_export/import functions. + [GL !2825] + +5340. [bug] Don't deadlock when binding to a TCP socket fails. + [GL #1499] + 5339. [bug] With some libmaxminddb versions, named could erroneously match an IP address not belonging to any subnet defined in a given GeoIP2 database to one of the existing