bind9/lib/dns/transport.c
Ondřej Surý 6ffda5920e
Add the reader-writer synchronization with modified C-RW-WP
This changes the internal isc_rwlock implementation to:

  Irina Calciu, Dave Dice, Yossi Lev, Victor Luchangco, Virendra
  J. Marathe, and Nir Shavit.  2013.  NUMA-aware reader-writer locks.
  SIGPLAN Not. 48, 8 (August 2013), 157–166.
  DOI: https://doi.org/10.1145/2517327.2442532

(The full article is available from:
  http://mcg.cs.tau.ac.il/papers/ppopp2013-rwlocks.pdf)

The implementation is based on the Writer-Preference Lock (C-RW-WP)
variant (see the 3.4 section of the paper for the rationale).

The implemented algorithm has been modified for simplicity and for usage
patterns in rbtdb.c.

The changes compared to the original algorithm:

  * We haven't implemented the cohort locks because that would require a
    knowledge of NUMA nodes, instead a simple atomic_bool is used as
    synchronization point for writer lock.

  * The per-thread reader counters are not being used - this would
    require the internal thread id (isc_tid_v) to be always initialized,
    even in the utilities; the change has a slight performance penalty,
    so we might revisit this change in the future.  However, this change
    also saves a lot of memory, because cache-line aligned counters were
    used, so on 32-core machine, the rwlock would be 4096+ bytes big.

  * The readers use a writer_barrier that will raise after a while when
    readers lock can't be acquired to prevent readers starvation.

  * Separate ingress and egress readers counters queues to reduce both
    inter and intra-thread contention.
2023-02-15 09:30:04 +01:00

710 lines
18 KiB
C

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <inttypes.h>
#include <isc/list.h>
#include <isc/mem.h>
#include <isc/netaddr.h>
#include <isc/refcount.h>
#include <isc/result.h>
#include <isc/rwlock.h>
#include <isc/sockaddr.h>
#include <isc/util.h>
#include <dns/name.h>
#include <dns/rbt.h>
#include <dns/transport.h>
/*
 * Magic values stored in the 'magic' member of dns_transport_t and
 * dns_transport_list_t respectively, and the matching validity checks
 * used in REQUIRE() preconditions throughout this file.  The destroy
 * functions reset 'magic' to 0 before releasing the object.
 */
#define TRANSPORT_MAGIC ISC_MAGIC('T', 'r', 'n', 's')
#define VALID_TRANSPORT(ptr) ISC_MAGIC_VALID(ptr, TRANSPORT_MAGIC)
#define TRANSPORT_LIST_MAGIC ISC_MAGIC('T', 'r', 'L', 's')
#define VALID_TRANSPORT_LIST(ptr) ISC_MAGIC_VALID(ptr, TRANSPORT_LIST_MAGIC)
/*
 * A reference-counted collection of transports: one red-black tree per
 * transport type, indexed by name.
 */
struct dns_transport_list {
/* TRANSPORT_LIST_MAGIC while the list is alive, 0 once destroyed. */
unsigned int magic;
/* Starts at 1; the list is destroyed when the last reference goes. */
isc_refcount_t references;
/* Memory context the list (and its transports) are allocated from. */
isc_mem_t *mctx;
/* Taken for writing in list_add(), for reading in dns_transport_find(). */
isc_rwlock_t lock;
/* One tree per dns_transport_type_t value, created in dns_transport_list_new(). */
dns_rbt_t *transports[DNS_TRANSPORT_COUNT];
};
/*
 * Three-state boolean: 'ter_none' means "never set" (it is the value a
 * zero-initialized transport starts with), while 'ter_true' and
 * 'ter_false' record an explicitly configured value.
 */
typedef enum ternary { ter_none = 0, ter_true = 1, ter_false = 2 } ternary_t;
/*
 * A single named transport configuration.  All string members are
 * private copies made with isc_mem_strdup() by the setters below and
 * released in transport_destroy(); they are NULL until set.
 */
struct dns_transport {
/* TRANSPORT_MAGIC while the transport is alive, 0 once destroyed. */
unsigned int magic;
/* Starts at 1 in dns_transport_new(). */
isc_refcount_t references;
/* Memory context used for this object and its strings. */
isc_mem_t *mctx;
/* Transport kind given at creation time. */
dns_transport_type_t type;
/* TLS settings; the setters accept them for TLS and HTTP transports. */
struct {
/* Name used as the TLS context cache key in dns_transport_get_tlsctx(). */
char *tlsname;
/* Client certificate and private key files (loaded together). */
char *certfile;
char *keyfile;
/* CA file used to build the certificate store for peer verification. */
char *cafile;
/* Hostname to verify the peer against; NULL means use the peer address. */
char *remote_hostname;
/* Cipher list passed to isc_tlsctx_set_cipherlist(). */
char *ciphers;
/* Protocol version mask; 0 means "not configured". */
uint32_t protocol_versions;
/* Unset / true / false, see ternary_t. */
ternary_t prefer_server_ciphers;
/* Request peer verification even without hostname or CA file. */
bool always_verify_remote;
} tls;
/* DNS-over-HTTPS settings; the setters accept them for HTTP transports only. */
struct {
char *endpoint;
dns_http_mode_t mode;
} doh;
};
/*
 * Data-deletion callback registered with dns_rbt_create() in
 * dns_transport_list_new(): releases the reference that the tree holds
 * on the transport stored in 'node'.  'arg' is not used.
 */
static void
free_dns_transport(void *node, void *arg) {
	dns_transport_t *transport = NULL;

	UNUSED(arg);
	REQUIRE(node != NULL);

	transport = node;
	dns_transport_detach(&transport);
}
/*
 * Insert 'transport' under 'name' into the tree that 'list' keeps for
 * transports of the given 'type'.  The list's write lock is held for
 * the duration of the insertion.
 *
 * Returns whatever dns_rbt_addname() returns.
 */
static isc_result_t
list_add(dns_transport_list_t *list, const dns_name_t *name,
	 const dns_transport_type_t type, dns_transport_t *transport) {
	isc_result_t result;

	RWLOCK(&list->lock, isc_rwlocktype_write);

	dns_rbt_t *rbt = list->transports[type];
	INSIST(rbt != NULL);
	result = dns_rbt_addname(rbt, name, transport);

	RWUNLOCK(&list->lock, isc_rwlocktype_write);

	return result;
}
/*
 * Return the type that 'transport' was created with.
 */
dns_transport_type_t
dns_transport_get_type(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->type;
}
/*
 * Return the stored client certificate file path, or NULL if none has
 * been set.  The pointer refers to the transport's own copy.
 */
char *
dns_transport_get_certfile(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.certfile;
}
/*
 * Return the stored private key file path, or NULL if none has been
 * set.  The pointer refers to the transport's own copy.
 */
char *
dns_transport_get_keyfile(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.keyfile;
}
/*
 * Return the stored CA file path, or NULL if none has been set.  The
 * pointer refers to the transport's own copy.
 */
char *
dns_transport_get_cafile(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.cafile;
}
/*
 * Return the stored remote hostname, or NULL if none has been set.
 * The pointer refers to the transport's own copy.
 */
char *
dns_transport_get_remote_hostname(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.remote_hostname;
}
/*
 * Return the stored DoH endpoint, or NULL if none has been set.  The
 * pointer refers to the transport's own copy.
 */
char *
dns_transport_get_endpoint(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->doh.endpoint;
}
/*
 * Return the DoH mode stored in 'transport'.
 */
dns_http_mode_t
dns_transport_get_mode(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->doh.mode;
}
/*
 * Allocate a new transport of the given 'type' from the memory context
 * of 'list' and register it in 'list' under 'name'.
 *
 * The transport starts out zero-initialized (all strings NULL, all
 * options unset) with a single reference.  That reference is the one
 * dropped by free_dns_transport(), the deleter registered on the
 * list's trees, so the returned pointer is not an extra reference for
 * the caller.
 *
 * Requires:
 *	'list' is a valid transport list.
 *
 * Returns the new transport.
 */
dns_transport_t *
dns_transport_new(const dns_name_t *name, dns_transport_type_t type,
		  dns_transport_list_t *list) {
	dns_transport_t *transport = NULL;

	/* Validate the list before touching list->mctx. */
	REQUIRE(VALID_TRANSPORT_LIST(list));

	transport = isc_mem_get(list->mctx, sizeof(*transport));
	*transport = (dns_transport_t){ .type = type };
	isc_refcount_init(&transport->references, 1);
	isc_mem_attach(list->mctx, &transport->mctx);
	transport->magic = TRANSPORT_MAGIC;

	/*
	 * NOTE(review): the result of list_add() is ignored here, as in
	 * the original code.  If the insertion can fail (for instance on
	 * a duplicate name), the transport is returned without being
	 * owned by the list - confirm that callers guarantee unique names.
	 */
	list_add(list, name, type, transport);

	return (transport);
}
/*
 * Store a private copy of 'certfile' as the client certificate file of
 * a TLS or HTTP 'transport'.  Any previously stored value is released
 * first; a NULL 'certfile' releases the old value without storing a
 * new one.
 */
void
dns_transport_set_certfile(dns_transport_t *transport, const char *certfile) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.certfile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.certfile);
	}

	if (certfile == NULL) {
		return;
	}

	transport->tls.certfile = isc_mem_strdup(transport->mctx, certfile);
}
/*
 * Store a private copy of 'keyfile' as the private key file of a TLS
 * or HTTP 'transport'.  Any previously stored value is released first;
 * a NULL 'keyfile' releases the old value without storing a new one.
 */
void
dns_transport_set_keyfile(dns_transport_t *transport, const char *keyfile) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.keyfile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.keyfile);
	}

	if (keyfile == NULL) {
		return;
	}

	transport->tls.keyfile = isc_mem_strdup(transport->mctx, keyfile);
}
/*
 * Store a private copy of 'cafile' as the CA file of a TLS or HTTP
 * 'transport'.  Any previously stored value is released first; a NULL
 * 'cafile' releases the old value without storing a new one.
 */
void
dns_transport_set_cafile(dns_transport_t *transport, const char *cafile) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.cafile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.cafile);
	}

	if (cafile == NULL) {
		return;
	}

	transport->tls.cafile = isc_mem_strdup(transport->mctx, cafile);
}
/*
 * Store a private copy of 'hostname' as the remote hostname of a TLS
 * or HTTP 'transport'.  Any previously stored value is released first;
 * a NULL 'hostname' releases the old value without storing a new one.
 */
void
dns_transport_set_remote_hostname(dns_transport_t *transport,
				  const char *hostname) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.remote_hostname != NULL) {
		isc_mem_free(transport->mctx, transport->tls.remote_hostname);
	}

	if (hostname == NULL) {
		return;
	}

	transport->tls.remote_hostname = isc_mem_strdup(transport->mctx,
							hostname);
}
/*
 * Store a private copy of 'endpoint' as the DoH endpoint of an HTTP
 * 'transport'.  Any previously stored value is released first; a NULL
 * 'endpoint' releases the old value without storing a new one.
 */
void
dns_transport_set_endpoint(dns_transport_t *transport, const char *endpoint) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_HTTP);

	if (transport->doh.endpoint != NULL) {
		isc_mem_free(transport->mctx, transport->doh.endpoint);
	}

	if (endpoint == NULL) {
		return;
	}

	transport->doh.endpoint = isc_mem_strdup(transport->mctx, endpoint);
}
/*
 * Record the DoH 'mode' for an HTTP 'transport'.
 */
void
dns_transport_set_mode(dns_transport_t *transport, dns_http_mode_t mode) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_HTTP);

	transport->doh.mode = mode;
}
/*
 * Record the TLS protocol version mask for a TLS or HTTP 'transport'.
 * A value of 0 is treated as "not configured" by
 * dns_transport_get_tlsctx().
 */
void
dns_transport_set_tls_versions(dns_transport_t *transport,
			       const uint32_t tls_versions) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_HTTP ||
		transport->type == DNS_TRANSPORT_TLS);

	transport->tls.protocol_versions = tls_versions;
}
/*
 * Return the TLS protocol version mask stored in 'transport' (0 if it
 * has never been set).
 */
uint32_t
dns_transport_get_tls_versions(const dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.protocol_versions;
}
/*
 * Store a private copy of the 'ciphers' list for a TLS or HTTP
 * 'transport'.  Any previously stored value is released first; a NULL
 * 'ciphers' releases the old value without storing a new one.
 */
void
dns_transport_set_ciphers(dns_transport_t *transport, const char *ciphers) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.ciphers != NULL) {
		isc_mem_free(transport->mctx, transport->tls.ciphers);
	}

	if (ciphers == NULL) {
		return;
	}

	transport->tls.ciphers = isc_mem_strdup(transport->mctx, ciphers);
}
/*
 * Store a private copy of 'tlsname' for a TLS or HTTP 'transport'.
 * Any previously stored value is released first; a NULL 'tlsname'
 * releases the old value without storing a new one.
 */
void
dns_transport_set_tlsname(dns_transport_t *transport, const char *tlsname) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (transport->tls.tlsname != NULL) {
		isc_mem_free(transport->mctx, transport->tls.tlsname);
	}

	if (tlsname == NULL) {
		return;
	}

	transport->tls.tlsname = isc_mem_strdup(transport->mctx, tlsname);
}
/*
 * Return the stored cipher list, or NULL if none has been set.  The
 * pointer refers to the transport's own copy.
 */
char *
dns_transport_get_ciphers(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.ciphers;
}
/*
 * Return the stored TLS name, or NULL if none has been set.  The
 * pointer refers to the transport's own copy.
 */
char *
dns_transport_get_tlsname(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));

	return transport->tls.tlsname;
}
/*
 * Record an explicit "prefer server ciphers" choice for a TLS or HTTP
 * 'transport'.  After this call the option is no longer "unset".
 */
void
dns_transport_set_prefer_server_ciphers(dns_transport_t *transport,
					const bool prefer) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	if (prefer) {
		transport->tls.prefer_server_ciphers = ter_true;
	} else {
		transport->tls.prefer_server_ciphers = ter_false;
	}
}
/*
 * Report the "prefer server ciphers" setting of 'transport'.
 *
 * Returns false, leaving '*preferp' untouched, when the option has
 * never been set.  Otherwise stores the configured value in '*preferp'
 * and returns true.
 */
bool
dns_transport_get_prefer_server_ciphers(const dns_transport_t *transport,
					bool *preferp) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(preferp != NULL);

	switch (transport->tls.prefer_server_ciphers) {
	case ter_none:
		return false;
	case ter_true:
		*preferp = true;
		return true;
	case ter_false:
		*preferp = false;
		return true;
	default:
		break;
	}

	UNREACHABLE();
	return false;
}
/*
 * Record whether a TLS or HTTP 'transport' should always verify the
 * remote peer.
 */
void
dns_transport_set_always_verify_remote(dns_transport_t *transport,
				       const bool always_verify_remote) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	transport->tls.always_verify_remote = always_verify_remote;
}
/*
 * Return the "always verify remote" flag of a TLS or HTTP 'transport'
 * (false unless it has been set).
 */
bool
dns_transport_get_always_verify_remote(dns_transport_t *transport) {
	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS ||
		transport->type == DNS_TRANSPORT_HTTP);

	return (transport->tls.always_verify_remote);
}
/*
 * Obtain the client TLS context and client session cache to use for a
 * DNS-over-TLS 'transport' when connecting to 'peer'.
 *
 * The context is looked up in 'tlsctx_cache' under the transport's TLS
 * name and the address family of 'peer'.  On a cache miss a new
 * context is built from the transport's settings (protocol versions,
 * ciphers, cipher preference, peer verification, client certificate),
 * a session cache is created from 'mctx', and both are added to
 * 'tlsctx_cache'.  If another thread added an entry in the meantime,
 * the freshly built objects are discarded and the cached ones are
 * returned instead.
 *
 * Requires:
 *	'transport' is a valid transport of type DNS_TRANSPORT_TLS with
 *	a non-empty TLS name.
 *	'peer', 'tlsctx_cache' and 'mctx' are not NULL.
 *	'pctx' and 'psess_cache' are not NULL and point to NULL.
 *
 * Returns:
 *	ISC_R_SUCCESS, with '*pctx' and '*psess_cache' set; or the
 *	failing result of context creation, certificate store creation,
 *	enabling peer verification, or loading the client certificate,
 *	in which case the output pointers are left untouched.
 */
isc_result_t
dns_transport_get_tlsctx(dns_transport_t *transport, const isc_sockaddr_t *peer,
			 isc_tlsctx_cache_t *tlsctx_cache, isc_mem_t *mctx,
			 isc_tlsctx_t **pctx,
			 isc_tlsctx_client_session_cache_t **psess_cache) {
	isc_result_t result = ISC_R_FAILURE;
	isc_tlsctx_t *tlsctx = NULL, *found = NULL;
	isc_tls_cert_store_t *store = NULL, *found_store = NULL;
	isc_tlsctx_client_session_cache_t *sess_cache = NULL;
	isc_tlsctx_client_session_cache_t *found_sess_cache = NULL;
	uint32_t tls_versions;
	const char *ciphers = NULL;
	bool prefer_server_ciphers;
	uint16_t family;
	const char *tlsname = NULL;

	REQUIRE(VALID_TRANSPORT(transport));
	REQUIRE(transport->type == DNS_TRANSPORT_TLS);
	REQUIRE(peer != NULL);
	REQUIRE(tlsctx_cache != NULL);
	REQUIRE(mctx != NULL);
	REQUIRE(pctx != NULL && *pctx == NULL);
	REQUIRE(psess_cache != NULL && *psess_cache == NULL);

	family = (isc_sockaddr_pf(peer) == PF_INET6) ? AF_INET6 : AF_INET;

	tlsname = dns_transport_get_tlsname(transport);
	INSIST(tlsname != NULL && *tlsname != '\0');

	/*
	 * Let's try to re-use the already created context. This way
	 * we have a chance to resume the TLS session, bypassing the
	 * full TLS handshake procedure, making establishing
	 * subsequent TLS connections faster.
	 */
	result = isc_tlsctx_cache_find(tlsctx_cache, tlsname,
				       isc_tlsctx_cache_tls, family, &found,
				       &found_store, &found_sess_cache);
	if (result != ISC_R_SUCCESS) {
		const char *hostname =
			dns_transport_get_remote_hostname(transport);
		const char *ca_file = dns_transport_get_cafile(transport);
		const char *cert_file = dns_transport_get_certfile(transport);
		const char *key_file = dns_transport_get_keyfile(transport);
		const bool always_verify_remote =
			dns_transport_get_always_verify_remote(transport);
		char peer_addr_str[INET6_ADDRSTRLEN] = { 0 };
		isc_netaddr_t peer_netaddr = { 0 };
		bool hostname_ignore_subject;

		/*
		 * So, no context exists. Let's create one using the
		 * parameters from the configuration file and try to
		 * store it for further reuse.
		 */
		result = isc_tlsctx_createclient(&tlsctx);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
		tls_versions = dns_transport_get_tls_versions(transport);
		if (tls_versions != 0) {
			isc_tlsctx_set_protocols(tlsctx, tls_versions);
		}
		ciphers = dns_transport_get_ciphers(transport);
		if (ciphers != NULL) {
			isc_tlsctx_set_cipherlist(tlsctx, ciphers);
		}

		if (dns_transport_get_prefer_server_ciphers(
			    transport, &prefer_server_ciphers))
		{
			isc_tlsctx_prefer_server_ciphers(tlsctx,
							 prefer_server_ciphers);
		}

		if (always_verify_remote || hostname != NULL || ca_file != NULL)
		{
			/*
			 * The situation when 'found_store != NULL' while
			 * 'found == NULL' may occur as there is a one-to-many
			 * relation between cert stores and per-transport TLS
			 * contexts. That is, there could be one store
			 * shared between multiple contexts.
			 */
			if (found_store == NULL) {
				/*
				 * 'ca_file' can equal 'NULL' here, in
				 * which case the store with system-wide
				 * CA certificates will be created.
				 */
				result = isc_tls_cert_store_create(ca_file,
								   &store);

				if (result != ISC_R_SUCCESS) {
					goto failure;
				}
			} else {
				store = found_store;
			}

			INSIST(store != NULL);
			if (hostname == NULL) {
				/*
				 * If hostname is not specified, then use the
				 * peer IP address for validation.
				 */
				isc_netaddr_fromsockaddr(&peer_netaddr, peer);
				isc_netaddr_format(&peer_netaddr, peer_addr_str,
						   sizeof(peer_addr_str));
				hostname = peer_addr_str;
			}

			/*
			 * According to RFC 8310, Subject field MUST NOT
			 * be inspected when verifying hostname for DoT.
			 * Only SubjectAltName must be checked.
			 */
			hostname_ignore_subject = true;
			result = isc_tlsctx_enable_peer_verification(
				tlsctx, false, store, hostname,
				hostname_ignore_subject);
			if (result != ISC_R_SUCCESS) {
				goto failure;
			}

			/*
			 * Let's load client certificate and enable
			 * Mutual TLS. We do that only in the case when
			 * Strict TLS is enabled, because Mutual TLS is
			 * an extension of it.
			 */
			if (cert_file != NULL) {
				INSIST(key_file != NULL);

				result = isc_tlsctx_load_certificate(
					tlsctx, key_file, cert_file);
				if (result != ISC_R_SUCCESS) {
					goto failure;
				}
			}
		}

		isc_tlsctx_enable_dot_client_alpn(tlsctx);

		isc_tlsctx_client_session_cache_create(
			mctx, tlsctx,
			ISC_TLSCTX_CLIENT_SESSION_CACHE_DEFAULT_SIZE,
			&sess_cache);

		found_store = NULL;
		result = isc_tlsctx_cache_add(tlsctx_cache, tlsname,
					      isc_tlsctx_cache_tls, family,
					      tlsctx, store, sess_cache, &found,
					      &found_store, &found_sess_cache);
		if (result == ISC_R_EXISTS) {
			/*
			 * It seems the entry has just been created from
			 * within another thread while we were initialising
			 * ours. Although this is unlikely, it could happen
			 * after startup/re-initialisation. In such a case,
			 * discard the new context and associated data and use
			 * the already established one from now on.
			 *
			 * Such situation will not occur after the
			 * initial 'warm-up', so it is not critical
			 * performance-wise.
			 */
			INSIST(found != NULL);
			isc_tlsctx_free(&tlsctx);
			/*
			 * 'store' is NULL when peer verification was not
			 * enabled above, and it may be the very store the
			 * cache handed to us (one store can be shared by
			 * several contexts).  Only free a store that we
			 * created ourselves; a cache-managed store must be
			 * left alone, exactly as on the failure path below.
			 */
			if (store != NULL && store != found_store) {
				isc_tls_cert_store_free(&store);
			}
			isc_tlsctx_client_session_cache_detach(&sess_cache);
			/* Let's return the data from the cache. */
			*psess_cache = found_sess_cache;
			*pctx = found;
		} else {
			/*
			 * Adding the fresh values into the cache has been
			 * successful, let's return them
			 */
			INSIST(result == ISC_R_SUCCESS);
			*psess_cache = sess_cache;
			*pctx = tlsctx;
		}
	} else {
		/*
		 * The cache lookup has been successful, let's return the
		 * results.
		 */
		INSIST(result == ISC_R_SUCCESS);
		*psess_cache = found_sess_cache;
		*pctx = found;
	}

	return (ISC_R_SUCCESS);

failure:
	if (tlsctx != NULL) {
		isc_tlsctx_free(&tlsctx);
	}

	/*
	 * The 'found_store' is being managed by the TLS context
	 * cache. Thus, we should keep it as it is, as it will get
	 * destroyed alongside the cache. As there is one store per
	 * multiple TLS contexts, we need to handle store deletion in a
	 * special way.
	 */
	if (store != NULL && store != found_store) {
		isc_tls_cert_store_free(&store);
	}

	return (result);
}
/*
 * Tear down 'transport' once its last reference is gone: invalidate
 * the magic, release every string that was set, and return the object
 * to its memory context (dropping the context reference as well).
 */
static void
transport_destroy(dns_transport_t *transport) {
	isc_refcount_destroy(&transport->references);
	transport->magic = 0;

	/* TLS strings; each one is NULL unless its setter was called. */
	if (transport->tls.tlsname != NULL) {
		isc_mem_free(transport->mctx, transport->tls.tlsname);
	}
	if (transport->tls.certfile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.certfile);
	}
	if (transport->tls.keyfile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.keyfile);
	}
	if (transport->tls.cafile != NULL) {
		isc_mem_free(transport->mctx, transport->tls.cafile);
	}
	if (transport->tls.remote_hostname != NULL) {
		isc_mem_free(transport->mctx, transport->tls.remote_hostname);
	}
	if (transport->tls.ciphers != NULL) {
		isc_mem_free(transport->mctx, transport->tls.ciphers);
	}

	/* DoH string. */
	if (transport->doh.endpoint != NULL) {
		isc_mem_free(transport->mctx, transport->doh.endpoint);
	}

	isc_mem_putanddetach(&transport->mctx, transport, sizeof(*transport));
}
/*
 * Take an additional reference on 'source' and store it in '*targetp'.
 *
 * Requires:
 *	'source' is a valid transport (checked by magic, consistent with
 *	dns_transport_detach() and dns_transport_list_attach()).
 *	'targetp' is not NULL and points to NULL.
 */
void
dns_transport_attach(dns_transport_t *source, dns_transport_t **targetp) {
	REQUIRE(VALID_TRANSPORT(source));
	REQUIRE(targetp != NULL && *targetp == NULL);

	isc_refcount_increment(&source->references);

	*targetp = source;
}
/*
 * Drop the reference held in '*transportp' and set '*transportp' to
 * NULL.  The transport is destroyed when this was the last reference.
 */
void
dns_transport_detach(dns_transport_t **transportp) {
	dns_transport_t *transport = NULL;

	REQUIRE(transportp != NULL);
	REQUIRE(VALID_TRANSPORT(*transportp));

	/* Clear the caller's pointer before the object can go away. */
	transport = *transportp;
	*transportp = NULL;

	if (isc_refcount_decrement(&transport->references) != 1) {
		return;
	}

	transport_destroy(transport);
}
/*
 * Look up the transport of the given 'type' registered under 'name' in
 * 'list'.  The list's read lock is held during the lookup.
 *
 * Returns the transport with an extra reference taken on it (the
 * caller must release it with dns_transport_detach()), or NULL when
 * the lookup does not return ISC_R_SUCCESS.
 *
 * Requires:
 *	'list' is a valid transport list that has a tree for 'type'.
 */
dns_transport_t *
dns_transport_find(const dns_transport_type_t type, const dns_name_t *name,
		   dns_transport_list_t *list) {
	isc_result_t result;
	dns_transport_t *transport = NULL;
	dns_rbt_t *rbt = NULL;

	REQUIRE(VALID_TRANSPORT_LIST(list));
	REQUIRE(list->transports[type] != NULL);

	rbt = list->transports[type];

	RWLOCK(&list->lock, isc_rwlocktype_read);
	result = dns_rbt_findname(rbt, name, 0, NULL, (void *)&transport);
	if (result == ISC_R_SUCCESS) {
		isc_refcount_increment(&transport->references);
	} else {
		/*
		 * The lookup may have stored data for something other
		 * than an exact match (e.g. a partial match on a parent
		 * name).  No reference was taken on it, so never hand it
		 * out to the caller.
		 */
		transport = NULL;
	}
	RWUNLOCK(&list->lock, isc_rwlocktype_read);

	return (transport);
}
/*
 * Create an empty transport list allocated from 'mctx'.  The list
 * starts with one reference and one (empty) tree per transport type;
 * free_dns_transport() is registered as each tree's data deleter.
 *
 * Failure to create any of the trees is fatal (RUNTIME_CHECK).
 */
dns_transport_list_t *
dns_transport_list_new(isc_mem_t *mctx) {
	dns_transport_list_t *list = NULL;
	isc_result_t result;

	list = isc_mem_get(mctx, sizeof(*list));
	*list = (dns_transport_list_t){ 0 };

	isc_rwlock_init(&list->lock);
	isc_mem_attach(mctx, &list->mctx);
	isc_refcount_init(&list->references, 1);
	list->magic = TRANSPORT_LIST_MAGIC;

	for (size_t i = 0; i < DNS_TRANSPORT_COUNT; i++) {
		result = dns_rbt_create(list->mctx, free_dns_transport, NULL,
					&list->transports[i]);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
	}

	return list;
}
/*
 * Take an additional reference on the list 'source' and store it in
 * '*targetp', which must point to NULL on entry.
 */
void
dns_transport_list_attach(dns_transport_list_t *source,
			  dns_transport_list_t **targetp) {
	REQUIRE(VALID_TRANSPORT_LIST(source));
	REQUIRE(targetp != NULL && *targetp == NULL);

	isc_refcount_increment(&source->references);

	*targetp = source;
}
/*
 * Tear down 'list' once its last reference is gone: invalidate the
 * magic, destroy every per-type tree that exists, destroy the lock and
 * return the object to its memory context.
 */
static void
transport_list_destroy(dns_transport_list_t *list) {
	isc_refcount_destroy(&list->references);
	list->magic = 0;

	for (size_t i = 0; i < DNS_TRANSPORT_COUNT; i++) {
		if (list->transports[i] == NULL) {
			continue;
		}
		dns_rbt_destroy(&list->transports[i]);
	}

	isc_rwlock_destroy(&list->lock);
	isc_mem_putanddetach(&list->mctx, list, sizeof(*list));
}
/*
 * Drop the reference held in '*listp' and set '*listp' to NULL.  The
 * list is destroyed when this was the last reference.
 */
void
dns_transport_list_detach(dns_transport_list_t **listp) {
	dns_transport_list_t *list = NULL;

	REQUIRE(listp != NULL);
	REQUIRE(VALID_TRANSPORT_LIST(*listp));

	/* Clear the caller's pointer before the object can go away. */
	list = *listp;
	*listp = NULL;

	if (isc_refcount_decrement(&list->references) != 1) {
		return;
	}

	transport_list_destroy(list);
}