From c8f1fa0e474d00a78c55d40ee7fef1e286765f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 22 Aug 2024 17:23:09 +0200 Subject: [PATCH] Follow the number of CPU set by taskset/cpuset Administrators may wish to constrain the set of cores that BIND 9 runs on via the 'taskset', 'cpuset' or 'numactl' programs (or equivalent on other O/S), for example to achieve higher (or more stable) performance by more closely associating threads with individual NIC rx queues. If the admin has used taskset, it follows that BIND ought to automatically use the given number of CPUs rather than the system wide count. Co-Authored-By: Ray Bellis (cherry picked from commit 5a2df8caf5c7c2def7266dc10dde60ef92d4ccb7) --- bin/tests/system/Makefile.am | 1 + bin/tests/system/cpu/clean.sh | 16 +++++ bin/tests/system/cpu/ns1/named.conf.in | 20 +++++++ bin/tests/system/cpu/prereq.sh | 21 +++++++ bin/tests/system/cpu/setup.sh | 21 +++++++ bin/tests/system/cpu/tests.sh | 65 +++++++++++++++++++++ bin/tests/system/cpu/tests_sh_cpu.py | 14 +++++ configure.ac | 12 +++- lib/isc/os.c | 81 +++++++++++++++++++++++++- 9 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 bin/tests/system/cpu/clean.sh create mode 100644 bin/tests/system/cpu/ns1/named.conf.in create mode 100644 bin/tests/system/cpu/prereq.sh create mode 100644 bin/tests/system/cpu/setup.sh create mode 100755 bin/tests/system/cpu/tests.sh create mode 100644 bin/tests/system/cpu/tests_sh_cpu.py diff --git a/bin/tests/system/Makefile.am b/bin/tests/system/Makefile.am index befd51270f..446ece22c2 100644 --- a/bin/tests/system/Makefile.am +++ b/bin/tests/system/Makefile.am @@ -108,6 +108,7 @@ TESTS = \ checknames \ checkzone \ cookie \ + cpu \ database \ dialup \ digdelv \ diff --git a/bin/tests/system/cpu/clean.sh b/bin/tests/system/cpu/clean.sh new file mode 100644 index 0000000000..cff7c61995 --- /dev/null +++ b/bin/tests/system/cpu/clean.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +# Copyright (C) Internet 
Systems Consortium, Inc. ("ISC") +# +# SPDX-License-Identifier: MPL-2.0 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at https://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +set -e + +rm -f ./named.run.* diff --git a/bin/tests/system/cpu/ns1/named.conf.in b/bin/tests/system/cpu/ns1/named.conf.in new file mode 100644 index 0000000000..6c934b23e4 --- /dev/null +++ b/bin/tests/system/cpu/ns1/named.conf.in @@ -0,0 +1,20 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +options { + query-source address 10.53.0.1; + port @PORT@; + pid-file "named.pid"; + listen-on { 10.53.0.1; }; + listen-on-v6 { none; }; +}; diff --git a/bin/tests/system/cpu/prereq.sh b/bin/tests/system/cpu/prereq.sh new file mode 100644 index 0000000000..7f9f6c06d5 --- /dev/null +++ b/bin/tests/system/cpu/prereq.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# SPDX-License-Identifier: MPL-2.0 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at https://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +. 
../conf.sh + +command -v cpuset >/dev/null || command -v numactl >/dev/null || command -v taskset >/dev/null || { + echo_i "This test requires cpuset, numactl, or taskset." >&2 + exit 255 +} + +exit 0 diff --git a/bin/tests/system/cpu/setup.sh b/bin/tests/system/cpu/setup.sh new file mode 100644 index 0000000000..9676770adb --- /dev/null +++ b/bin/tests/system/cpu/setup.sh @@ -0,0 +1,21 @@ +#!/bin/sh -e + +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# SPDX-License-Identifier: MPL-2.0 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at https://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +# shellcheck source=conf.sh +. ../conf.sh + +set -e + +$SHELL clean.sh + +copy_setports ns1/named.conf.in ns1/named.conf diff --git a/bin/tests/system/cpu/tests.sh b/bin/tests/system/cpu/tests.sh new file mode 100755 index 0000000000..a521b4f29c --- /dev/null +++ b/bin/tests/system/cpu/tests.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# SPDX-License-Identifier: MPL-2.0 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at https://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +# shellcheck source=conf.sh +. 
../conf.sh + +status=0 +n=0 + +CPUSET=$(command -v cpuset) +NUMACTL=$(command -v numactl) +TASKSET=$(command -v taskset) + +cpulist() ( + if [ -n "$CPUSET" ]; then + cpuset -g | head -1 | sed -e "s/.*: //" | tr -s ', ' '\n' + elif [ -n "$NUMACTL" ]; then + numactl --show | sed -ne 's/^physcpubind: //p' | tr -s ' ' '\n' + elif [ -n "$TASKSET" ]; then + # shellcheck disable=SC2046 + seq $(taskset -c -p $$ | sed -e 's/.*: //' | tr -s ' -' ' ') + else + echo 0 + fi +) + +cpulimit() ( + set -x + min_cpu="${1}" + shift + max_cpu="${1}" + shift + + if [ -n "$CPUSET" ]; then + cpuset -l "${min_cpu}-${max_cpu}" "$@" 2>&1 + elif [ -n "$NUMACTL" ]; then + numactl --physcpubind="${min_cpu}-${max_cpu}" "$@" 2>&1 + elif [ -n "$TASKSET" ]; then + taskset -c "${min_cpu}-${max_cpu}" "$@" 2>&1 + fi +) + +ret=0 +for cpu in $(cpulist); do + n=$((n + 1)) + echo_i "testing that limiting CPU sets to 0-${cpu} works ($n)" + cpulimit 0 "$cpu" "$NAMED" -g >named.run.$n 2>&1 || true + ncpus=$(sed -ne 's/.*found \([0-9]*\) CPU.*\([0-9]*\) worker thread.*/\1/p' named.run.$n) + [ "$ncpus" -eq "$((cpu + 1))" ] || ret=1 +done +test "$ret" -eq 0 || echo_i "failed" +status=$((status + ret)) + +echo_i "exit status: $status" +[ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/cpu/tests_sh_cpu.py b/bin/tests/system/cpu/tests_sh_cpu.py new file mode 100644 index 0000000000..264dc27b0e --- /dev/null +++ b/bin/tests/system/cpu/tests_sh_cpu.py @@ -0,0 +1,14 @@ +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# SPDX-License-Identifier: MPL-2.0 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at https://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. 
+ + +def test_cpu(run_tests_sh): + run_tests_sh() diff --git a/configure.ac b/configure.ac index 1e80fba356..f62789f837 100644 --- a/configure.ac +++ b/configure.ac @@ -265,7 +265,8 @@ AS_CASE([$host], ]) ]) -AC_CHECK_HEADERS([fcntl.h regex.h sys/time.h unistd.h sys/mman.h sys/sockio.h sys/select.h sys/param.h sys/sysctl.h net/if6.h sys/socket.h net/route.h linux/netlink.h linux/rtnetlink.h], [], [], +AC_CHECK_HEADERS([sys/param.h sys/socket.h]) +AC_CHECK_HEADERS([fcntl.h regex.h sys/time.h unistd.h sys/mman.h sys/sockio.h sys/select.h sys/sysctl.h net/if6.h net/route.h linux/netlink.h linux/rtnetlink.h], [], [], [$ac_includes_default #ifdef HAVE_SYS_PARAM_H # include <sys/param.h> @@ -532,9 +533,16 @@ AM_CONDITIONAL([HAVE_LIBNGHTTP2], [test -n "$LIBNGHTTP2_LIBS"]) AC_CHECK_FUNCS([flockfile getc_unlocked]) # -# Look for sysconf to allow detection of the number of processors. +# Look for sysconf or other ways to allow detection of the number of processors. # AC_CHECK_FUNCS([sysconf]) +AC_CHECK_FUNCS(sysconf sched_getaffinity cpuset_getaffinity) +AC_CHECK_HEADERS([sys/cpuset.h], [], [], + [$ac_includes_default + #ifdef HAVE_SYS_PARAM_H + # include <sys/param.h> + #endif + ]) # # Do we want to use pthread rwlock? diff --git a/lib/isc/os.c b/lib/isc/os.c index 0ba0fab43c..fced991903 100644 --- a/lib/isc/os.c +++ b/lib/isc/os.c @@ -59,17 +59,94 @@ sysctl_ncpus(void) { } #endif /* if defined(HAVE_SYS_SYSCTL_H) && defined(HAVE_SYSCTLBYNAME) */ +#if defined(HAVE_SCHED_GETAFFINITY) + +#if defined(HAVE_SCHED_H) +#include <sched.h> +#endif + +/* + * Administrators may wish to constrain the set of cores that BIND runs + * on via the 'taskset' or 'numactl' programs (or equivalent on other + * O/S), for example to achieve higher (or more stable) performance by + * more closely associating threads with individual NIC rx queues. If + * the admin has used taskset, it follows that BIND ought to + * automatically use the given number of CPUs rather than the system + * wide count.
+ */ +static int +sched_affinity_ncpus(void) { + cpu_set_t cpus; + int result; + + result = sched_getaffinity(0, sizeof(cpus), &cpus); + if (result != -1) { +#ifdef CPU_COUNT + return (CPU_COUNT(&cpus)); +#else + int i, n = 0; + + for (i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, &cpus)) + ++n; + } + return (n); +#endif + } + return (0); +} +#endif + +/* + * Affinity detecting variant of sched_affinity_ncpus() for FreeBSD + */ + +#if defined(HAVE_SYS_CPUSET_H) && defined(HAVE_CPUSET_GETAFFINITY) +#include <sys/param.h> +#include <sys/cpuset.h> + +static int +cpuset_affinity_ncpus(void) { + cpuset_t cpus; + int result; + + result = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(cpus), &cpus); + if (result != -1) { + int i, n = 0; + for (i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, &cpus)) + ++n; + } + return (n); + } + return (0); +} +#endif + static void ncpus_initialize(void) { +#if defined(HAVE_SYS_CPUSET_H) && defined(HAVE_CPUSET_GETAFFINITY) + if (isc__os_ncpus <= 0) { + isc__os_ncpus = cpuset_affinity_ncpus(); + } +#endif +#if defined(HAVE_SCHED_GETAFFINITY) + if (isc__os_ncpus <= 0) { + isc__os_ncpus = sched_affinity_ncpus(); + } +#endif #if defined(HAVE_SYSCONF) - isc__os_ncpus = sysconf_ncpus(); + if (isc__os_ncpus <= 0) { + isc__os_ncpus = sysconf_ncpus(); + } #endif /* if defined(HAVE_SYSCONF) */ #if defined(HAVE_SYS_SYSCTL_H) && defined(HAVE_SYSCTLBYNAME) if (isc__os_ncpus <= 0) { isc__os_ncpus = sysctl_ncpus(); } #endif /* if defined(HAVE_SYS_SYSCTL_H) && defined(HAVE_SYSCTLBYNAME) */ - if (isc__os_ncpus == 0) { + if (isc__os_ncpus <= 0) { isc__os_ncpus = 1; } }