bind9/bin/tests/system/start.pl

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

445 lines
10 KiB
Perl
Raw Normal View History

#!/usr/bin/perl -w
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
2012-06-28 21:39:47 -04:00
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
# Framework for starting test servers.
# Based on the type of server specified, check for port availability, remove
# temporary files, start the server, and verify that the server is running.
# If a server is specified, start it. Otherwise, start all servers for test.
use strict;
2018-11-27 18:47:08 -05:00
use warnings;
use Cwd ':DEFAULT', 'abs_path';
use English '-no_match_vars';
use Getopt::Long;
2018-11-27 18:47:08 -05:00
use Time::HiRes 'sleep'; # allows sleeping fractional seconds
# Usage:
# perl start.pl [--noclean] [--restart] [--port port] [--taskset cpus] test [server [options]]
#
# --noclean Do not cleanup files in server directory.
#
# --restart Indicate that the server is being restarted, so get the
# server to append output to an existing log file instead of
# starting a new one.
#
# --port port Specify the default port being used by the server to answer
# queries (default 5300). This script will interrogate the
# server on this port to see if it is running. (Note: for
# "named" nameservers, this can be overridden by the presence
# of the file "named.port" in the server directory containing
# the number of the query port.)
#
# --taskset cpus Use taskset to signal which cpus can be used. For example
# cpus=fff0 means all cpus except for 0, 1, 2, and 3 are
# eligible.
#
# test Name of the test directory.
#
# server Name of the server directory. This will be of the form
# "nsN" or "ansN", where "N" is an integer between 1 and 8.
# If not given, the script will start all the servers in the
# test directory.
#
# options Alternate options for the server.
#
# NOTE: options must be specified with '-- "<option list>"',
# for instance: start.pl . ns1 -- "-c n.conf -d 43"
#
# ALSO NOTE: this variable will be filled with the contents
# of the first non-commented/non-blank line of args in a file
# called "named.args" in an ns*/ subdirectory. Only the FIRST
2017-12-21 13:18:17 -05:00
# non-commented/non-blank line is used (everything else in
# the file is ignored). If "options" is already set, then
# "named.args" is ignored.
my $usage = "usage: $0 [--noclean] [--restart] [--port <port>] [--taskset <cpus>] test-directory [server-directory [server-options]]";

# Option defaults; each one can be overridden on the command line.
my $clean     = 1;       # cleared by --noclean: keep old files in place
my $restart   = 0;       # set by --restart: append to the existing run log
my $queryport = 5300;    # default query port (see --port)
my $taskset   = "";      # cpu mask handed to taskset(1); empty means unused

GetOptions(
    'clean!'    => \$clean,
    'restart!'  => \$restart,
    'port=i'    => \$queryport,
    'taskset=s' => \$taskset,
) or die "$usage\n";

# Positional arguments: the test directory is mandatory, the server
# directory and the server option string are optional.
my( $test, $server_arg, $options_arg ) = @ARGV;

die "$usage\n" unless $test;
2018-11-27 18:47:08 -05:00
# Global variables
Complete rewrite the BIND 9 build system The rewrite of BIND 9 build system is a large work and cannot be reasonable split into separate merge requests. Addition of the automake has a positive effect on the readability and maintainability of the build system as it is more declarative, it allows conditional and we are able to drop all of the custom make code that BIND 9 developed over the years to overcome the deficiencies of autoconf + custom Makefile.in files. This squashed commit contains following changes: - conversion (or rather fresh rewrite) of all Makefile.in files to Makefile.am by using automake - the libtool is now properly integrated with automake (the way we used it was rather hackish as the only official way how to use libtool is via automake - the dynamic module loading was rewritten from a custom patchwork to libtool's libltdl (which includes the patchwork to support module loading on different systems internally) - conversion of the unit test executor from kyua to automake parallel driver - conversion of the system test executor from custom make/shell to automake parallel driver - The GSSAPI has been refactored, the custom SPNEGO on the basis that all major KRB5/GSSAPI (mit-krb5, heimdal and Windows) implementations support SPNEGO mechanism. 
- The various defunct tests from bin/tests have been removed: bin/tests/optional and bin/tests/pkcs11 - The text files generated from the MD files have been removed, the MarkDown has been designed to be readable by both humans and computers - The xsl header is now generated by a simple sed command instead of perl helper - The <irs/platform.h> header has been removed - cleanups of configure.ac script to make it more simpler, addition of multiple macros (there's still work to be done though) - the tarball can now be prepared with `make dist` - the system tests are partially able to run in oot build Here's a list of unfinished work that needs to be completed in subsequent merge requests: - `make distcheck` doesn't yet work (because of system tests oot run is not yet finished) - documentation is not yet built, there's a different merge request with docbook to sphinx-build rst conversion that needs to be rebased and adapted on top of the automake - msvc build is non functional yet and we need to decide whether we will just cross-compile bind9 using mingw-w64 or fix the msvc build - contributed dlz modules are not included neither in the autoconf nor automake
2018-08-07 10:46:53 -04:00
# Locations of the build tree and source tree, taken from the environment.
my ( $builddir, $srcdir ) = @ENV{ 'builddir', 'srcdir' };

# The test directory lives underneath the build directory.
my $testdir = "$builddir/$test";

# Refuse to continue when the test directory, or the explicitly requested
# server directory inside it, does not exist.
die "No test directory: \"$testdir\"\n"
    unless -d $testdir;

die "No server directory: \"$testdir/$server_arg\"\n"
    if $server_arg && !-d "$testdir/$server_arg";

# Programs used to run and probe the servers, also taken from the
# environment.
my ( $NAMED, $DIG, $PERL, $PYTHON ) = @ENV{ 'NAMED', 'DIG', 'PERL', 'PYTHON' };
# Start the server(s)

my @ns;     # "nsN" directories: named instances to start
my @ans;    # "ansN" directories: auxiliary answer servers to start

if ($server_arg) {
    # A single server was requested: classify it by its name prefix.
    if ($server_arg =~ /^ns/) {
        push(@ns, $server_arg);
    } elsif ($server_arg =~ /^ans/) {
        push(@ans, $server_arg);
    } else {
        # Neither kind of server directory: report the failure and stop
        # with a non-zero status, like the other failure paths in this
        # script, instead of silently starting nothing and exiting 0.
        print "$0: ns or ans directory expected\n";
        print "I:$test:failed\n";
        exit 1;
    }
} else {
    # Determine which servers need to be started for this test.
    opendir(my $dh, $testdir) or die "unable to read test directory: \"$test\" ($OS_ERROR)\n";
    my @files = sort readdir $dh;
    closedir $dh;

    @ns  = grep { /^ns[0-9]*$/ } @files;
    @ans = grep { /^ans[0-9]*$/ } @files;
}

# Start the servers we found.
foreach my $name (@ns) {
    # Count the "running" lines already present in named.run *before*
    # starting the server; the count is passed to verify_ns_server() so
    # that lines logged by a previously shut down instance are not
    # mistaken for the new instance having finished starting up.
    my $instances_so_far = count_running_lines($name);

    check_ns_port($name);
    start_ns_server($name, $options_arg);
    verify_ns_server($name, $instances_so_far);
}

foreach my $name (@ans) {
    start_ans_server($name);
}
# Subroutines
2018-11-27 18:47:08 -05:00
# Return the query port to use for $server.
#
# The default is the global $queryport.  When $server is given and the
# file "named.port" exists in its directory, the first line of that file
# (with the trailing newline removed) is returned instead, provided the
# line is a true value.
#
# Dies if "named.port" exists but cannot be opened.
sub read_ns_port {
    my ( $server ) = @_;
    my $port = $queryport;

    return ($port) unless $server;

    my $file = $testdir . "/" . $server . "/named.port";
    return ($port) unless -e $file;

    open(my $fh, "<", $file) or die "unable to read ports file \"$file\" ($OS_ERROR)";
    my $line = <$fh>;
    close($fh);

    # Only the first line matters; an empty file leaves the default alone.
    if ($line) {
        chomp $line;
        $port = $line;
    }

    return ($port);
}
# Make sure the sockets needed by $server can be bound before starting it.
#
# Runs testsock.pl against the server's query port (adding "-i N" when the
# server name ends in a number N).  A failing probe is retried after a two
# second pause; after the fifth failure the test is reported as failed,
# stop.pl is run for the whole test and the script exits with status 1.
sub check_ns_port {
    my ( $server ) = @_;

    my $port = read_ns_port($server);
    my $options = ($server =~ /(\d+)$/) ? "-i $1" : "";

    my $failures = 0;
    until (system("$PERL $srcdir/testsock.pl -p $port $options") == 0) {
        $failures++;

        if ($failures > 4) {
            print "$0: could not bind to server addresses, still running?\n";
            print "I:$test:server sockets not available\n";
            print "I:$test:failed\n";

            system("$PERL $srcdir/stop.pl $test"); # Is this the correct behavior?

            exit 1;
        }

        print "I:$test:Couldn't bind to socket (yet)\n";
        sleep 2;
    }
}
# Run $command from inside the directory of $server and wait for the
# server to write a non-empty $pid_file there.
#
# $command is expected to print the pid of the process it launched; that
# output is captured so the process can be aborted on failure.  If the pid
# file has not appeared after roughly 90 seconds the test is reported as
# failed, the captured pid (if any) is sent SIGABRT, stop.pl is run for
# the whole test and the script exits with status 1.  On success the
# working directory is changed to $builddir.
sub start_server {
    my ( $server, $command, $pid_file ) = @_;

    my $server_dir = "$testdir/$server";
    chdir $server_dir or die "unable to chdir \"$server_dir\" ($OS_ERROR)\n";

    # start the server; the command's output is the pid of the child
    my $child = `$command`;
    chomp($child);

    # poll every 0.1 seconds, giving up once more than 900 polls have
    # found no pid file; the failure path also stops any servers that
    # were already started
    my $polls = 0;
    until (-s $pid_file) {
        $polls++;

        if ($polls > 900) {
            print "I:$test:Couldn't start server $command (pid=$child)\n";
            print "I:$test:failed\n";
            kill "ABRT", $child if ("$child" ne "");
            chdir "$testdir";
            system "$PERL $srcdir/stop.pl $test";
            exit 1;
        }

        sleep 0.1;
    }

    # go back to the top level directory
    chdir $builddir;
}
2018-11-27 18:47:08 -05:00
# Build the shell command line that starts named for $server.
#
# The command is made of:
#   - a launcher prefix: "taskset <cpus>" when --taskset was given, an
#     "rr record" wrapper when USE_RR is set in the environment, otherwise
#     plain $NAMED;
#   - the arguments: $options if supplied, else the first non-blank,
#     non-comment line of the server's "named.args" file if that file
#     exists, else a default argument set (including a "-T <name>" flag
#     for every "named.<name>" marker file present);
#   - " -T notcp" when the marker file "named.notcp" exists;
#   - redirection of all output to named.run (appending when restarting),
#     backgrounding, and an "echo $!" so the shell reports the pid.
#
# Returns the command string; dies if "named.args" cannot be opened.
sub construct_ns_command {
    my ( $server, $options ) = @_;

    my $server_dir = "$testdir/$server";

    # launcher prefix
    my $command = "$NAMED ";
    if ($taskset) {
        $command = "taskset $taskset $NAMED ";
    } elsif ($ENV{'USE_RR'}) {
        $ENV{'_RR_TRACE_DIR'} = ".";
        $command = "$ENV{'TOP_BUILDDIR'}/libtool --mode=execute rr record --chaos $NAMED ";
    }

    # server arguments
    my $args_file = "$server_dir/named.args";

    if ($options) {
        $command .= $options;
    } elsif (-e $args_file) {
        open(my $fh, "<", $args_file) or die "unable to read args_file \"$args_file\" ($OS_ERROR)\n";

        while (my $line = <$fh>) {
            # discard blank lines and comment lines
            next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);

            # only the first remaining line is used, minus any
            # trailing comment
            chomp $line;
            $line =~ s/#.*$//;
            $command .= $line;
            last;
        }
    } else {
        # one "-T <name>" flag for each marker file that is present
        my @t_options = grep { -e "$server_dir/named.$_" } (
            "dropedns", "ednsformerr", "ednsnotimp", "ednsrefused",
            "cookiealwaysvalid", "noaa", "noedns", "nosoa",
            "maxudp512", "maxudp1460",
        );

        $command .= "-D $test-$server ";
        $command .= "-m record ";
        $command .= join("", map { "-T $_ " } @t_options);
        $command .= "-c named.conf -d 99 -g -T maxcachesize=2097152";
    }

    if (-e "$server_dir/named.notcp") {
        $command .= " -T notcp";
    }

    # append to the log on restart, otherwise start a new one
    $command .= $restart ? " >>named.run 2>&1 &" : " >named.run 2>&1 &";

    # get the shell to report the pid of the server ($!)
    $command .= " echo \$!";

    return $command;
}
# Start the named instance that lives in directory $server.
#
# Builds the command line with construct_ns_command(), removes leftover
# journal/backup/state files and the old run log unless cleaning was
# disabled, then hands over to start_server(), which waits for
# "named.pid" to appear.
sub start_ns_server {
    my ( $server, $options ) = @_;

    my $pid_file = "named.pid";
    my $command  = construct_ns_command($server, $options);

    # NOTE(review): this pattern is relative to the current working
    # directory, and the chdir into the server directory only happens
    # later inside start_server() - confirm that is intended.
    my $cleanup_files = "{./*.jnl,./*.bk,./*.st,./named.run}";
    unlink glob $cleanup_files if $clean;

    start_server($server, $command, $pid_file);
}
# Build the shell command line that starts the auxiliary answer server
# for $server (a directory named "ansN").
#
# The server program is chosen in this order:
#   - "ans.py" in the server directory, run with $PYTHON and given the
#     address 10.53.0.N and the query port (PYTHONPATH is set to the test
#     and build directories first);
#   - "ans.pl" in the server directory, run with $PERL;
#   - otherwise the shared "$srcdir/ans.pl", given the address 10.53.0.N.
#
# $options, when supplied, is appended as additional arguments.  Output is
# redirected to ans.run (appending when restarting), the process is put in
# the background and "echo $!" makes the shell report its pid.
#
# Returns the command string; dies if no number can be parsed from
# $server.
sub construct_ans_command {
    my ( $server, $options ) = @_;

    my $command;
    my $n;

    if ($server =~ /^ans(\d+)/) {
        $n = $1;
    } else {
        die "unable to parse server number from name \"$server\"\n";
    }

    if (-e "$testdir/$server/ans.py") {
        $ENV{'PYTHONPATH'} = $testdir . ":" . $builddir;
        $command = "$PYTHON -u ans.py 10.53.0.$n $queryport";
    } elsif (-e "$testdir/$server/ans.pl") {
        $command = "$PERL ans.pl";
    } else {
        $command = "$PERL $srcdir/ans.pl 10.53.0.$n";
    }

    if ($options) {
        # Separate the extra options from the preceding argument; without
        # the space they would be fused onto the last token of the command.
        $command .= " $options";
    }

    if ($restart) {
        $command .= " >>ans.run 2>&1 &";
    } else {
        $command .= " >ans.run 2>&1 &";
    }

    # get the shell to report the pid of the server ($!)
    $command .= " echo \$!";

    return $command;
}
# Start the auxiliary answer server that lives in directory $server.
#
# Builds the command line with construct_ans_command(), removes the old
# run log unless cleaning was disabled, then hands over to start_server(),
# which waits for "ans.pid" to appear.
sub start_ans_server {
    my ( $server, $options ) = @_;

    my $pid_file = "ans.pid";
    my $command  = construct_ans_command($server, $options);

    my $cleanup_files = "{./ans.run}";
    unlink glob $cleanup_files if $clean;

    start_server($server, $command, $pid_file);
}
Fix startup detection after restart in start.pl The bin/tests/system/start.pl script waits until a "running" message is logged by a given name server instance before attempting to send a version.bind/CH/TXT query to it. The idea behind this was to make the script wait until named loads all the zones it is configured to serve before telling the system test framework that a given server is ready to use; this prevents the need to add boilerplate code that waits for a specific zone to be loaded to each test expecting that. The problem is that when it looks for "running" messages, the bin/tests/system/start.pl script assumes that the existence of any such message in the named.run file indicates that a given named instance has already finished loading all zones. Meanwhile, some system tests restart all the named instances they use throughout their lifetime (some even do that a few times), for example to run Python-based tests. The bin/tests/system/start.pl script handles such a scenario incorrectly: as soon as it finds any "running" message in the named.run file it inspects and it gets a response to a version.bind/CH/TXT query, it tells the system test framework that a given server is ready to use, which might not be true - it is possible that only the "version.bind" zone is loaded at that point and the "running" message found was logged by a previously-shutdown named instance. This triggers intermittent failures for Python-based tests. Fix by improving the logic that the bin/tests/system/start.pl script uses to detect server startup: check how many "running" lines are present in a given named.run file before attempting to start a named instance and only proceed with version.bind/CH/TXT queries when the number of "running" lines found in that named.run file increases after the server is started.
2022-10-11 05:54:57 -04:00
# Count the "running" messages logged so far in a server's named.run file.
#
#   $server - name of the server directory under $testdir (e.g. "ns1")
#
# Returns the number of lines in "$testdir/$server/named.run" that consist
# of two non-whitespace fields (expected to be a timestamp whose date and
# time parts are joined by a space or a "T") followed by " running" and a
# line break.  Returns 0 when the file cannot be opened.
sub count_running_lines {
    my ( $server ) = @_;
    my $log_path = "$testdir/$server/named.run";

    # The shell is expected to have created the file already; treating a
    # failed open as "no matches yet" tolerates a delayed creation.
    open(my $log, "<", $log_path) or return 0;

    # The content of the leading timestamp fields is irrelevant here; the
    # pattern only requires that they are present.
    my $running = 0;
    while (my $line = <$log>) {
        $running++ if $line =~ /^\S+[ T]\S+ running\R/;
    }
    close($log);

    return $running;
}
# Wait for a freshly started "nsN" server to come up and answer queries.
#
#   $server           - name of the server directory (must begin with
#                       "ns" followed by digits)
#   $instances_so_far - number of "running" lines that were present in the
#                       server's named.run before this start attempt
#
# Step 1: poll count_running_lines() until it reports at least
#         $instances_so_far + 1 lines.
# Step 2: poll the server with a version.bind/CH/TXT query via $DIG until
#         the command exits with status 0.
#
# Each step gives up on the 30th unsuccessful check (checks are 2 seconds
# apart).  Giving up prints a diagnostic, runs stop.pl for the whole test
# and exits the script with status 1.  Dies if the server number cannot be
# extracted from $server.
sub verify_ns_server {
    my ( $server, $instances_so_far ) = @_;

    # Common failure path shared by both polling steps.
    my $give_up = sub {
        my ( $message ) = @_;
        print $message;
        print "I:$test:failed\n";
        system("$PERL $srcdir/stop.pl $test");
        exit 1;
    };

    # Step 1: wait for a new "running" line to show up in named.run.
    my $attempts = 0;
    until (count_running_lines($server) >= $instances_so_far + 1) {
        if (++$attempts >= 30) {
            $give_up->("I:$test:server $server seems to have not started\n");
        }
        sleep 2;
    }

    my $port = read_ns_port($server);

    # The digits following "ns" select the last component of the address.
    my ( $n ) = $server =~ /^ns(\d+)/
        or die "unable to parse server number from name \"$server\"\n";

    # Marker files in the server directory adjust how the query is sent:
    # "named.notcp" drops the +tcp flag, "named.ipv6-only" switches the
    # target to the IPv6 address.
    my $tcp = (-e "$testdir/$server/named.notcp") ? "" : "+tcp";
    my $ip  = (-e "$testdir/$server/named.ipv6-only")
        ? "fd92:7065:b8e:ffff::$n"
        : "10.53.0.$n";

    # Step 2: query the server until dig succeeds.
    my $query = "$DIG $tcp +noadd +nosea +nostat +noquest +nocomm +nocmd +noedns -p $port version.bind. chaos txt \@$ip > /dev/null";
    $attempts = 0;
    until (system($query) == 0) {
        if (++$attempts >= 30) {
            $give_up->("I:$test:no response from $server\n");
        }
        sleep 2;
    }
}