bind9/bin/tests/system/stop.pl
Michał Kępień d995bd8dec Fix waiting for lock file removal upon exit
Commit c787a539d2 fixed a certain class of
intermittent system test failures caused by named instances unable to
restart.  The root cause was bin/tests/system/stop.pl returning without
waiting for a named instance to remove its lock file.

Later on, it turned out that the above change causes other issues on
Windows due to the way named handles signals on that platform.  Commit
761ba4514f intended to address those
issues by making the server_lock_file() subroutine in
bin/tests/system/stop.pl return an empty value on Windows, in order to
prevent the script for waiting for lock file cleanup on that platform.
Note, however, that Windows detection in that subroutine is limited to
checking whether the CYGWIN environment variable is set.

While that environment variable was not set on Unix-like systems before
commit 761ba4514f, another commit
(a33237f070, merged a few weeks later)
changed that by setting the CYGWIN environment variable to an empty
value on Unix-like systems.  This made the defined($ENV{'CYGWIN'}) check
in server_lock_file() return true, inadvertently preventing
bin/tests/system/stop.pl from waiting for lock file removal before
exiting on Unix-like systems and therefore reintroducing the original
issue.

Fix by making server_lock_file() only return an empty value when the
CYGWIN environment variable is set to a non-empty value (which is what
bin/tests/system/conf.sh.win32 does).  Adjust a similar check in the
pid_file_exists() subroutine in the same way for consistency.

(cherry picked from commit a938db2170)
2022-01-26 15:29:39 +01:00

288 lines
6.2 KiB
Perl

#!/usr/bin/perl -w
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
# Framework for stopping test servers
# Based on the type of server specified, signal the server to stop, wait
# briefly for it to die, and then kill it if it is still alive.
# If a server is specified, stop it. Otherwise, stop all servers for test.
use strict;
use warnings;
use Cwd ':DEFAULT', 'abs_path';
use English '-no_match_vars';
use Getopt::Long;
# Usage:
# perl stop.pl [--use-rndc [--port port]] test [server]
#
# --use-rndc Attempt to stop the server via the "rndc stop" command.
#
# --port port Only relevant if --use-rndc is specified, this sets the
# command port over which the attempt should be made. If
# not specified, port 9953 is used.
#
# test Name of the test directory.
#
# server Name of the server directory.
my $usage = "usage: $0 [--use-rndc [--halt] [--port port]] test-directory [server-directory]";
my $use_rndc = 0;
my $halt = 0;
my $rndc_port = 9953;
my $errors = 0;
GetOptions(
'use-rndc!' => \$use_rndc,
'halt!' => \$halt,
'port=i' => \$rndc_port
) or die "$usage\n";
my ( $test, $server_arg ) = @ARGV;
if (!$test) {
die "$usage\n";
}
# Global variables
my $topdir = abs_path($ENV{'SYSTEMTESTTOP'});
my $testdir = abs_path($topdir . "/" . $test);
if (! -d $testdir) {
die "No test directory: \"$testdir\"\n";
}
if ($server_arg && ! -d "$testdir/$server_arg") {
die "No server directory: \"$testdir/$server_arg\"\n";
}
my $RNDC = $ENV{RNDC};
my @ns;
my @ans;
if ($server_arg) {
if ($server_arg =~ /^ns/) {
push(@ns, $server_arg);
} elsif ($server_arg =~ /^ans/) {
push(@ans, $server_arg);
} else {
print "$0: ns or ans directory expected";
print "I:$test:failed";
}
} else {
# Determine which servers need to be stopped for this test.
opendir DIR, $testdir or die "unable to read test directory: \"$test\" ($OS_ERROR)\n";
my @files = sort readdir DIR;
closedir DIR;
@ns = grep /^ns[0-9]*$/, @files;
@ans = grep /^ans[0-9]*$/, @files;
}
# Stop the server(s), pass 1: rndc.
if ($use_rndc) {
foreach my $name(@ns) {
stop_rndc($name, $rndc_port);
}
@ns = wait_for_servers(30, @ns);
}
# Pass 2: SIGTERM
foreach my $name (@ns) {
stop_signal($name, "TERM");
}
@ns = wait_for_servers(60, @ns);
foreach my $name(@ans) {
stop_signal($name, "TERM", 1);
}
@ans = wait_for_servers(1200, @ans);
# Pass 3: SIGABRT
foreach my $name (@ns) {
print "I:$test:$name didn't die when sent a SIGTERM\n";
stop_signal($name, "ABRT");
$errors = 1;
}
foreach my $name (@ans) {
print "I:$test:$name didn't die when sent a SIGTERM\n";
stop_signal($name, "ABRT", 1);
$errors = 1;
}
exit($errors);
# Subroutines
# Return the full path to a given server's lock file.
sub server_lock_file {
my ( $server ) = @_;
return if (defined($ENV{'CYGWIN'}) && $ENV{'CYGWIN'});
return $testdir . "/" . $server . "/named.lock" if ($server =~ /^ns/);
return if ($server =~ /^ans/);
die "Unknown server type $server\n";
}
# Return the full path to a given server's PID file.
sub server_pid_file {
my ( $server ) = @_;
return $testdir . "/" . $server . "/named.pid" if ($server =~ /^ns/);
return $testdir . "/" . $server . "/ans.pid" if ($server =~ /^ans/);
die "Unknown server type $server\n";
}
# Read a PID.
sub read_pid {
my ( $pid_file ) = @_;
return unless -f $pid_file;
# we don't really care about the race condition here
my $result = open(my $fh, "<", $pid_file);
if (!defined($result)) {
print "I:$test:$pid_file: $!\n";
unlink $pid_file;
return;
}
my $pid = <$fh>;
return unless defined($pid);
chomp($pid);
return $pid;
}
# Stop a named process with rndc.
sub stop_rndc {
my ( $server, $port ) = @_;
my $n;
if ($server =~ /^ns(\d+)/) {
$n = $1;
} else {
die "unable to parse server number from name \"$server\"\n";
}
my $ip = "10.53.0.$n";
my $how = $halt ? "halt" : "stop";
# Ugly, but should work.
system("$RNDC -c ../common/rndc.conf -s $ip -p $port $how | sed 's/^/I:$test:$server /'");
return;
}
sub server_died {
my ( $server, $signal ) = @_;
print "I:$test:$server died before a SIG$signal was sent\n";
$errors = 1;
my $pid_file = server_pid_file($server);
unlink($pid_file);
return;
}
sub send_signal {
my ( $signal, $pid, $ans ) = @_;
if (! defined $ans) {
$ans = 0;
}
my $result = 0;
if (!$ans && ($^O eq 'cygwin' || $^O eq 'msys')) {
my $killout = `/bin/kill -f -$signal $pid 2>&1`;
chomp($killout);
$result = 1 if ($killout eq '');
} else {
$result = kill $signal, $pid;
}
return $result;
}
# Stop a server by sending a signal to it.
sub stop_signal {
my ( $server, $signal, $ans ) = @_;
if (! defined $ans) {
$ans = 0;
}
my $pid_file = server_pid_file($server);
my $pid = read_pid($pid_file);
return unless defined($pid);
# Send signal to the server, and bail out if signal can't be sent
if (send_signal($signal, $pid, $ans) != 1) {
server_died($server, $signal);
return;
}
return;
}
sub pid_file_exists {
my ( $server ) = @_;
my $pid_file = server_pid_file($server);
my $pid = read_pid($pid_file);
return unless defined($pid);
# If we're here, the PID file hasn't been cleaned up yet
if (send_signal(0, $pid) == 0) {
# XXX: on windows this is likely to result in a
# false positive, so don't bother reporting the error.
if (!defined($ENV{'CYGWIN'}) || !$ENV{'CYGWIN'}) {
print "I:$test:$server crashed on shutdown\n";
$errors = 1;
}
return;
}
return $server;
}
sub lock_file_exists {
my ( $server ) = @_;
my $lock_file = server_lock_file($server);
return unless defined($lock_file) && -f $lock_file;
return $server;
}
sub wait_for_servers {
my ( $timeout, @servers ) = @_;
while ($timeout > 0 && @servers > 0) {
sleep 1 if (@servers > 0);
@servers =
grep { defined($_) }
map { pid_file_exists($_) || lock_file_exists($_) } @servers;
$timeout--;
}
return @servers;
}