Allow retries when checking TCP high-water stats

In the TCP high-water checks, "rndc stats" is run after ans6 reports
that it opened the requested number of TCP connections.  However, we
fail to account for the fact that ns5 might not yet have called accept()
for these connections, in which case the counts output by "rndc stats"
will be off.  To prevent intermittent "tcp" system test failures, allow
the relevant connection count checks to be retried (just once, after one
second, as that should be enough for any system to accept() a dozen TCP
connections under any circumstances).
This commit is contained in:
Michał Kępień 2019-11-07 12:25:39 +01:00 committed by Evan Hunt
parent 69f8f65323
commit 1e22e052d0
2 changed files with 38 additions and 9 deletions

View file

@ -338,6 +338,26 @@ nextpartpeek() {
nextpartread $1 2> /dev/null
}
# retry: keep running a command until it succeeds, up to $1 times, with
# one-second intervals
retry() {
__retries="${1}"
shift
while :; do
if "$@"; then
return 0
fi
__retries=$((__retries-1))
if [ "${__retries}" -gt 0 ]; then
echo_i "retrying"
sleep 1
else
return 1
fi
done
}
rndc_reload() {
echo_i "`$RNDC -c ../common/rndc.conf -s $2 -p ${CONTROLPORT} reload $3 2>&1 | sed 's/^/'$1' /'`"
# reloading single zone is synchronous, if we're reloading whole server

View file

@ -112,9 +112,12 @@ ret=0
OLD_TCP_CUR="${TCP_CUR}"
TCP_ADDED=9
open_connections "${TCP_ADDED}"
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" $((OLD_TCP_CUR + TCP_ADDED)) "current TCP clients count" || ret=1
assert_int_equal "${TCP_HIGH}" $((OLD_TCP_CUR + TCP_ADDED)) "TCP high-water value" || ret=1
check_stats_added() {
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" $((OLD_TCP_CUR + TCP_ADDED)) "current TCP clients count" || return 1
assert_int_equal "${TCP_HIGH}" $((OLD_TCP_CUR + TCP_ADDED)) "TCP high-water value" || return 1
}
retry 2 check_stats_added || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
@ -127,9 +130,12 @@ OLD_TCP_CUR="${TCP_CUR}"
OLD_TCP_HIGH="${TCP_HIGH}"
TCP_REMOVED=5
close_connections "${TCP_REMOVED}"
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" $((OLD_TCP_CUR - TCP_REMOVED)) "current TCP clients count" || ret=1
assert_int_equal "${TCP_HIGH}" "${OLD_TCP_HIGH}" "TCP high-water value" || ret=1
check_stats_removed() {
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" $((OLD_TCP_CUR - TCP_REMOVED)) "current TCP clients count" || return 1
assert_int_equal "${TCP_HIGH}" "${OLD_TCP_HIGH}" "TCP high-water value" || return 1
}
retry 2 check_stats_removed || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
@ -139,9 +145,12 @@ n=$((n + 1))
echo_i "TCP high-water: ensure tcp-clients is an upper bound ($n)"
ret=0
open_connections $((TCP_LIMIT + 1))
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" "${TCP_LIMIT}" "current TCP clients count" || ret=1
assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || ret=1
check_stats_limit() {
refresh_tcp_stats
assert_int_equal "${TCP_CUR}" "${TCP_LIMIT}" "current TCP clients count" || return 1
assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1
}
retry 2 check_stats_limit || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`