fix: test: Harden EDE 24 system tests

Harden the `ede24` system test to avoid random failures, which are likely caused by timing issues. Also remove expiration-related dead code (which should have been done in the original ede24 changes), and print the query ID in the query trace, as this should help debug further flaky system test issues (in particular this one, if the changes made here are not enough).

Closes #5625

Merge branch '5625-fix-ede24-test' into 'main'

See merge request isc-projects/bind9!11217
This commit is contained in:
Colin Vidal 2025-11-06 16:13:29 +01:00
commit 4b2dcb3128
8 changed files with 107 additions and 94 deletions

View file

@ -0,0 +1,39 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
import isctest
def check_soa_noerror():
    """Send a foo.fr SOA query over UDP to 10.53.0.2 and check the reply.

    The response is handed to isctest.check.noerror().
    """
    response = isctest.query.udp(isctest.query.create("foo.fr", "SOA"), "10.53.0.2")
    isctest.check.noerror(response)
def check_soa_servfail_ede24(edemsg):
    """Send a foo.fr SOA query to 10.53.0.2 and check for SERVFAIL + EDE 24.

    The response is handed to isctest.check.servfail(). When the response
    object offers extended_errors(), it must carry exactly one extended
    error whose text is "EDE 24 (Invalid Data): " followed by edemsg.
    """
    query = isctest.query.create("foo.fr", "SOA")
    response = isctest.query.udp(query, "10.53.0.2")
    isctest.check.servfail(response)

    # A few CI machines use an old version of dnspython whose responses have
    # no extended_errors() method, so the EDE check is effectively bypassed
    # on those. (That's fine: a bunch of other CI machines _do_ have a recent
    # version of dnspython.)
    if not hasattr(response, "extended_errors"):
        return

    errors = response.extended_errors()
    assert len(errors) == 1
    assert errors[0].to_text() == f"EDE 24 (Invalid Data): {edemsg}"
def check_ns2_ready(ns2):
    """Wait for ns2 to log a successful transfer, then run the SOA check.

    ns2 is the server fixture whose log is watched from its start for the
    "Transfer status: success" line.
    """
    # Sanity check that everything works first, once we're sure the foo.fr
    # zone has been transferred to ns2.
    with ns2.watch_log_from_start() as log_watcher:
        log_watcher.wait_for_line("Transfer status: success")

    check_soa_noerror()

View file

@ -14,7 +14,7 @@ foo.fr. IN SOA ns.foo.fr. op.foo.fr. (
3 ; serial
1 ; refresh
1 ; retry
1 ; expire
3 ; expire
60 ; minimum
)
foo.fr. NS ns.foo.fr.

View file

@ -1,71 +0,0 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
import os
import isctest
def check_soa_noerror():
    """Send a foo.fr SOA query over UDP to 10.53.0.2 and check the reply.

    The response is handed to isctest.check.noerror().
    """
    soa_query = isctest.query.create("foo.fr", "SOA")
    isctest.check.noerror(isctest.query.udp(soa_query, "10.53.0.2"))
def check_soa_servfail_ede24(edemsg):
    """Send a foo.fr SOA query to 10.53.0.2 and check for SERVFAIL + EDE 24.

    The response is handed to isctest.check.servfail(). When the response
    object offers extended_errors(), it must carry exactly one extended
    error whose text is "EDE 24 (Invalid Data): " followed by edemsg.
    """
    soa_query = isctest.query.create("foo.fr", "SOA")
    reply = isctest.query.udp(soa_query, "10.53.0.2")
    isctest.check.servfail(reply)

    # A few CI machines use an old version of dnspython whose responses have
    # no extended_errors() method, so the EDE check is effectively bypassed
    # on those. (That's fine: a bunch of other CI machines _do_ have a recent
    # version of dnspython.)
    if not hasattr(reply, "extended_errors"):
        return

    ede_list = reply.extended_errors()
    assert len(ede_list) == 1
    assert ede_list[0].to_text() == f"EDE 24 (Invalid Data): {edemsg}"
def test_ede24_noloaded(ns1, ns2):
    """Check that ns2 answers SERVFAIL with EDE 24 "zone not loaded".

    Both servers are stopped and only ns2 is restarted, so its transfer
    attempt against the primary (10.53.0.1) fails.
    """
    # Sanity check that everything works first.
    check_soa_noerror()

    # Stop all servers; only ns2 is restarted below.
    for server in (ns1, ns2):
        server.stop()

    restart_args = ["--noclean", "--restart", "--port", os.environ["PORT"]]
    with ns2.watch_log_from_here() as log_watcher:
        ns2.start(restart_args)
        log_watcher.wait_for_line("failure trying primary 10.53.0.1")

    # ns2 attempts an XFR, but since ns1 is off, the zone DB can't be loaded.
    check_soa_servfail_ede24("zone not loaded")
def test_ede24_expired(ns1, ns2):
    """Check that ns2 answers SERVFAIL with EDE 24 "zone expired".

    The primary (ns1) is restarted so ns2 serves the zone again, then
    stopped until ns2 logs the zone expiry, and finally restarted to confirm
    that ns2 recovers.
    """
    restart_args = ["--noclean", "--restart", "--port", os.environ["PORT"]]

    # Restart ns1, then check that ns2 transfers the zone and serves it
    # again.
    with ns2.watch_log_from_here() as log_watcher:
        ns1.start(restart_args)
        log_watcher.wait_for_line("Transfer status: success")
    check_soa_noerror()

    # Stop the primary and wait for the zone to expire on the secondary.
    with ns2.watch_log_from_here() as log_watcher:
        ns1.stop()
        log_watcher.wait_for_line(" zone foo.fr/IN: expired")

    # ns2 can't answer anymore.
    check_soa_servfail_ede24("zone expired")

    # Restart the primary and wait for the zone to be back up again.
    with ns2.watch_log_from_here() as log_watcher:
        ns1.start(restart_args)
        log_watcher.wait_for_line("Transfer status: success")
    check_soa_noerror()

View file

@ -0,0 +1,36 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
import os
from ede24.common import check_ns2_ready, check_soa_noerror, check_soa_servfail_ede24
def test_ede24_expired(ns1, ns2):
    """Check that ns2 answers SERVFAIL with EDE 24 "zone expired".

    Once ns2 is ready, the primary (ns1) is stopped until ns2 logs both the
    zone expiry and the zone timer stop; ns1 is then restarted to confirm
    that ns2 serves the zone again.
    """
    check_ns2_ready(ns2)

    # Log lines ns2 must emit, in this order, before it is queried.
    expiry_lines = [
        " zone foo.fr/IN: expired",
        " zone foo.fr/IN: stop zone timer",
    ]

    # Stop the primary and wait for the zone to expire on the secondary.
    with ns2.watch_log_from_here() as log_watcher:
        ns1.stop()
        log_watcher.wait_for_sequence(expiry_lines)

    # ns2 can't answer anymore.
    check_soa_servfail_ede24("zone expired")

    # Restart the primary and wait for the zone to be back up again.
    restart_args = ["--noclean", "--restart", "--port", os.environ["PORT"]]
    with ns2.watch_log_from_here() as log_watcher:
        ns1.start(restart_args)
        log_watcher.wait_for_line("Transfer status: success")
    check_soa_noerror()

View file

@ -0,0 +1,28 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
import os
from ede24.common import check_ns2_ready, check_soa_servfail_ede24
def test_ede24_noloaded(ns1, ns2):
    """Check that ns2 answers SERVFAIL with EDE 24 "zone not loaded".

    Once ns2 is ready, both servers are stopped and only ns2 is restarted,
    so its transfer attempt against the primary (10.53.0.1) fails.
    """
    check_ns2_ready(ns2)

    # Stop all servers; only ns2 is restarted below.
    for server in (ns1, ns2):
        server.stop()

    restart_args = ["--noclean", "--restart", "--port", os.environ["PORT"]]
    with ns2.watch_log_from_here() as log_watcher:
        ns2.start(restart_args)
        log_watcher.wait_for_line("failure trying primary 10.53.0.1")

    # ns2 attempts an XFR, but since ns1 is off, the zone DB can't be loaded.
    check_soa_servfail_ede24("zone not loaded")

View file

@ -593,17 +593,6 @@ dns_zone_markdirty(dns_zone_t *zone);
*\li 'zone' to be a valid zone.
*/
void
dns_zone_expire(dns_zone_t *zone);
/*%<
* Mark the zone as expired. If the zone requires dumping, cause it to
* be initiated. Set the refresh and retry intervals to their default
* values and unload the zone.
*
* Require
*\li 'zone' to be a valid zone.
*/
void
dns_zone_refresh(dns_zone_t *zone);
/*%<

View file

@ -11762,15 +11762,6 @@ again:
UNLOCK_ZONE(zone);
}
/*
 * Public wrapper around zone_expire(): check that 'zone' is a valid zone,
 * then call zone_expire() with the zone lock held.
 */
void
dns_zone_expire(dns_zone_t *zone) {
REQUIRE(DNS_ZONE_VALID(zone));
/* zone_expire() is called between LOCK_ZONE() and UNLOCK_ZONE(). */
LOCK_ZONE(zone);
zone_expire(zone);
UNLOCK_ZONE(zone);
}
static void
zone_expire(dns_zone_t *zone) {
dns_db_t *db = NULL;

View file

@ -5283,12 +5283,13 @@ query_trace(query_ctx_t *qctx) {
snprintf(mbuf, sizeof(mbuf) - 1,
"client attr:0x%x, query attr:0x%X, restarts:%u, "
"origqname:%s, timer:%d, authdb:%d, referral:%d",
"origqname:%s, timer:%d, authdb:%d, referral:%d, id:%hu",
qctx->client->inner.attributes, qctx->client->query.attributes,
qctx->client->query.restarts, qbuf,
(int)qctx->client->query.timerset,
(int)qctx->client->query.authdbset,
(int)qctx->client->query.isreferral);
(int)qctx->client->query.isreferral,
qctx->client->message->id);
CCTRACE(ISC_LOG_DEBUG(3), mbuf);
#else /* ifdef WANT_QUERYTRACE */
UNUSED(qctx);