redis/tests/integration/replication-psync.tcl
Yuan Wang f3316c3a1a
Some checks are pending
CI / test-ubuntu-latest (push) Waiting to run
CI / test-sanitizer-address (push) Waiting to run
CI / build-debian-old (push) Waiting to run
CI / build-macos-latest (push) Waiting to run
CI / build-32bit (push) Waiting to run
CI / build-libc-malloc (push) Waiting to run
CI / build-centos-jemalloc (push) Waiting to run
CI / build-old-chain-jemalloc (push) Waiting to run
Codecov / code-coverage (push) Waiting to run
External Server Tests / test-external-standalone (push) Waiting to run
External Server Tests / test-external-cluster (push) Waiting to run
External Server Tests / test-external-nodebug (push) Waiting to run
Spellcheck / Spellcheck (push) Waiting to run
Introduce flushdb option for repl-diskless-load (#14596)
The `repl-diskless-load` feature can effectively reduce the time of full
synchronization, but it may not be widely used.
`swapdb` option needs double `maxmemory`, and `on-empty-db` only works
on the first full sync (the replica must have no data).

This PR introduces a new option: `flushdb` - Always flush the entire
dataset before diskless load. If the diskless load fails, the replica
will lose all existing data.

Of course, it brings the risk of data loss, but it provides a choice if
you want to reduce full sync time and accept this risk.
2025-12-15 11:25:53 +08:00

166 lines
6.5 KiB
Tcl

#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of (a) the Redis Source Available License 2.0
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
# GNU Affero General Public License v3 (AGPLv3).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# Creates a master-slave pair and breaks the link continuously to force
# partial resync attempts, all this while flooding the master with
# write queries.
#
# You can specify backlog size, ttl, delay before reconnection, test duration
# in seconds, and an additional condition to verify at the end.
#
# If reconnect is true, the test actually tries to break the connection and
# reconnect with the master, otherwise just the initial synchronization is
# checked for consistency.
#
# Arguments:
#   descr        - Scenario name, embedded in the title of the main test.
#   duration     - Length in seconds of the link-breaking phase (the loop
#                  runs duration*10 iterations of 100 ms each). Only used
#                  when reconnect is true.
#   backlog_size - Value set for repl-backlog-size on the master.
#   backlog_ttl  - Value set for repl-backlog-ttl on the master.
#   delay        - Seconds passed to DEBUG SLEEP on the slave together with
#                  the link kill; a false value (0) kills the link without
#                  sleeping.
#   cond         - Script evaluated as the last step of the main test, used
#                  for scenario-specific assertions.
#   mdl          - Value set for repl-diskless-sync on the master.
#   sdl          - Value set for repl-diskless-load on the slave.
#   reconnect    - Boolean: whether to break the link during the test.
#   rdbchannel   - Value set for repl-rdb-channel on both master and slave.
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect rdbchannel} {
    start_server {tags {"repl"} overrides {save {}}} {
        start_server {overrides {save {}}} {
            # The outer server (srv -1) acts as master, the inner one
            # (srv 0) as slave.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set slave [srv 0 client]

            $master config set repl-backlog-size $backlog_size
            $master config set repl-backlog-ttl $backlog_ttl
            $master config set repl-diskless-sync $mdl
            $master config set repl-diskless-sync-delay 1
            $master config set repl-rdb-channel $rdbchannel
            $slave config set repl-diskless-load $sdl
            $slave config set repl-rdb-channel $rdbchannel

            # Start three background writers against the master. They keep
            # running until explicitly stopped inside the main test below.
            set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
            set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
            set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000]

            test {Slave should be able to synchronize with the master} {
                $slave slaveof $master_host $master_port
                wait_for_condition 50 100 {
                    [lindex [r role] 0] eq {slave} &&
                    [lindex [r role] 3] eq {connected}
                } else {
                    fail "Replication not started."
                }
            }

            # Check that the background clients are actually writing.
            test {Detect write load to master} {
                wait_for_condition 50 1000 {
                    [$master dbsize] > 100
                } else {
                    fail "Can't detect write load from background clients."
                }
            }

            test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect, rdbchannel: $rdbchannel)" {
                # Now while the clients are writing data, break the
                # master-slave link multiple times.
                if {$reconnect} {
                    for {set j 0} {$j < $duration*10} {incr j} {
                        after 100
                        # catch {puts "MASTER [$master dbsize] keys, REPLICA [$slave dbsize] keys"}

                        # Every 20 iterations (about every 2 seconds) kill
                        # the slave's connection to the master.
                        if {($j % 20) == 0} {
                            # Errors are ignored on purpose: this is a
                            # best-effort disruption of the link.
                            catch {
                                if {$delay} {
                                    # Queue the kill and the sleep in one
                                    # MULTI/EXEC so the sleep directly
                                    # follows the kill.
                                    $slave multi
                                    $slave client kill $master_host:$master_port
                                    $slave debug sleep $delay
                                    $slave exec
                                } else {
                                    $slave client kill $master_host:$master_port
                                }
                            }
                        }
                    }
                }
                stop_bg_complex_data $load_handle0
                stop_bg_complex_data $load_handle1
                stop_bg_complex_data $load_handle2

                # Wait for the slave to reach the "online"
                # state from the POV of the master.
                wait_for_condition 5000 100 {
                    [string match {*slave0:*state=online*} [$master info]]
                } else {
                    fail "Slave not correctly synchronized"
                }

                # Wait that slave acknowledge it is online so
                # we are sure that DBSIZE and DEBUG DIGEST will not
                # fail because of timing issues. (-LOADING error)
                wait_for_condition 5000 100 {
                    [lindex [$slave role] 3] eq {connected}
                } else {
                    fail "Slave still not connected after some time"
                }

                wait_for_condition 100 100 {
                    [$master debug digest] == [$slave debug digest]
                } else {
                    # On mismatch, dump both datasets to files so they can
                    # be diffed offline, then fail the test.
                    set csv1 [csvdump r]
                    set csv2 [csvdump {r -1}]
                    set fd [open /tmp/repldump1.txt w]
                    puts -nonewline $fd $csv1
                    close $fd
                    set fd [open /tmp/repldump2.txt w]
                    puts -nonewline $fd $csv2
                    close $fd
                    fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
                }
                assert {[$master dbsize] > 0}

                # Run the caller-supplied, scenario-specific check.
                eval $cond
            }
        }
    }
}
# Run every partial-resync scenario for each combination of master diskless
# sync (mdl), slave diskless load (sdl) and rdbchannel settings.
tags {"external:skip"} {
    foreach mdl {no yes} {
        foreach sdl {disabled swapdb flushdb} {
            foreach rdbchannel {yes no} {
                # rdbchannel replication requires repl-diskless-sync enabled,
                # so the rdbchannel=yes / mdl=no combination is not run.
                if {$rdbchannel ne "yes" || $mdl ne "no"} {
                    test_psync {no reconnection, just sync} 6 1000000 3600 0 {} \
                        $mdl $sdl 0 $rdbchannel

                    test_psync {ok psync} 6 100000000 3600 0 \
                        {assert {[s -1 sync_partial_ok] > 0}} \
                        $mdl $sdl 1 $rdbchannel

                    test_psync {no backlog} 6 100 3600 0.5 \
                        {assert {[s -1 sync_partial_err] > 0}} \
                        $mdl $sdl 1 $rdbchannel

                    test_psync {ok after delay} 3 100000000 3600 3 \
                        {assert {[s -1 sync_partial_ok] > 0}} \
                        $mdl $sdl 1 $rdbchannel

                    test_psync {backlog expired} 3 100000000 1 3 \
                        {assert {[s -1 sync_partial_err] > 0}} \
                        $mdl $sdl 1 $rdbchannel
                }
            }
        }
    }
}