mirror of
https://github.com/redis/redis.git
synced 2026-03-06 07:20:28 -05:00
Some checks are pending
CI / test-ubuntu-latest (push) Waiting to run
CI / test-sanitizer-address (push) Waiting to run
CI / build-debian-old (push) Waiting to run
CI / build-macos-latest (push) Waiting to run
CI / build-32bit (push) Waiting to run
CI / build-libc-malloc (push) Waiting to run
CI / build-centos-jemalloc (push) Waiting to run
CI / build-old-chain-jemalloc (push) Waiting to run
Codecov / code-coverage (push) Waiting to run
External Server Tests / test-external-standalone (push) Waiting to run
External Server Tests / test-external-cluster (push) Waiting to run
External Server Tests / test-external-nodebug (push) Waiting to run
Spellcheck / Spellcheck (push) Waiting to run
The `repl-diskless-load` feature can effectively reduce the time of a full synchronization, but it may not be widely used: the `swapdb` option needs double the `maxmemory`, and `on-empty-db` only works on the first full sync (the replica must have no data). This PR introduces a new option: `flushdb` - Always flush the entire dataset before diskless load. If the diskless load fails, the replica will lose all existing data. Of course, this brings the risk of data loss, but it provides a choice if you want to reduce full sync time and accept this risk.
166 lines
6.5 KiB
Tcl
166 lines
6.5 KiB
Tcl
#
|
|
# Copyright (c) 2009-Present, Redis Ltd.
|
|
# All rights reserved.
|
|
#
|
|
# Copyright (c) 2024-present, Valkey contributors.
|
|
# All rights reserved.
|
|
#
|
|
# Licensed under your choice of (a) the Redis Source Available License 2.0
|
|
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
|
# GNU Affero General Public License v3 (AGPLv3).
|
|
#
|
|
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
|
|
#
|
|
|
|
# Creates a master-slave pair and breaks the link continuously to force
|
|
# partial resync attempts, all this while flooding the master with
|
|
# write queries.
|
|
#
|
|
# You can specify backlog size, ttl, delay before reconnection, test duration
|
|
# in seconds, and an additional condition to verify at the end.
|
|
#
|
|
# If reconnect is > 0, the test actually tries to break the connection and
|
|
# reconnect with the master, otherwise just the initial synchronization is
|
|
# checked for consistency.
|
|
# Runs one replication scenario against a freshly started master/replica
# pair while three background clients write to the master.
#
# Arguments:
#   descr        - scenario description, interpolated into the test name.
#   duration     - the link-breaking loop runs duration*10 iterations with a
#                  100 ms pause each (only when reconnect is true).
#   backlog_size - value applied to the master's repl-backlog-size.
#   backlog_ttl  - value applied to the master's repl-backlog-ttl.
#   delay        - when non-zero, the replica kills the master link and then
#                  runs DEBUG SLEEP $delay inside a MULTI/EXEC; when zero the
#                  link is just killed.
#   cond         - script evaluated at the end of the test, after the
#                  master and replica digests match.
#   mdl          - value applied to the master's repl-diskless-sync.
#   sdl          - value applied to the replica's repl-diskless-load.
#   reconnect    - boolean; when true the master-replica link is broken
#                  periodically, otherwise only the initial sync is checked.
#   rdbchannel   - value applied to repl-rdb-channel on both servers.
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect rdbchannel} {
    start_server {tags {"repl"} overrides {save {}}} {
        start_server {overrides {save {}}} {

            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set slave [srv 0 client]

            $master config set repl-backlog-size $backlog_size
            $master config set repl-backlog-ttl $backlog_ttl
            $master config set repl-diskless-sync $mdl
            $master config set repl-diskless-sync-delay 1
            $master config set repl-rdb-channel $rdbchannel
            $slave config set repl-diskless-load $sdl
            $slave config set repl-rdb-channel $rdbchannel

            # Background writers keep the master busy for the whole scenario.
            set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
            set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
            set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000]

            test {Slave should be able to synchronize with the master} {
                $slave slaveof $master_host $master_port
                wait_for_condition 50 100 {
                    [lindex [r role] 0] eq {slave} &&
                    [lindex [r role] 3] eq {connected}
                } else {
                    fail "Replication not started."
                }
            }

            # Check that the background clients are actually writing.
            test {Detect write load to master} {
                wait_for_condition 50 1000 {
                    [$master dbsize] > 100
                } else {
                    fail "Can't detect write load from background clients."
                }
            }

            test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect, rdbchannel: $rdbchannel)" {
                # Now while the clients are writing data, break the
                # master-slave link multiple times.
                if {$reconnect} {
                    for {set j 0} {$j < $duration*10} {incr j} {
                        after 100
                        # catch {puts "MASTER [$master dbsize] keys, REPLICA [$slave dbsize] keys"}

                        # Every 20th iteration, drop the link to the master.
                        if {($j % 20) == 0} {
                            # Best effort: errors from the kill are ignored
                            # on purpose.
                            catch {
                                if {$delay} {
                                    # Kill the link and sleep atomically so
                                    # the replica cannot reconnect before the
                                    # delay has elapsed.
                                    $slave multi
                                    $slave client kill $master_host:$master_port
                                    $slave debug sleep $delay
                                    $slave exec
                                } else {
                                    $slave client kill $master_host:$master_port
                                }
                            }
                        }
                    }
                }
                stop_bg_complex_data $load_handle0
                stop_bg_complex_data $load_handle1
                stop_bg_complex_data $load_handle2

                # Wait for the slave to reach the "online"
                # state from the POV of the master.
                set retry 5000
                while {$retry} {
                    set info [$master info]
                    if {[string match {*slave0:*state=online*} $info]} {
                        break
                    } else {
                        incr retry -1
                        after 100
                    }
                }
                if {$retry == 0} {
                    error "assertion:Slave not correctly synchronized"
                }

                # Wait that slave acknowledge it is online so
                # we are sure that DBSIZE and DEBUG DIGEST will not
                # fail because of timing issues. (-LOADING error)
                wait_for_condition 5000 100 {
                    [lindex [$slave role] 3] eq {connected}
                } else {
                    fail "Slave still not connected after some time"
                }

                wait_for_condition 100 100 {
                    [$master debug digest] == [$slave debug digest]
                } else {
                    # Dump both datasets to disk so they can be diffed.
                    set csv1 [csvdump r]
                    set csv2 [csvdump {r -1}]
                    set fd [open /tmp/repldump1.txt w]
                    puts -nonewline $fd $csv1
                    close $fd
                    set fd [open /tmp/repldump2.txt w]
                    puts -nonewline $fd $csv2
                    close $fd
                    fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
                }
                assert {[$master dbsize] > 0}
                # Scenario-specific check supplied by the caller.
                eval $cond
            }
        }
    }
}
|
|
|
|
# Run every psync scenario for each combination of master diskless sync,
# replica diskless load mode and rdbchannel setting.
tags {"external:skip"} {
    foreach mdl {no yes} {
        foreach sdl {disabled swapdb flushdb} {
            foreach rdbchannel {yes no} {
                # rdbchannel replication requires repl-diskless-sync enabled,
                # so that combination is skipped.
                if {$rdbchannel eq "yes" && $mdl eq "no"} {
                    continue
                }

                # Initial synchronization only; no extra condition to check.
                test_psync {no reconnection, just sync} 6 1000000 3600 0 {} $mdl $sdl 0 $rdbchannel

                test_psync {ok psync} 6 100000000 3600 0 {
                    assert {[s -1 sync_partial_ok] > 0}
                } $mdl $sdl 1 $rdbchannel

                test_psync {no backlog} 6 100 3600 0.5 {
                    assert {[s -1 sync_partial_err] > 0}
                } $mdl $sdl 1 $rdbchannel

                test_psync {ok after delay} 3 100000000 3600 3 {
                    assert {[s -1 sync_partial_ok] > 0}
                } $mdl $sdl 1 $rdbchannel

                test_psync {backlog expired} 3 100000000 1 3 {
                    assert {[s -1 sync_partial_err] > 0}
                } $mdl $sdl 1 $rdbchannel
            }
        }
    }
}
|