From 13c2f7adbba76e0748f0bceb23130786f9ca3d96 Mon Sep 17 00:00:00 2001 From: sr_zhao Date: Wed, 13 Nov 2024 14:56:48 +0800 Subject: [PATCH] Increase successful psync chance by flushing pending replication stream before disconnecting replica. To perform manual failover without data loss, we execute the following steps: 1. Temporarily disable writes by executing `replicaof 127.0.0.1 0` on the master. 2. Promote one replica as the new master. 3. Set the old master as a replica of the promoted master. During step 1, the master disconnects replicas. If there is still pending replication stream data for the replica that is promoted as the new master in step 2, step 3 fails to psync because the old master has a more recent replication stream than the new master. This patch improves the chances of successful psync by flushing the pending replication stream of the replica in a non-blocking manner during step 1. This makes the replica promoted in step 2 more likely to have the same replication stream as the master, increasing the likelihood of successful psync in step 3. --- src/networking.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 47312b8d8..78b678a36 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1461,7 +1461,17 @@ void disconnectSlaves(void) { listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { - freeClient((client*)ln->value); + /* Attempt to flush pending replication stream before disconnecting + * slaves without blocking, increasing the chances of successful + * psync for slaves when failed over. */ + client *slave = (client*)ln->value; + writeToClient(slave,0); + if (clientHasPendingReplies(slave)) { + sds client_desc = catClientInfoString(sdsempty(), slave); + serverLog(LL_NOTICE, "Slave still have pending replies when disconnect: %s", client_desc); + sdsfree(client_desc); + } + freeClient(slave); } }