mirror of
https://github.com/redis/redis.git
synced 2026-03-23 19:23:17 -04:00
The mess: Some parts use alsoPropagate for late propagation, others using an immediate one (propagate()), causing edge cases, ugly/hacky code, and the tendency for bugs The basic idea is that all commands are propagated via alsoPropagate (i.e. added to a list) and the top-most call() is responsible for going over that list and actually propagating them (and wrapping them in MULTI/EXEC if there's more than one command). This is done in the new function, propagatePendingCommands. Callers to propagatePendingCommands: 1. top-most call() (we want all nested call()s to add to the also_propagate array and just the top-most one to propagate them) - via `afterCommand` 2. handleClientsBlockedOnKeys: it is out of call() context and it may propagate stuff - via `afterCommand`. 3. handleClientsBlockedOnKeys edge case: if the looked-up key is already expired, we will propagate the expire but will not unblock any client so `afterCommand` isn't called. in that case, we have to propagate the deletion explicitly. 4. cron stuff: active-expire and eviction may also propagate stuff 5. modules: the module API allows to propagate stuff from just about anywhere (timers, keyspace notifications, threads). I could have tried to catch all the out-of-call-context places but it seemed easier to handle it in one place: when we free the context. in the spirit of what was done in call(), only the top-most freeing of a module context may cause propagation. 6. modules: when using a thread-safe ctx it's not clear when/if the ctx will be freed. we do know that the module must lock the GIL before calling RM_Replicate/RM_Call so we propagate the pending commands when releasing the GIL. A "known limitation", which were actually a bug, was fixed because of this commit (see propagate.tcl): When using a mix of RM_Call with `!` and RM_Replicate, the command would propagate out-of-order: first all the commands from RM_Call, and then the ones from RM_Replicate Another thing worth mentioning is that if, in the past, a client would issue a MULTI/EXEC with just one write command the server would blindly propagate the MULTI/EXEC too, even though it's redundant. not anymore. This commit renames propagate() to propagateNow() in order to cause conflicts in pending PRs. propagatePendingCommands is the only caller of propagateNow, which is now a static, internal helper function. Optimizations: 1. alsoPropagate will not add stuff to also_propagate if there's no AOF and replicas 2. alsoPropagate reallocs also_propagagte exponentially, to save calls to memmove Bugfixes: 1. CONFIG SET can create evictions, sending notifications which can cause to dirty++ with modules. we need to prevent it from propagating to AOF/replicas 2. We need to set current_client in RM_Call. buggy scenario: - CONFIG SET maxmemory, eviction notifications, module hook calls RM_Call - assertion in lookupKey crashes, because current_client has CONFIG SET, which isn't CMD_WRITE 3. minor: in eviction, call propagateDeletion after notification, like active-expire and all commands (we always send a notification before propagating the command)
428 lines
14 KiB
C
428 lines
14 KiB
C
/*
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "server.h"
|
|
|
|
/* ================================ MULTI/EXEC ============================== */
|
|
|
|
/* Client state initialization for MULTI/EXEC */
|
|
void initClientMultiState(client *c) {
|
|
c->mstate.commands = NULL;
|
|
c->mstate.count = 0;
|
|
c->mstate.cmd_flags = 0;
|
|
c->mstate.cmd_inv_flags = 0;
|
|
c->mstate.argv_len_sums = 0;
|
|
}
|
|
|
|
/* Release all the resources associated with MULTI/EXEC state */
|
|
void freeClientMultiState(client *c) {
|
|
int j;
|
|
|
|
for (j = 0; j < c->mstate.count; j++) {
|
|
int i;
|
|
multiCmd *mc = c->mstate.commands+j;
|
|
|
|
for (i = 0; i < mc->argc; i++)
|
|
decrRefCount(mc->argv[i]);
|
|
zfree(mc->argv);
|
|
}
|
|
zfree(c->mstate.commands);
|
|
}
|
|
|
|
/* Add a new command into the MULTI commands queue */
|
|
void queueMultiCommand(client *c) {
|
|
multiCmd *mc;
|
|
|
|
/* No sense to waste memory if the transaction is already aborted.
|
|
* this is useful in case client sends these in a pipeline, or doesn't
|
|
* bother to read previous responses and didn't notice the multi was already
|
|
* aborted. */
|
|
if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC))
|
|
return;
|
|
|
|
c->mstate.commands = zrealloc(c->mstate.commands,
|
|
sizeof(multiCmd)*(c->mstate.count+1));
|
|
mc = c->mstate.commands+c->mstate.count;
|
|
mc->cmd = c->cmd;
|
|
mc->argc = c->argc;
|
|
mc->argv = c->argv;
|
|
mc->argv_len = c->argv_len;
|
|
|
|
c->mstate.count++;
|
|
c->mstate.cmd_flags |= c->cmd->flags;
|
|
c->mstate.cmd_inv_flags |= ~c->cmd->flags;
|
|
c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj*)*c->argc;
|
|
|
|
/* Reset the client's args since we copied them into the mstate and shouldn't
|
|
* reference them from c anymore. */
|
|
c->argv = NULL;
|
|
c->argc = 0;
|
|
c->argv_len_sum = 0;
|
|
c->argv_len = 0;
|
|
}
|
|
|
|
void discardTransaction(client *c) {
|
|
freeClientMultiState(c);
|
|
initClientMultiState(c);
|
|
c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
|
|
unwatchAllKeys(c);
|
|
}
|
|
|
|
/* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
|
|
* Should be called every time there is an error while queueing a command. */
|
|
void flagTransaction(client *c) {
|
|
if (c->flags & CLIENT_MULTI)
|
|
c->flags |= CLIENT_DIRTY_EXEC;
|
|
}
|
|
|
|
void multiCommand(client *c) {
|
|
if (c->flags & CLIENT_MULTI) {
|
|
addReplyError(c,"MULTI calls can not be nested");
|
|
return;
|
|
}
|
|
c->flags |= CLIENT_MULTI;
|
|
|
|
addReply(c,shared.ok);
|
|
}
|
|
|
|
void discardCommand(client *c) {
|
|
if (!(c->flags & CLIENT_MULTI)) {
|
|
addReplyError(c,"DISCARD without MULTI");
|
|
return;
|
|
}
|
|
discardTransaction(c);
|
|
addReply(c,shared.ok);
|
|
}
|
|
|
|
/* Aborts a transaction, with a specific error message.
|
|
* The transaction is always aborted with -EXECABORT so that the client knows
|
|
* the server exited the multi state, but the actual reason for the abort is
|
|
* included too.
|
|
* Note: 'error' may or may not end with \r\n. see addReplyErrorFormat. */
|
|
void execCommandAbort(client *c, sds error) {
|
|
discardTransaction(c);
|
|
|
|
if (error[0] == '-') error++;
|
|
addReplyErrorFormat(c, "-EXECABORT Transaction discarded because of: %s", error);
|
|
|
|
/* Send EXEC to clients waiting data from MONITOR. We did send a MULTI
|
|
* already, and didn't send any of the queued commands, now we'll just send
|
|
* EXEC so it is clear that the transaction is over. */
|
|
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
|
|
}
|
|
|
|
void execCommand(client *c) {
|
|
int j;
|
|
robj **orig_argv;
|
|
int orig_argc, orig_argv_len;
|
|
struct redisCommand *orig_cmd;
|
|
|
|
if (!(c->flags & CLIENT_MULTI)) {
|
|
addReplyError(c,"EXEC without MULTI");
|
|
return;
|
|
}
|
|
|
|
/* EXEC with expired watched key is disallowed*/
|
|
if (isWatchedKeyExpired(c)) {
|
|
c->flags |= (CLIENT_DIRTY_CAS);
|
|
}
|
|
|
|
/* Check if we need to abort the EXEC because:
|
|
* 1) Some WATCHed key was touched.
|
|
* 2) There was a previous error while queueing commands.
|
|
* A failed EXEC in the first case returns a multi bulk nil object
|
|
* (technically it is not an error but a special behavior), while
|
|
* in the second an EXECABORT error is returned. */
|
|
if (c->flags & (CLIENT_DIRTY_CAS | CLIENT_DIRTY_EXEC)) {
|
|
if (c->flags & CLIENT_DIRTY_EXEC) {
|
|
addReplyErrorObject(c, shared.execaborterr);
|
|
} else {
|
|
addReply(c, shared.nullarray[c->resp]);
|
|
}
|
|
|
|
discardTransaction(c);
|
|
return;
|
|
}
|
|
|
|
uint64_t old_flags = c->flags;
|
|
|
|
/* we do not want to allow blocking commands inside multi */
|
|
c->flags |= CLIENT_DENY_BLOCKING;
|
|
|
|
/* Exec all the queued commands */
|
|
unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
|
|
|
|
server.in_exec = 1;
|
|
|
|
orig_argv = c->argv;
|
|
orig_argv_len = c->argv_len;
|
|
orig_argc = c->argc;
|
|
orig_cmd = c->cmd;
|
|
addReplyArrayLen(c,c->mstate.count);
|
|
for (j = 0; j < c->mstate.count; j++) {
|
|
c->argc = c->mstate.commands[j].argc;
|
|
c->argv = c->mstate.commands[j].argv;
|
|
c->argv_len = c->mstate.commands[j].argv_len;
|
|
c->cmd = c->mstate.commands[j].cmd;
|
|
|
|
/* ACL permissions are also checked at the time of execution in case
|
|
* they were changed after the commands were queued. */
|
|
int acl_errpos;
|
|
int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
|
|
if (acl_retval != ACL_OK) {
|
|
char *reason;
|
|
switch (acl_retval) {
|
|
case ACL_DENIED_CMD:
|
|
reason = "no permission to execute the command or subcommand";
|
|
break;
|
|
case ACL_DENIED_KEY:
|
|
reason = "no permission to touch the specified keys";
|
|
break;
|
|
case ACL_DENIED_CHANNEL:
|
|
reason = "no permission to access one of the channels used "
|
|
"as arguments";
|
|
break;
|
|
default:
|
|
reason = "no permission";
|
|
break;
|
|
}
|
|
addACLLogEntry(c,acl_retval,ACL_LOG_CTX_MULTI,acl_errpos,NULL,NULL);
|
|
addReplyErrorFormat(c,
|
|
"-NOPERM ACLs rules changed between the moment the "
|
|
"transaction was accumulated and the EXEC call. "
|
|
"This command is no longer allowed for the "
|
|
"following reason: %s", reason);
|
|
} else {
|
|
if (c->id == CLIENT_ID_AOF)
|
|
call(c,CMD_CALL_NONE);
|
|
else
|
|
call(c,CMD_CALL_FULL);
|
|
|
|
serverAssert((c->flags & CLIENT_BLOCKED) == 0);
|
|
}
|
|
|
|
/* Commands may alter argc/argv, restore mstate. */
|
|
c->mstate.commands[j].argc = c->argc;
|
|
c->mstate.commands[j].argv = c->argv;
|
|
c->mstate.commands[j].cmd = c->cmd;
|
|
}
|
|
|
|
// restore old DENY_BLOCKING value
|
|
if (!(old_flags & CLIENT_DENY_BLOCKING))
|
|
c->flags &= ~CLIENT_DENY_BLOCKING;
|
|
|
|
c->argv = orig_argv;
|
|
c->argv_len = orig_argv_len;
|
|
c->argc = orig_argc;
|
|
c->cmd = orig_cmd;
|
|
discardTransaction(c);
|
|
|
|
server.in_exec = 0;
|
|
}
|
|
|
|
/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
|
|
*
|
|
* The implementation uses a per-DB hash table mapping keys to list of clients
|
|
* WATCHing those keys, so that given a key that is going to be modified
|
|
* we can mark all the associated clients as dirty.
|
|
*
|
|
* Also every client contains a list of WATCHed keys so that's possible to
|
|
* un-watch such keys when the client is freed or when UNWATCH is called. */
|
|
|
|
/* In the client->watched_keys list we need to use watchedKey structures
|
|
* as in order to identify a key in Redis we need both the key name and the
|
|
* DB */
|
|
typedef struct watchedKey {
|
|
robj *key;
|
|
redisDb *db;
|
|
} watchedKey;
|
|
|
|
/* Watch for the specified key */
|
|
void watchForKey(client *c, robj *key) {
|
|
list *clients = NULL;
|
|
listIter li;
|
|
listNode *ln;
|
|
watchedKey *wk;
|
|
|
|
/* Check if we are already watching for this key */
|
|
listRewind(c->watched_keys,&li);
|
|
while((ln = listNext(&li))) {
|
|
wk = listNodeValue(ln);
|
|
if (wk->db == c->db && equalStringObjects(key,wk->key))
|
|
return; /* Key already watched */
|
|
}
|
|
/* This key is not already watched in this DB. Let's add it */
|
|
clients = dictFetchValue(c->db->watched_keys,key);
|
|
if (!clients) {
|
|
clients = listCreate();
|
|
dictAdd(c->db->watched_keys,key,clients);
|
|
incrRefCount(key);
|
|
}
|
|
listAddNodeTail(clients,c);
|
|
/* Add the new key to the list of keys watched by this client */
|
|
wk = zmalloc(sizeof(*wk));
|
|
wk->key = key;
|
|
wk->db = c->db;
|
|
incrRefCount(key);
|
|
listAddNodeTail(c->watched_keys,wk);
|
|
}
|
|
|
|
/* Unwatch all the keys watched by this client. To clean the EXEC dirty
|
|
* flag is up to the caller. */
|
|
void unwatchAllKeys(client *c) {
|
|
listIter li;
|
|
listNode *ln;
|
|
|
|
if (listLength(c->watched_keys) == 0) return;
|
|
listRewind(c->watched_keys,&li);
|
|
while((ln = listNext(&li))) {
|
|
list *clients;
|
|
watchedKey *wk;
|
|
|
|
/* Lookup the watched key -> clients list and remove the client
|
|
* from the list */
|
|
wk = listNodeValue(ln);
|
|
clients = dictFetchValue(wk->db->watched_keys, wk->key);
|
|
serverAssertWithInfo(c,NULL,clients != NULL);
|
|
listDelNode(clients,listSearchKey(clients,c));
|
|
/* Kill the entry at all if this was the only client */
|
|
if (listLength(clients) == 0)
|
|
dictDelete(wk->db->watched_keys, wk->key);
|
|
/* Remove this watched key from the client->watched list */
|
|
listDelNode(c->watched_keys,ln);
|
|
decrRefCount(wk->key);
|
|
zfree(wk);
|
|
}
|
|
}
|
|
|
|
/* iterates over the watched_keys list and
|
|
* look for an expired key . */
|
|
int isWatchedKeyExpired(client *c) {
|
|
listIter li;
|
|
listNode *ln;
|
|
watchedKey *wk;
|
|
if (listLength(c->watched_keys) == 0) return 0;
|
|
listRewind(c->watched_keys,&li);
|
|
while ((ln = listNext(&li))) {
|
|
wk = listNodeValue(ln);
|
|
if (keyIsExpired(wk->db, wk->key)) return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* "Touch" a key, so that if this key is being WATCHed by some client the
|
|
* next EXEC will fail. */
|
|
void touchWatchedKey(redisDb *db, robj *key) {
|
|
list *clients;
|
|
listIter li;
|
|
listNode *ln;
|
|
|
|
if (dictSize(db->watched_keys) == 0) return;
|
|
clients = dictFetchValue(db->watched_keys, key);
|
|
if (!clients) return;
|
|
|
|
/* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
|
|
/* Check if we are already watching for this key */
|
|
listRewind(clients,&li);
|
|
while((ln = listNext(&li))) {
|
|
client *c = listNodeValue(ln);
|
|
|
|
c->flags |= CLIENT_DIRTY_CAS;
|
|
/* As the client is marked as dirty, there is no point in getting here
|
|
* again in case that key (or others) are modified again (or keep the
|
|
* memory overhead till EXEC). */
|
|
unwatchAllKeys(c);
|
|
}
|
|
}
|
|
|
|
/* Set CLIENT_DIRTY_CAS to all clients of DB when DB is dirty.
|
|
* It may happen in the following situations:
|
|
* FLUSHDB, FLUSHALL, SWAPDB, end of successful diskless replication.
|
|
*
|
|
* replaced_with: for SWAPDB, the WATCH should be invalidated if
|
|
* the key exists in either of them, and skipped only if it
|
|
* doesn't exist in both. */
|
|
void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
|
|
listIter li;
|
|
listNode *ln;
|
|
dictEntry *de;
|
|
|
|
if (dictSize(emptied->watched_keys) == 0) return;
|
|
|
|
dictIterator *di = dictGetSafeIterator(emptied->watched_keys);
|
|
while((de = dictNext(di)) != NULL) {
|
|
robj *key = dictGetKey(de);
|
|
if (dictFind(emptied->dict, key->ptr) ||
|
|
(replaced_with && dictFind(replaced_with->dict, key->ptr)))
|
|
{
|
|
list *clients = dictGetVal(de);
|
|
if (!clients) continue;
|
|
listRewind(clients,&li);
|
|
while((ln = listNext(&li))) {
|
|
client *c = listNodeValue(ln);
|
|
c->flags |= CLIENT_DIRTY_CAS;
|
|
/* As the client is marked as dirty, there is no point in getting here
|
|
* again for others keys (or keep the memory overhead till EXEC). */
|
|
unwatchAllKeys(c);
|
|
}
|
|
}
|
|
}
|
|
dictReleaseIterator(di);
|
|
}
|
|
|
|
void watchCommand(client *c) {
|
|
int j;
|
|
|
|
if (c->flags & CLIENT_MULTI) {
|
|
addReplyError(c,"WATCH inside MULTI is not allowed");
|
|
return;
|
|
}
|
|
/* No point in watching if the client is already dirty. */
|
|
if (c->flags & CLIENT_DIRTY_CAS) {
|
|
addReply(c,shared.ok);
|
|
return;
|
|
}
|
|
for (j = 1; j < c->argc; j++)
|
|
watchForKey(c,c->argv[j]);
|
|
addReply(c,shared.ok);
|
|
}
|
|
|
|
void unwatchCommand(client *c) {
|
|
unwatchAllKeys(c);
|
|
c->flags &= (~CLIENT_DIRTY_CAS);
|
|
addReply(c,shared.ok);
|
|
}
|
|
|
|
size_t multiStateMemOverhead(client *c) {
|
|
size_t mem = c->mstate.argv_len_sums;
|
|
/* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. */
|
|
mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey));
|
|
return mem;
|
|
}
|