/* Ultra‑lightweight top‑level JSON field extractor. * Return the element directly as an expr.c token. * This code is directly included inside expr.c. * * Copyright (c) 2025-Present, Redis Ltd. * All rights reserved. * * Licensed under your choice of the Redis Source Available License 2.0 * (RSALv2) or the Server Side Public License v1 (SSPLv1). * * Originally authored by: Salvatore Sanfilippo. * * ------------------------------------------------------------------ * * DESIGN GOALS: * * 1. Zero heap allocations while seeking the requested key. * 2. A single parse (and therefore a single allocation, if needed) * when the key finally matches. * 3. Same subset‑of‑JSON coverage needed by expr.c: * - Strings (escapes: \" \\ \n \r \t). * - Numbers (double). * - Booleans. * - Null. * - Flat arrays of the above primitives. * * Any other value (nested object, unicode escape, etc.) returns NULL. * Should be very easy to extend it in case in the future we want * more for the FILTER option of VSIM. * 4. No global state, so this file can be #included directly in expr.c. * * The only API expr.c uses directly is: * * exprtoken *jsonExtractField(const char *json, size_t json_len, * const char *field, size_t field_len); * ------------------------------------------------------------------ */ #include #include // Forward declarations. static int jsonSkipValue(const char **p, const char *end); static exprtoken *jsonParseValueToken(const char **p, const char *end); /* Similar to ctype.h isdigit() but covers the whole JSON number charset, * including exp form. */ static int jsonIsNumberChar(int c) { return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E'; } /* ========================== Fast skipping of JSON ========================= * The helpers here are designed to skip values without performing any * allocation. This way, for the use case of this JSON parser, we are able * to easily (and with good speed) skip fields and values we are not * interested in. Then, later in the code, when we find the field we want * to obtain, we finally call the functions that turn a given JSON value * associated to a field into our of our expressions token. * ========================================================================== */ /* Advance *p consuming all the spaces. */ static inline void jsonSkipWhiteSpaces(const char **p, const char *end) { while (*p < end && isspace((unsigned char)**p)) (*p)++; } /* Advance *p past a JSON string. Returns 1 on success, 0 on error. */ static int jsonSkipString(const char **p, const char *end) { if (*p >= end || **p != '"') return 0; (*p)++; /* Skip opening quote. */ while (*p < end) { if (**p == '\\') { (*p) += 2; continue; } if (**p == '"') { (*p)++; /* Skip closing quote. */ return 1; } (*p)++; } return 0; /* unterminated */ } /* Skip an array or object generically using depth counter. * Opener and closer tells the function how the aggregated * data type starts/stops, basically [] or {}. */ static int jsonSkipBracketed(const char **p, const char *end, char opener, char closer) { int depth = 1; (*p)++; /* Skip opener. */ /* Loop until we reach the end of the input or find the matching * closer (depth becomes 0). */ while (*p < end && depth > 0) { char c = **p; if (c == '"') { // Found a string, delegate skipping to jsonSkipString(). if (!jsonSkipString(p, end)) { return 0; // String skipping failed (e.g., unterminated) } /* jsonSkipString() advances *p past the closing quote. * Continue the loop to process the character *after* the string. */ continue; } /* If it's not a string, check if it affects the depth for the * specific brackets we are currently tracking. */ if (c == opener) { depth++; } else if (c == closer) { depth--; } /* Always advance the pointer for any non-string character. * This handles commas, colons, whitespace, numbers, literals, * and even nested brackets of a *different* type than the * one we are currently skipping (e.g. skipping a { inside []). */ (*p)++; } /* Return 1 (true) if we successfully found the matching closer, * otherwise there is a parse error and we return 0. */ return depth == 0; } /* Skip a single JSON literal (true, null, ...) starting at *p. * Returns 1 on success, 0 on failure. */ static int jsonSkipLiteral(const char **p, const char *end, const char *lit) { size_t l = strlen(lit); if (*p + l > end) return 0; if (strncmp(*p, lit, l) == 0) { *p += l; return 1; } return 0; } /* Skip number, don't check that number format is correct, just consume * number-alike characters. * * Note: More robust number skipping might check validity, * but for skipping, just consuming plausible characters is enough. */ static int jsonSkipNumber(const char **p, const char *end) { const char *num_start = *p; while (*p < end && jsonIsNumberChar(**p)) (*p)++; return *p > num_start; // Any progress made? Otherwise no number found. } /* Skip any JSON value. 1 = success, 0 = error. */ static int jsonSkipValue(const char **p, const char *end) { jsonSkipWhiteSpaces(p, end); if (*p >= end) return 0; switch (**p) { case '"': return jsonSkipString(p, end); case '{': return jsonSkipBracketed(p, end, '{', '}'); case '[': return jsonSkipBracketed(p, end, '[', ']'); case 't': return jsonSkipLiteral(p, end, "true"); case 'f': return jsonSkipLiteral(p, end, "false"); case 'n': return jsonSkipLiteral(p, end, "null"); default: return jsonSkipNumber(p, end); } } /* =========================== JSON to exprtoken ============================ * The functions below convert a given json value to the equivalent * expression token structure. * ========================================================================== */ static exprtoken *jsonParseStringToken(const char **p, const char *end) { if (*p >= end || **p != '"') return NULL; const char *start = ++(*p); int esc = 0; size_t len = 0; int has_esc = 0; const char *q = *p; while (q < end) { if (esc) { esc = 0; q++; len++; has_esc = 1; continue; } if (*q == '\\') { esc = 1; q++; continue; } if (*q == '"') break; q++; len++; } if (q >= end || *q != '"') return NULL; // Unterminated string exprtoken *t = exprNewToken(EXPR_TOKEN_STR); if (!has_esc) { // No escapes, we can point directly into the original JSON string. t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL; } else { // Escapes present, need to allocate and copy/process escapes. char *dst = RedisModule_Alloc(len + 1); t->str.start = t->str.heapstr = dst; t->str.len = len; const char *r = start; esc = 0; while (r < q) { if (esc) { switch (*r) { // Supported escapes from Goal 3. case 'n': *dst='\n'; break; case 'r': *dst='\r'; break; case 't': *dst='\t'; break; case '\\': *dst='\\'; break; case '"': *dst='\"'; break; // Escapes (like \uXXXX, \b, \f) are not supported for now, // we just copy them verbatim. default: *dst=*r; break; } dst++; esc = 0; r++; continue; } if (*r == '\\') { esc = 1; r++; continue; } *dst++ = *r++; } *dst = '\0'; // Null-terminate the allocated string. } *p = q + 1; // Advance the main pointer past the closing quote. return t; } static exprtoken *jsonParseNumberToken(const char **p, const char *end) { // Use a buffer to extract the number literal for parsing with strtod(). char buf[256]; int idx = 0; const char *start = *p; // For strtod partial failures check. // Copy potential number characters to buffer. while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) { buf[idx++] = **p; (*p)++; } buf[idx]='\0'; // Null-terminate buffer. if (idx==0) return NULL; // No number characters found. char *ep; // End pointer for strtod validation. double v = strtod(buf, &ep); /* Check if strtod() consumed the entire buffer content. * If not, the number format was invalid. */ if (*ep!='\0') { // strtod() failed; rewind p to the start and return NULL *p = start; return NULL; } // If strtod() succeeded, create and return the token.. exprtoken *t = exprNewToken(EXPR_TOKEN_NUM); t->num = v; return t; } static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) { size_t l = strlen(lit); // Ensure we don't read past 'end'. if ((*p + l) > end) return NULL; if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match. // Check that the character *after* the literal is a valid JSON delimiter // (whitespace, comma, closing bracket/brace, or end of input) // This prevents matching "trueblabla" as "true". if ((*p + l) < end) { char next_char = *(*p + l); if (!isspace((unsigned char)next_char) && next_char!=',' && next_char!=']' && next_char!='}') { return NULL; // Invalid character following literal. } } // Literal matched and is correctly terminated. *p += l; exprtoken *t = exprNewToken(type); t->num = num; return t; } static exprtoken *jsonParseArrayToken(const char **p, const char *end) { if (*p >= end || **p != '[') return NULL; (*p)++; // Skip '['. jsonSkipWhiteSpaces(p,end); exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE); t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0; // Handle empty array []. if (*p < end && **p == ']') { (*p)++; // Skip ']'. return t; } // Parse array elements. while (1) { exprtoken *ele = jsonParseValueToken(p,end); if (!ele) { exprTokenRelease(t); // Clean up partially built array token. return NULL; } // Grow allocated space for elements if needed. if (t->tuple.len == alloc) { size_t newsize = alloc ? alloc * 2 : 4; // Check for potential overflow if newsize becomes huge. if (newsize < alloc) { exprTokenRelease(ele); exprTokenRelease(t); return NULL; } exprtoken **newele = RedisModule_Realloc(t->tuple.ele, sizeof(exprtoken*)*newsize); t->tuple.ele = newele; alloc = newsize; } t->tuple.ele[t->tuple.len++] = ele; // Add element. jsonSkipWhiteSpaces(p,end); if (*p>=end) { // Unterminated array. Note that this check is crucial because // previous value parsed may seek 'p' to 'end'. exprTokenRelease(t); return NULL; } // Check for comma (more elements) or closing bracket. if (**p == ',') { (*p)++; // Skip ',' jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element continue; // Parse next element } else if (**p == ']') { (*p)++; // Skip ']' return t; // End of array } else { // Unexpected character (not ',' or ']') exprTokenRelease(t); return NULL; } } } /* Turn a JSON value into an expr token. */ static exprtoken *jsonParseValueToken(const char **p, const char *end) { jsonSkipWhiteSpaces(p,end); if (*p >= end) return NULL; switch (**p) { case '"': return jsonParseStringToken(p,end); case '[': return jsonParseArrayToken(p,end); case '{': return NULL; // No nested elements support for now. case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1); case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0); case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0); default: // Check if it starts like a number. if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') { return jsonParseNumberToken(p,end); } // Anything else is an unsupported type or malformed JSON. return NULL; } } /* ============================== Fast key seeking ========================== */ /* Finds the start of the value for a given field key within a JSON object. * Returns pointer to the first char of the value, or NULL if not found/error. * This function does not perform any allocation and is optimized to seek * the specified *toplevel* filed as fast as possible. */ static const char *jsonSeekField(const char *json, const char *end, const char *field, size_t flen) { const char *p = json; jsonSkipWhiteSpaces(&p,end); if (p >= end || *p != '{') return NULL; // Must start with '{'. p++; // skip '{'. while (1) { jsonSkipWhiteSpaces(&p,end); if (p >= end) return NULL; // Reached end within object. if (*p == '}') return NULL; // End of object, field not found. // Expecting a key (string). if (*p != '"') return NULL; // Key must be a string. // --- Key Matching using jsonSkipString --- const char *key_start = p + 1; // Start of key content. const char *key_end_p = p; // Will later contain the end. // Use jsonSkipString() to find the end. if (!jsonSkipString(&key_end_p, end)) { // Unterminated / invalid key string. return NULL; } // Calculate the length of the key's content. size_t klen = (key_end_p - 1) - key_start; /* Perform the comparison using the raw key content. * WARNING: This uses memcmp(), so we don't handle escaped chars * within the key matching against unescaped chars in 'field'. */ int match = klen == flen && !memcmp(key_start, field, flen); // Update the main pointer 'p' to be after the key string. p = key_end_p; // Now we expect to find a ":" followed by a value. jsonSkipWhiteSpaces(&p,end); if (p>=end || *p!=':') return NULL; // Expect ':' after key p++; // Skip ':'. // Seek value. jsonSkipWhiteSpaces(&p,end); if (p>=end) return NULL; // Expect value after ':' if (match) { // Found the matching key, p now points to the start of the value. return p; } else { // Key didn't match, skip the corresponding value. if (!jsonSkipValue(&p,end)) return NULL; // Syntax error. } // Look for comma or a closing brace. jsonSkipWhiteSpaces(&p,end); if (p>=end) return NULL; // Reached end after value. if (*p == ',') { p++; // Skip comma, continue loop to find next key. continue; } else if (*p == '}') { return NULL; // Reached end of object, field not found. } return NULL; // Malformed JSON (unexpected char after value). } } /* This is the only real API that this file conceptually exports (it is * inlined, actually). */ exprtoken *jsonExtractField(const char *json, size_t json_len, const char *field, size_t field_len) { const char *end = json + json_len; const char *valptr = jsonSeekField(json,end,field,field_len); if (!valptr) return NULL; /* Key found, valptr points to the start of the value. * Convert it into an expression token object. */ return jsonParseValueToken(&valptr,end); }