Add 'modules/vector-sets/' from commit 'c6db0a7c20ff5638f3a0c9ce9c106303daeb2f67'

git-subtree-dir: modules/vector-sets
git-subtree-mainline: 8ea8f4220c
git-subtree-split: c6db0a7c20
This commit is contained in:
YaacovHazan 2025-04-02 16:34:28 +03:00
commit 78e0d87177
41 changed files with 14928 additions and 0 deletions

11
modules/vector-sets/.gitignore vendored Normal file
@@ -0,0 +1,11 @@
__pycache__
misc
*.so
*.xo
*.o
.DS_Store
w2v
word2vec.bin
TODO
*.txt
*.rdb

@@ -0,0 +1,2 @@
This code is Copyright (c) 2024-Present, Redis Ltd.
All Rights Reserved.

@@ -0,0 +1,84 @@
# Compiler settings
CC = cc
ifdef SANITIZER
ifeq ($(SANITIZER),address)
SAN=-fsanitize=address
else
ifeq ($(SANITIZER),undefined)
SAN=-fsanitize=undefined
else
ifeq ($(SANITIZER),thread)
SAN=-fsanitize=thread
else
$(error "unknown sanitizer=${SANITIZER}")
endif
endif
endif
endif
CFLAGS = -O2 -Wall -Wextra -g $(SAN) -std=c11
LDFLAGS = -lm $(SAN)
# Detect OS
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
# Shared library compile flags for linux / osx
ifeq ($(uname_S),Linux)
SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c11 -O2
SHOBJ_LDFLAGS ?= -shared
ifneq (,$(findstring armv,$(uname_M)))
SHOBJ_LDFLAGS += -latomic
endif
ifneq (,$(findstring aarch64,$(uname_M)))
SHOBJ_LDFLAGS += -latomic
endif
else
SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c11 -O3
SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup
endif
# OS X 11.x doesn't have /usr/lib/libSystem.dylib and needs an explicit setting.
ifeq ($(uname_S),Darwin)
ifeq ("$(wildcard /usr/lib/libSystem.dylib)","")
LIBS = -L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lsystem
endif
endif
.SUFFIXES: .c .so .xo .o
all: vset.so

.c.xo:
	$(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@

vset.xo: redismodule.h expr.c

vset.so: vset.xo hnsw.xo cJSON.xo
	$(CC) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) $(SAN) -lc

# Example sources / objects
SRCS = hnsw.c w2v.c
OBJS = $(SRCS:.c=.o)
TARGET = w2v
MODULE = vset.so

# Default target
all: $(TARGET) $(MODULE)

# Example linking rule
$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(LDFLAGS) -o $(TARGET)

# Compilation rule for object files
%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

# Clean rule
clean:
	rm -f $(TARGET) $(OBJS) *.xo *.so

# Declare phony targets
.PHONY: all clean

@@ -0,0 +1,633 @@
This module implements Vector Sets for Redis, a new Redis data type similar
to Sorted Sets but having string elements associated with a vector instead of
a score. The fundamental goal of Vector Sets is to make it possible to add items,
and later retrieve a subset of the added items that are the most similar to a
specified vector (often a learned embedding), or the most similar to the vector
of an element that is already part of the Vector Set.
Moreover, Vector Sets implement optional filtered search capabilities: it is possible to associate attributes with all or a subset of the elements in the set, and then, using the `FILTER` option of the `VSIM` command, ask for items similar to a given vector that also pass a filter specified as a simple mathematical expression (like `".year > 1950"`). This means that **you can have vector similarity and scalar filters at the same time**.
## Installation
Build with:
make
Then load the module with the following command line, or by inserting the needed directives in the `redis.conf` file.
./redis-server --loadmodule vset.so
To run tests, I suggest using this:
./redis-server --save "" --enable-debug-command yes
Then execute the tests with:
./test.py
## Reference of available commands
**VADD: add items into a vector set**
VADD key [REDUCE dim] FP32|VALUES vector element [CAS] [NOQUANT | Q8 | BIN]
[EF build-exploration-factor] [SETATTR <attributes>] [M <numlinks>]
Add a new element into the vector set specified by the key.
The vector can be provided as an FP32 blob of values, or as floating point
numbers as strings, prefixed by the number of elements (3 in the example):
VADD mykey VALUES 3 0.1 1.2 0.5 my-element
Meaning of the options:
`REDUCE` implements random projection, in order to reduce the
dimensionality of the vector. The projection matrix is saved and reloaded
along with the vector set. **Please note that** the `REDUCE` option must be passed immediately before the vector, like in `REDUCE 50 VALUES ...`.
`CAS` performs the operation partially using threads, in a
check-and-set style. The neighbor candidates collection, which is slow, is
performed in the background, while the command is executed in the main thread.
`NOQUANT` forces the vector to be created (in the first VADD call to a given key) without int8 quantization, which is otherwise the default.
`BIN` forces the vector to use binary quantization instead of int8. This is much faster and uses less memory, but impacts the recall quality.
`Q8` forces the vector to use signed 8-bit quantization. This is the default, and the option exists only to assert, at insertion time, that the vector set uses the same format.
`EF` plays a role in the effort made to find good candidates when connecting the new node to the existing HNSW graph. The default is 200. Using a larger value may help to achieve a better recall. To improve the recall it is also possible to increase `EF` during `VSIM` searches.
`SETATTR` associates attributes with the newly created entry, or updates the attributes if the entry already exists. It is the same as calling the `VSETATTR` command separately, so please check the documentation of that command in the filtered search section of this documentation.
`M` defaults to 16 and is the famous HNSW `M` parameter. It is the maximum number of connections that each node of the graph has with other nodes: more connections mean more memory, but a better ability to explore the graph. Nodes at layer zero (every node exists at least at layer zero) have `M*2` connections, while the other layers only have `M` connections. This means that, for instance, an `M` of 64 will use at least 1024 bytes of memory for each node! That is, `64 links * 2 times * 8 bytes pointers`, and even more, since on average each node has something like 1.33 layers (but the other layers have just `M` connections, instead of `M*2`). If you don't have a recall quality problem, the default is fine, and uses a limited amount of memory.
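As a minimal sketch of calling `VADD` from a client (assuming redis-py; the key names and attribute values are just examples):
```
import redis

r = redis.Redis(decode_responses=True)

vec = [0.1, 1.2, 0.5]
# VALUES form: the dimension first, then the components.
r.execute_command("VADD", "mykey", "VALUES", len(vec), *vec, "my-element")

# Same insertion with some of the options described above.
r.execute_command(
    "VADD", "mykey2",
    "REDUCE", 2,                  # random projection down to 2 dimensions
    "VALUES", len(vec), *vec,
    "my-element",
    "SETATTR", '{"year": 1984}',  # attach JSON attributes
    "M", 32,                      # denser graph: better recall, more memory
)
```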
**VSIM: return elements by vector similarity**
VSIM key [ELE|FP32|VALUES] <vector or element> [WITHSCORES] [COUNT num] [EF search-exploration-factor] [FILTER expression] [FILTER-EF max-filtering-effort] [TRUTH] [NOTHREAD]
The command returns similar vectors. For simplicity, in the following example, instead of providing a vector using FP32 or VALUES (as in `VADD`), we ask for elements having a vector similar to that of an element already in the vector set:
> VSIM word_embeddings ELE apple
1) "apple"
2) "apples"
3) "pear"
4) "fruit"
5) "berry"
6) "pears"
7) "strawberry"
8) "peach"
9) "potato"
10) "grape"
It is possible to specify a `COUNT` and also to get the similarity score (from 1 to 0, where 1 means identical vectors and 0 means opposite vectors) between the query and the returned items.
> VSIM word_embeddings ELE apple WITHSCORES COUNT 3
1) "apple"
2) "0.9998867657923256"
3) "apples"
4) "0.8598527610301971"
5) "pear"
6) "0.8226882219314575"
The `EF` argument is the exploration factor: the higher it is, the slower the command becomes, but the better the index is explored to find nodes that are near to our query. Sensible values are from 50 to 1000.
The `TRUTH` option forces the command to perform a linear scan of all the entries inside the set, without using the graph search inside the HNSW, so it returns the best matching elements (the perfect result set) that can be used in order to easily calculate the recall. Of course the linear scan is `O(N)`, so it is much slower than the `log(N)` (considering a small `COUNT`) provided by the HNSW index.
The `NOTHREAD` option forces the command to execute the search on the data structure in the main thread. Normally `VSIM` spawns a thread instead. This may be useful for benchmarking purposes, or when we work with extremely small vector sets and don't want to pay the cost of spawning a thread. It is possible that in the future this option will be automatically used by Redis when we detect small vector sets. Note that this option blocks the server for all the time needed to complete the command, so it is a source of potential latency issues: if you are in doubt, never use it.
For `FILTER` and `FILTER-EF` options, please check the filtered search section of this documentation.
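For instance, a small sketch of issuing the query above from Python (assuming redis-py) and pairing up the flat `WITHSCORES` reply:
```
import redis

r = redis.Redis(decode_responses=True)
reply = r.execute_command(
    "VSIM", "word_embeddings", "ELE", "apple",
    "WITHSCORES", "COUNT", 3, "EF", 500,
)
# The reply alternates element and score: flatten it into pairs.
pairs = [(reply[i], float(reply[i + 1])) for i in range(0, len(reply), 2)]
print(pairs)  # e.g. [('apple', 0.9998...), ('apples', 0.8598...), ...]
```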
**VDIM: return the dimension of the vectors inside the vector set**
VDIM keyname
Example:
> VDIM word_embeddings
(integer) 300
Note that in the case of vectors that were populated using the `REDUCE`
option, for random projection, the vector set will report the size of
the projected (reduced) dimension. Yet the user should perform all the
queries using full-size vectors.
**VCARD: return the number of elements in a vector set**
VCARD key
Example:
> VCARD word_embeddings
(integer) 3000000
**VREM: remove elements from vector set**
VREM key element
Example:
> VADD vset VALUES 3 1 0 1 bar
(integer) 1
> VREM vset bar
(integer) 1
> VREM vset bar
(integer) 0
VREM does not perform tombstone / logical deletion, but will actually reclaim
the memory from the vector set, so it is safe to add and remove elements
in a vector set in the context of long running applications that continuously
update the same index.
**VEMB: return the approximated vector of an element**
VEMB key element
Example:
> VEMB word_embeddings SQL
1) "0.18208661675453186"
2) "0.08535309880971909"
3) "0.1365649551153183"
4) "-0.16501599550247192"
5) "0.14225517213344574"
... 295 more elements ...
Because vector sets perform insertion-time normalization and optional
quantization, the returned vector may be an approximation. `VEMB` takes
care of de-quantizing and de-normalizing the vector before returning it.
It is possible to ask VEMB to return raw data, that is, the internal representation used by the vector: fp32, int8, or a bitmap for binary quantization. This behavior is triggered by the `RAW` option of VEMB:
VEMB word_embedding apple RAW
In this case the return value of the command is an array of three or more elements:
1. The name of the quantization used, that is one of: "fp32", "bin", "q8".
2. A string blob containing the raw data: 4-byte fp32 floats for fp32, a bitmap for binary quantization, or an int8 byte array for q8 quantization.
3. A float representing the L2 norm of the vector before normalization. You need to multiply the components by this value if you want to de-normalize the vector for any reason.
For q8 quantization, an additional element is also returned: the quantization
range, so the integers from -127 to 127 represent (normalized) components
in the range `-range`, `+range`.
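As a hedged sketch of using this reply (assuming redis-py with binary-safe replies, and example key/element names), one can reconstruct an approximation of the original vector from a q8 `RAW` reply like this:
```
import struct
import redis

r = redis.Redis()  # no decode_responses: the blob must stay binary

reply = r.execute_command("VEMB", "word_embeddings", "apple", "RAW")
quant, blob, l2, qrange = reply               # q8 replies carry four elements
assert quant == b"q8"
ints = struct.unpack(f"{len(blob)}b", blob)   # signed int8 components
# i/127 * range undoes the quantization; multiplying by the L2 norm
# undoes the insertion-time normalization.
vec = [i / 127.0 * float(qrange) * float(l2) for i in ints]
```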
**VLINKS: introspection command that shows neighbors for a node**
VLINKS key element [WITHSCORES]
The command reports the neighbors for each level.
**VINFO: introspection command that shows info about a vector set**
VINFO key
Example:
> VINFO word_embeddings
1) quant-type
2) int8
3) vector-dim
4) (integer) 300
5) size
6) (integer) 3000000
7) max-level
8) (integer) 12
9) vset-uid
10) (integer) 1
11) hnsw-max-node-uid
12) (integer) 3000000
**VSETATTR: associate or remove the JSON attributes of elements**
VSETATTR key element "{... json ...}"
Each element of a vector set can be optionally associated with a JSON string
in order to use the `FILTER` option of `VSIM` to filter elements by scalars
(see the filtered search section for more information). This command can set,
update (if already set) or delete (if you set to an empty string) the
associated JSON attributes of an element.
The command returns 0 if the element or the key does not exist, without
raising an error; otherwise 1 is returned, and the element attributes
are set or updated.
**VGETATTR: retrieve the JSON attributes of elements**
VGETATTR key element
The command returns the JSON attribute associated with an element, or
null if the element has no associated attributes, if the element does not
exist, or if the key does not exist.
**VRANDMEMBER: return random members from a vector set**
VRANDMEMBER key [count]
Return one or more random elements from a vector set.
The semantics of this command are similar to Redis's native SRANDMEMBER command:
- When called without count, returns a single random element from the set, as a single string (no array reply).
- When called with a positive count, returns up to count distinct random elements (no duplicates).
- When called with a negative count, returns count random elements, potentially with duplicates.
- If a positive count is larger than the set size, just the entire set is returned.
If the key doesn't exist, returns a Null reply if count is not given, or an empty array if a count is provided.
Examples:
> VADD vset VALUES 3 1 0 0 elem1
(integer) 1
> VADD vset VALUES 3 0 1 0 elem2
(integer) 1
> VADD vset VALUES 3 0 0 1 elem3
(integer) 1
# Return a single random element
> VRANDMEMBER vset
"elem2"
# Return 2 distinct random elements
> VRANDMEMBER vset 2
1) "elem1"
2) "elem3"
# Return 3 random elements with possible duplicates
> VRANDMEMBER vset -3
1) "elem2"
2) "elem2"
3) "elem1"
# Return more elements than in the set (returns all elements)
> VRANDMEMBER vset 10
1) "elem1"
2) "elem2"
3) "elem3"
# When key doesn't exist
> VRANDMEMBER nonexistent
(nil)
> VRANDMEMBER nonexistent 3
(empty array)
This command is particularly useful for:
1. Selecting random samples from a vector set for testing or training.
2. Performance testing by retrieving random elements for subsequent similarity searches.
When the user asks for unique elements (positive count) the implementation optimizes for two scenarios:
- For small sample sizes (less than 20% of the set size), it uses a dictionary to avoid duplicates, and performs a real random walk inside the graph.
- For large sample sizes (more than 20% of the set size), it starts from a random node and sequentially traverses the internal list, providing faster performance but less random elements.
The command has `O(N)` worst-case time complexity when requesting many unique elements (it uses linear scanning), or `O(M*log(N))` complexity when the user asks for `M` random elements in a vector set of `N` elements, with `M` much smaller than `N`.
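For example, a hedged sketch combining `VRANDMEMBER` with the `TRUTH` option of `VSIM` to estimate recall (assumes redis-py and an example key name):
```
import redis

r = redis.Redis(decode_responses=True)
hits = total = 0
for elem in r.execute_command("VRANDMEMBER", "word_embeddings", 20):
    approx = r.execute_command("VSIM", "word_embeddings", "ELE", elem,
                               "COUNT", 10)
    exact = r.execute_command("VSIM", "word_embeddings", "ELE", elem,
                              "COUNT", 10, "TRUTH")
    hits += len(set(approx) & set(exact))
    total += len(exact)
print(f"recall: {hits / total:.2%}")
```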
# Filtered search
Each element of the vector set can be associated with a set of attributes specified as a JSON blob:
> VADD vset VALUES 3 1 1 1 a SETATTR '{"year": 1950}'
(integer) 1
> VADD vset VALUES 3 -1 -1 -1 b SETATTR '{"year": 1951}'
(integer) 1
Specifying an attribute with the `SETATTR` option of `VADD` is exactly equivalent to adding an element and then setting (or updating, if already set) its JSON attribute string. Symmetrically, the `VGETATTR` command returns the attribute associated with a given element.
> VADD vset VALUES 3 0 1 0 c
(integer) 1
> VSETATTR vset c '{"year": 1952}'
(integer) 1
> VGETATTR vset c
"{\"year\": 1952}"
At this point, I can use the FILTER option of VSIM to ask only for the subset of elements that satisfy my expression:
> VSIM vset VALUES 3 0 0 0 FILTER '.year > 1950'
1) "c"
2) "b"
The items will be returned again in order of similarity (most similar first), but only the items with the year field matching the expression are returned.
The expressions are similar to what you would write inside the `if` statement of JavaScript or other familiar programming languages: you can use `and`, `or`, the obvious math operators like `+`, `-`, `/`, `>=`, `<`, ... and so forth (see the expressions section for more info). The selectors of the JSON object attributes start with a dot followed by the name of the key inside the JSON objects.
Elements with invalid JSON, or lacking the specified field, **are considered as not matching** the expression, but will not generate any error at runtime.
## FILTER expressions capabilities
FILTER expressions allow you to perform complex filtering on vector similarity results using a JavaScript-like syntax. The expression is evaluated against each element's JSON attributes, with only elements that satisfy the expression being included in the results.
### Expression Syntax
Expressions support the following operators and capabilities:
1. **Arithmetic operators**: `+`, `-`, `*`, `/`, `%` (modulo), `**` (exponentiation)
2. **Comparison operators**: `>`, `>=`, `<`, `<=`, `==`, `!=`
3. **Logical operators**: `and`/`&&`, `or`/`||`, `!`/`not`
4. **Containment operator**: `in`
5. **Parentheses** for grouping: `(...)`
### Selector Notation
Attributes are accessed using dot notation:
- `.year` references the "year" attribute
- `.movie.year` would **NOT** reference the "year" field inside a "movie" object: only keys at the first level of the JSON object are accessible.
### JSON and expressions data types
Expressions can work with:
- Numbers (double precision floats)
- Strings (enclosed in single or double quotes)
- Booleans (no native type: they are represented as 1 for true, 0 for false)
- Arrays (for use with the `in` operator: `value in [1, 2, 3]`)
JSON attributes are converted in this way:
- Numbers will be converted to numbers.
- Strings to strings.
- Booleans to 0 or 1 number.
- Arrays to tuples (for "in" operator), but only if composed of just numbers and strings.
Any other type is ignored, and accessing it will make the expression evaluate to false.
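For instance, a boolean attribute can be tested as 0/1 and an array attribute can be used with the `in` operator (a hypothetical key and attributes, assuming the conversion rules above):
```
> VADD vset VALUES 3 1 1 1 doc SETATTR '{"public": true, "tags": ["a", "b"]}'
(integer) 1
> VSIM vset VALUES 3 1 1 1 FILTER '.public == 1 and "a" in .tags'
1) "doc"
```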
### Examples
```
# Find items from the 1980s
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.year >= 1980 and .year < 1990'
# Find action movies with high ratings
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.genre == "action" and .rating > 8.0'
# Find movies directed by either Spielberg or Nolan
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.director in ["Spielberg", "Nolan"]'
# Complex condition with numerical operations
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '(.year - 2000) ** 2 < 100 and .rating / 2 > 4'
```
### Error Handling
Elements with any of the following conditions are considered not matching:
- Missing the queried JSON attribute
- Having invalid JSON in their attributes
- Having a JSON value that cannot be converted to the expected type
This behavior allows you to safely filter on optional attributes without generating errors.
### FILTER effort
The `FILTER-EF` option controls the maximum effort spent when filtering vector search results.
When performing vector similarity search with filtering, Vector Sets perform the standard similarity search while applying the filter expression to each node encountered. Since many results might be filtered out, Vector Sets may need to examine many more candidates than the requested `COUNT` to ensure sufficient matching results are returned. If the elements matching the filter are very rare, or if there are fewer matching elements than the specified count, this could even trigger a full scan of the HNSW graph.
For this reason, by default, the maximum effort is limited to a reasonable amount of nodes explored.
### Modifying the FILTER effort
1. By default, Vector Sets will explore up to `COUNT * 100` candidates to find matching results.
2. You can control this exploration with the `FILTER-EF` parameter.
3. A higher `FILTER-EF` value increases the chances of finding all relevant matches at the cost of increased processing time.
4. A `FILTER-EF` of zero will explore as many nodes as needed in order to actually return the number of elements specified by `COUNT`.
5. Even when a high `FILTER-EF` value is specified, **the implementation will do a lot less work** if the elements passing the filter are very common, because of the early stop conditions of the HNSW implementation (once the specified amount of elements is reached and the quality check of the remaining candidates triggers an early stop).
```
VSIM key [ELE|FP32|VALUES] <vector or element> COUNT 10 FILTER '.year > 2000' FILTER-EF 500
```
In this example, Vector Sets will examine up to 500 potential nodes. Of course, if the count is reached before exploring 500 nodes, and the quality checks show that it is not possible to make progress on similarity, the search ends sooner.
### Performance Considerations
- If you have highly selective filters (few items match), use a higher `FILTER-EF`, or just design your application to handle a result set that is smaller than the requested count. Note that, in any case, the additional elements may be too distant from the query vector.
- For less selective filters, the default should be sufficient.
- Very selective filters with low `FILTER-EF` values may return fewer items than requested.
- Extremely high values may impact performance without significantly improving results.
The optimal `FILTER-EF` value depends on:
1. The selectivity of your filter.
2. The distribution of your data.
3. The required recall quality.
A good practice is to start with the default and increase if needed when you observe fewer results than expected.
### Testing a large-ish data set
To really see how things work at scale, you can [download](https://antirez.com/word2vec_with_attribs.rdb) the following dataset:
wget https://antirez.com/word2vec_with_attribs.rdb
It contains the 3 million words of Word2Vec, each having as attribute a JSON object with just the length of the word. Because of the length distribution of words in large amounts of text, where longer words become less and less common, this is ideal to check how filtering behaves as the filter matches fewer and fewer elements in the vector set.
For instance:
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 6"
1) "pastas"
2) "rotini"
3) "gnocci"
4) "panino"
5) "salads"
6) "breads"
7) "salame"
8) "sauces"
9) "cheese"
10) "fritti"
This will easily retrieve the desired amount of items (`COUNT` is 10 by default) since there are many items of length 6. However:
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 33"
1) "skinless_boneless_chicken_breasts"
2) "boneless_skinless_chicken_breasts"
3) "Boneless_skinless_chicken_breasts"
This time, even if we asked for 10 items, we only get 3, since the default filter effort will be `10*100 = 1000`. We can tune this by specifying the effort explicitly, at the risk of a slower query, of course:
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 33" FILTER-EF 10000
1) "skinless_boneless_chicken_breasts"
2) "boneless_skinless_chicken_breasts"
3) "Boneless_skinless_chicken_breasts"
4) "mozzarella_feta_provolone_cheddar"
5) "Greatfood.com_R_www.greatfood.com"
6) "Pepperidge_Farm_Goldfish_crackers"
7) "Prosecuted_Mobsters_Rebuilt_Dying"
8) "Crispy_Snacker_Sandwiches_Popcorn"
9) "risultati_delle_partite_disputate"
10) "Peppermint_Mocha_Twist_Gingersnap"
This time we get all ten items, even if the last one is quite far from our query vector. We encourage you to experiment with this test dataset to better understand the dynamics of the implementation and the natural tradeoffs of filtered search.
**Keep in mind** that, by default, Redis Vector Sets will try to avoid a likely useless huge scan of the HNSW graph, and would rather return few or no elements at all, since this is almost always what the user actually wants in the context of retrieving *similar* items to the query.
# Single Instance Scalability and Latency
Vector Sets implement a threading model that allows Redis to handle many concurrent requests: by default `VSIM` is always threaded, and `VADD` is not (but can be partially threaded using the `CAS` option). This section explains how the threading and locking mechanisms work, and what to expect in terms of performance.
## Threading Model
- The `VSIM` command runs in a separate thread by default, allowing Redis to continue serving other commands.
- A maximum of 32 threads can run concurrently (defined by `HNSW_MAX_THREADS`).
- When this limit is reached, additional `VSIM` requests are queued: Redis remains responsive and no latency event is generated.
- The `VADD` command with the `CAS` option also leverages threading for the computation-heavy candidate search phase, but the insertion itself is performed in the main thread. `VADD` always runs in sub-millisecond time, so this is not a source of latency, but sustaining many hundreds of writes per second can still be challenging for a single instance. Please look at the section below about scaling to multiple instances.
- Commands run within Lua scripts, MULTI/EXEC blocks, or from replication are executed in the main thread to ensure consistency.
```
> VSIM vset VALUES 3 1 1 1 FILTER '.year > 2000' # This runs in a thread.
> VADD vset VALUES 3 1 1 1 element CAS # Candidate search runs in a thread.
```
## Locking Mechanism
Vector Sets use a read/write locking mechanism to coordinate access:
- Reads (`VSIM`, `VEMB`, etc.) acquire a read lock, allowing multiple concurrent reads.
- Writes (`VADD`, `VREM`, etc.) acquire a write lock, temporarily blocking all reads.
- When a write lock is requested while reads are in progress, the write operation waits for all reads to complete.
- Once a write lock is granted, all reads are blocked until the write completes.
- Each thread has a dedicated slot for tracking visited nodes during graph traversal, avoiding contention. This improves performance but limits the maximum number of concurrent threads, since each node has a memory cost proportional to the number of slots.
## DEL latency
Deleting a very large vector set (millions of elements) can cause latency spikes, as deletion rebuilds connections between nodes. This may change in the future.
The deletion latency is most noticeable when using `DEL` on a key containing a large vector set or when the key expires.
## Performance Characteristics
- Search operations (`VSIM`) scale almost linearly with the number of CPU cores available, up to the thread limit. You can expect a Vector Set composed of millions of items, with vectors of dimension 300 and the default int8 quantization, to deliver around 50k VSIM operations per second on a single host.
- Insertion operations (`VADD`) are more computationally expensive than searches, and can't be threaded: expect much lower throughput, in the range of a few thousand inserts per second.
- Binary quantization offers significantly faster search performance at the cost of some recall quality, while int8 quantization, the default, has a very small impact on recall quality, while significantly improving performance and space efficiency.
- The `EF` parameter has a major impact on both search quality and performance - higher values mean better recall but slower searches.
- Graph traversal time scales logarithmically with the number of elements, making Vector Sets efficient even with millions of vectors.
## Loading / Saving performances
Vector Sets are able to serialize to disk the graph structure as it is in memory, so loading the data back does not require rebuilding the HNSW graph. This means Redis can load millions of items per minute. For instance, 3 million items with 300-component vectors can be loaded back into memory in around 15 seconds.
# Scaling vector sets to multiple instances
The fundamental way vector sets can be scaled to very large data sets
and to many Redis instances is that a given very large set of vectors
can be partitioned into N different Redis keys, that can also live into
different Redis instances.
For instance, I could add my elements into `key0`, `key1`, `key2`, by hashing
the item in some way, like doing `crc32(item)%3`, effectively splitting
the dataset into three different parts. However, once I want the vectors
of my dataset nearest to a given query vector, I can simply perform the
`VSIM` command against all three keys, merging the results by
score (so the commands must be called using the `WITHSCORES` option) on
the client side: once the union of the results is ordered by the
similarity score, the query is equivalent to having a single key
containing all the items.
There are a few interesting facts to note about this pattern:
1. It is possible to have a logical sorted set that is as big as the sum of all the Redis instances we are using.
2. Deletion operations remain simple: we can hash the item and select the key where it belongs.
3. However, even if I use 10 different Redis instances, I'm not going to reach 10x the **read** operations per second, compared to using a single server: for each logical query, I need to query all the instances. Yet, smaller graphs are faster to navigate, so there is some win even from the point of view of CPU usage.
4. Insertions, so **write** queries, will be scaled linearly: I can add N items against N instances at the same time, splitting the insertion load evenly. This is very important since vector sets, being based on HNSW data structures, are slower to add items than to query similar items, by a very big factor.
5. While it cannot always guarantee the best results, with proper timeout management this system may be considered *highly available*: if a subset of the N instances is reachable, I'll still be able to return items similar to my query vector.
Notably, this pattern can be implemented in a way that avoids paying the sum of the round trip times with all the servers: it is possible to send the queries to all the instances at the same time, so that the latency will equal the slowest reply out of the N server queries.
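A minimal sketch of this fan-out pattern (assuming redis-py; for simplicity all keys live on one connection here, while a real deployment would query different instances, ideally in parallel):
```
import zlib
import redis

r = redis.Redis(decode_responses=True)
KEYS = ["key0", "key1", "key2"]

def shard(item):
    return KEYS[zlib.crc32(item.encode()) % len(KEYS)]

def sharded_vadd(vec, item):
    r.execute_command("VADD", shard(item), "VALUES", len(vec), *vec, item)

def sharded_vsim(vec, count=10):
    merged = []
    for key in KEYS:  # in production: issue these concurrently
        reply = r.execute_command("VSIM", key, "VALUES", len(vec), *vec,
                                  "WITHSCORES", "COUNT", count)
        merged += [(reply[i], float(reply[i + 1]))
                   for i in range(0, len(reply), 2)]
    merged.sort(key=lambda pair: pair[1], reverse=True)  # merge by score
    return merged[:count]
```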
# Optimizing memory usage
Vector Sets, or better, HNSWs, the underlying data structure used by Vector Sets, combined with the features provided by Vector Sets themselves (quantization, random projection, filtering, ...) form an implementation with a non-trivial space of tunable parameters. Despite the complexity of the implementation and of vector similarity problems, here is a list of simple ideas that can guide the user in picking the best settings:
* 8-bit quantization (the default) is almost always a win. It reduces the memory usage of vectors by a factor of 4, yet the performance penalty in terms of recall is minimal. It also reduces insertion and search time by around 2 times or more.
* Binary quantization is much more extreme: it makes vector sets a lot faster, but noticeably increases the recall error, for instance from 95% to 80% if all the other parameters remain the same. Yet, the speedup is really big, and the memory usage of vectors, compared to full precision vectors, is 32 times smaller.
* The vectors are not the only cause of the high per-entry memory usage of Vector Sets: nodes contain, on average, `M*2 + M*0.33` pointers, where `M` is 16 by default (but can be tuned in `VADD`, see the `M` option); see the quick estimate after this list. Also, each node stores the string item and the optional JSON attributes: those should be as small as possible in order to avoid contributing further to the memory usage.
* The `M` parameter should be increased to 32 or more only when near-perfect recall is really needed.
* It is possible to save space (less memory usage) by sacrificing time (more CPU time): use a low `M` (the default of 16, for instance) and a high `EF` (the effort parameter of `VSIM`) in order to scan the graph more deeply.
* When memory usage is a serious concern, and we suspect that the vectors we are storing don't contain enough information (at least for our use case) to justify the number of components they feature, random projection (the `REDUCE` option of `VADD`) can be tested to see whether dimensionality reduction is possible with acceptable precision loss.
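A quick back-of-the-envelope check of the per-node link overhead mentioned in this list (a sketch; the real footprint also includes the node header, the string item, and any JSON attributes):
```
def node_link_bytes(M=16, pointer_size=8):
    # Layer 0 has M*2 links; on average ~0.33 extra layers add M links each.
    return (M * 2 + M * 0.33) * pointer_size

print(node_link_bytes())    # ~298 bytes of link pointers with the default M
print(node_link_bytes(64))  # ~1193 bytes, in line with the M=64 example above
```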
## Random projection tradeoffs
Sometimes learned vectors are not as information-dense as we might guess: there
may be components with similar meanings in the space, and components
whose values don't really represent features that matter in our use case.
At the same time, certain vectors are very big, 1024 components or more. In these cases, it is possible to use the random projection feature of Redis Vector Sets in order to reduce both space (less RAM used) and time (more operations per second). The feature is accessible via the `REDUCE` option of the `VADD` command. However, keep in mind that you need to test how much the reduction impacts the performance of your vectors in terms of recall and quality of the results you get back.
## What is a random projection?
The concept of Random Projection is relatively simple to grasp. For instance, a projection that turns a 100-component vector into a 10-component vector computes each of the 10 target components as a different random linear combination of the 100 original components. Please note that *each of the target components* gets some random amount of all the 100 original components. It is mathematically proven that this process results in a vector space where elements retain roughly their relative distances, even though some information is lost.
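A toy illustration of the idea (assuming numpy; the sizes are arbitrary): distances between projected vectors roughly track the distances between the originals.
```
import numpy as np

rng = np.random.default_rng(42)
P = rng.normal(size=(10, 100)) / np.sqrt(10)  # random 100 -> 10 projection

a, b = rng.normal(size=100), rng.normal(size=100)
print(np.linalg.norm(a - b))          # distance in the original space
print(np.linalg.norm(P @ a - P @ b))  # roughly preserved after projection
```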
## Examples of projections and loss of precision
To show a somewhat extreme case, let's take the 3 million Word2Vec items and compress them from 300 components to 100, 50 and 25 components. Then, we check the recall against the ground truth for each of the vector sets produced in this way (using different `REDUCE` parameters of `VADD`). These are the results, obtained by asking for the top 10 elements.
```
----------------------------------------------------------------------
Key Average Recall % Std Dev
----------------------------------------------------------------------
word_embeddings_int8 95.98 12.14
^ This is the same key used for ground truth, but without TRUTH option
word_embeddings_reduced_100 40.20 20.13
word_embeddings_reduced_50 24.42 16.89
word_embeddings_reduced_25 14.31 9.99
```
Here the dimensionality reduction we are using is quite extreme: going from 300 to 100 components drops two thirds of the dimensions. The recall drops from 96% to 40%, down to 24% and 14% for even more extreme dimensionality reductions.
Reducing the dimension of vectors that are already relatively small, like the 300-component vectors of the example above, provides only relatively small memory savings, especially because by default Vector Sets use `int8` quantization, which uses only one byte per component:
```
> MEMORY USAGE word_embeddings_int8
(integer) 3107002888
> MEMORY USAGE word_embeddings_reduced_100
(integer) 2507122888
```
Of course going, for example, from 2048-component vectors to 1024 would provide a much more significant memory saving, even with the `int8` quantization used by Vector Sets, assuming the recall loss is acceptable. Besides the memory saving, there is also the reduction in CPU time, translating to more operations per second.
Another thing to note is that, with certain embedding models, binary quantization (which offers an 8x reduction of memory usage compared to 8-bit quantization, and a very big speedup in computation) performs much better than reducing the dimension of the vectors by the same amount via random projection:
```
word_embeddings_bin 35.48 19.78
```
Here, in the same test as above, we get 35% recall, which is not too far from the 40% obtained with a random projection from 300 to 100 components. However, while random projection reduces the size by 3 times, binary quantization reduces it by 8 times.
```
> memory usage word_embeddings_bin
(integer) 2327002888
```
In this specific case the key uses JSON attributes and has a graph connection overhead that is much bigger than the 300 bits each vector takes, but, as already said, for big vectors (1024 components, for instance) or for lower values of `M` (see the `M` parameter of `VADD`, which controls the level of connectivity and thus the number of pointers used per node) the memory saving is much greater.
# Vector Sets troubleshooting and understandability
## Debugging poor recall or unexpected results
Vector graphs and similarity queries pose many challenges mainly due to the following three problems:
1. The error due to the approximate nature of Vector Sets is hard to evaluate.
2. The error added by quantization often depends on the exact vector space (the embedding model we are using **and** how far apart the elements we represent in such embeddings are).
3. We live in the illusion that learned embeddings capture the best similarity possible among elements, which is obviously not always true, and highly application dependent.
The only way to debug such problems is to inspect, step by step, what is happening inside our application and the structure of the HNSW graph itself. To do so, we suggest considering the following tools:
1. The `TRUTH` option of the `VSIM` command is able to return the ground truth of the most similar elements, without using the HNSW graph, but doing a linear scan.
2. The `VLINKS` command allows exploring the graph to see if the connections among nodes make sense, and to investigate why a given node may be more isolated than expected. This command can also be used differently, when we want very fast "similar items" without paying the HNSW traversal time. It exploits the fact that we have a direct reference from each element in our vector set to each node in our HNSW graph.
3. The `WITHSCORES` option, in the supported commands, returns a value that is directly related to the *cosine similarity* between the query and item vectors: the similarity interval is simply rescaled from the original -1, 1 range to 0, 1; otherwise, the metric is identical.
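As a tiny sketch of the rescaling described in point 3 above:
```
def score_to_cosine(score):
    return score * 2.0 - 1.0   # VSIM score in [0, 1] -> cosine in [-1, 1]

def cosine_to_score(cosine):
    return (cosine + 1.0) / 2.0
```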
## Clients, latency and bandwidth usage
During Vector Sets testing, we discovered that clients often introduce considerable latency and CPU usage (on the client side, not in Redis) for two main reasons:
1. Often the serialization to `VALUES ... list of floats ...` can be very slow.
2. The vector payload of floats represented as strings is very large, resulting in high bandwidth usage and latency, compared to other Redis commands.
Switching from `VALUES` to `FP32` as a method for transmitting vectors may easily provide 10-20x speedups.
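A hedged sketch of the `FP32` path (assuming redis-py on a little-endian host, since the blob is read as native 4-byte floats):
```
import struct
import redis

r = redis.Redis()
vec = [0.1, 1.2, 0.5]
blob = struct.pack(f"{len(vec)}f", *vec)  # packed 4-byte native floats
r.execute_command("VADD", "mykey", "FP32", blob, "my-element")
r.execute_command("VSIM", "mykey", "FP32", blob, "COUNT", 10)
```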
# Known bugs
* Replication code is pretty much untested, and very vanilla (replicating the commands verbatim).
# Implementation details
Vector sets are based on the `hnsw.c` implementation of the HNSW data structure with extensions for speed and functionality.
The main features are:
* Proper node deletion with relinking.
* 8 bits and binary quantization.
* Threaded queries.
* Filtered search with predicate callback.

3164
modules/vector-sets/cJSON.c Normal file

File diff suppressed because it is too large.

306
modules/vector-sets/cJSON.h Normal file
@@ -0,0 +1,306 @@
/*
Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef cJSON__h
#define cJSON__h
#ifdef __cplusplus
extern "C"
{
#endif
#if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32))
#define __WINDOWS__
#endif
#ifdef __WINDOWS__
/* When compiling for windows, we specify a specific calling convention to avoid issues where we are being called from a project with a different default calling convention. For windows you have 3 define options:
CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols
CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default)
CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol
For *nix builds that support visibility attribute, you can define similar behavior by
setting default visibility to hidden by adding
-fvisibility=hidden (for gcc)
or
-xldscope=hidden (for sun cc)
to CFLAGS
then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does
*/
#define CJSON_CDECL __cdecl
#define CJSON_STDCALL __stdcall
/* export symbols by default, this is necessary for copy pasting the C and header file */
#if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS)
#define CJSON_EXPORT_SYMBOLS
#endif
#if defined(CJSON_HIDE_SYMBOLS)
#define CJSON_PUBLIC(type) type CJSON_STDCALL
#elif defined(CJSON_EXPORT_SYMBOLS)
#define CJSON_PUBLIC(type) __declspec(dllexport) type CJSON_STDCALL
#elif defined(CJSON_IMPORT_SYMBOLS)
#define CJSON_PUBLIC(type) __declspec(dllimport) type CJSON_STDCALL
#endif
#else /* !__WINDOWS__ */
#define CJSON_CDECL
#define CJSON_STDCALL
#if (defined(__GNUC__) || defined(__SUNPRO_CC) || defined (__SUNPRO_C)) && defined(CJSON_API_VISIBILITY)
#define CJSON_PUBLIC(type) __attribute__((visibility("default"))) type
#else
#define CJSON_PUBLIC(type) type
#endif
#endif
/* project version */
#define CJSON_VERSION_MAJOR 1
#define CJSON_VERSION_MINOR 7
#define CJSON_VERSION_PATCH 18
#include <stddef.h>
/* cJSON Types: */
#define cJSON_Invalid (0)
#define cJSON_False (1 << 0)
#define cJSON_True (1 << 1)
#define cJSON_NULL (1 << 2)
#define cJSON_Number (1 << 3)
#define cJSON_String (1 << 4)
#define cJSON_Array (1 << 5)
#define cJSON_Object (1 << 6)
#define cJSON_Raw (1 << 7) /* raw json */
#define cJSON_IsReference 256
#define cJSON_StringIsConst 512
/* The cJSON structure: */
typedef struct cJSON
{
/* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */
struct cJSON *next;
struct cJSON *prev;
/* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */
struct cJSON *child;
/* The type of the item, as above. */
int type;
/* The item's string, if type==cJSON_String and type == cJSON_Raw */
char *valuestring;
/* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */
int valueint;
/* The item's number, if type==cJSON_Number */
double valuedouble;
/* The item's name string, if this item is the child of, or is in the list of subitems of an object. */
char *string;
} cJSON;
typedef struct cJSON_Hooks
{
/* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. */
void *(CJSON_CDECL *malloc_fn)(size_t sz);
void (CJSON_CDECL *free_fn)(void *ptr);
} cJSON_Hooks;
typedef int cJSON_bool;
/* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them.
* This is to prevent stack overflows. */
#ifndef CJSON_NESTING_LIMIT
#define CJSON_NESTING_LIMIT 1000
#endif
/* Limits the length of circular references can be before cJSON rejects to parse them.
* This is to prevent stack overflows. */
#ifndef CJSON_CIRCULAR_LIMIT
#define CJSON_CIRCULAR_LIMIT 10000
#endif
/* returns the version of cJSON as a string */
CJSON_PUBLIC(const char*) cJSON_Version(void);
/* Supply malloc, realloc and free functions to cJSON */
CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks);
/* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */
/* Supply a block of JSON, and this returns a cJSON object you can interrogate. */
CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value);
CJSON_PUBLIC(cJSON *) cJSON_ParseWithLength(const char *value, size_t buffer_length);
/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */
/* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to the error so will match cJSON_GetErrorPtr(). */
CJSON_PUBLIC(cJSON *) cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated);
CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, cJSON_bool require_null_terminated);
/* Render a cJSON entity to text for transfer/storage. */
CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item);
/* Render a cJSON entity to text for transfer/storage without any formatting. */
CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item);
/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing well reduces reallocation. fmt=0 gives unformatted, =1 gives formatted */
CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt);
/* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on success and 0 on failure. */
/* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 bytes more than you actually need */
CJSON_PUBLIC(cJSON_bool) cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format);
/* Delete a cJSON entity and all subentities. */
CJSON_PUBLIC(void) cJSON_Delete(cJSON *item);
/* Returns the number of items in an array (or object). */
CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array);
/* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */
CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index);
/* Get item "string" from object. Case insensitive. */
CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON * const object, const char * const string);
CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON * const object, const char * const string);
CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string);
/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */
CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void);
/* Check item type and return its value */
CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item);
CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item);
/* These functions check the type of an item */
CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON * const item);
CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item);
/* These calls create a cJSON item of the appropriate type. */
CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void);
CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void);
CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void);
CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean);
CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num);
CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string);
/* raw json */
CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw);
CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void);
CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void);
/* Create a string where valuestring references a string so
* it will not be freed by cJSON_Delete */
CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string);
/* Create an object/array that only references it's elements so
* they will not be freed by cJSON_Delete */
CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child);
CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child);
/* These utilities create an Array of count items.
* The parameter count cannot be greater than the number of elements in the number array, otherwise array access will be out of bounds.*/
CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count);
CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count);
CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count);
CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char *const *strings, int count);
/* Append item to the specified array/object. */
CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item);
CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item);
/* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the cJSON object.
* WARNING: When this function was used, make sure to always check that (item->type & cJSON_StringIsConst) is zero before
* writing to `item->string` */
CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item);
/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */
CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item);
CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item);
/* Remove/Detach items from Arrays/Objects. */
CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON * const item);
CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which);
CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which);
CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string);
CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string);
CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string);
CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string);
/* Update array items. */
CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem); /* Shifts pre-existing items to the right. */
CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement);
CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem);
CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem);
CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object,const char *string,cJSON *newitem);
/* Duplicate a cJSON item */
CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse);
/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will
* need to be released. With recurse!=0, it will duplicate any children connected to the item.
* The item->next and ->prev pointers are always zero on return from Duplicate. */
/* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be considered unequal.
* case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */
CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive);
/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') from strings.
* The input pointer json cannot point to a read-only address area, such as a string constant,
* but should point to a readable and writable address area. */
CJSON_PUBLIC(void) cJSON_Minify(char *json);
/* Helper functions for creating and adding items to an object at the same time.
* They return the added item or NULL on failure. */
CJSON_PUBLIC(cJSON*) cJSON_AddNullToObject(cJSON * const object, const char * const name);
CJSON_PUBLIC(cJSON*) cJSON_AddTrueToObject(cJSON * const object, const char * const name);
CJSON_PUBLIC(cJSON*) cJSON_AddFalseToObject(cJSON * const object, const char * const name);
CJSON_PUBLIC(cJSON*) cJSON_AddBoolToObject(cJSON * const object, const char * const name, const cJSON_bool boolean);
CJSON_PUBLIC(cJSON*) cJSON_AddNumberToObject(cJSON * const object, const char * const name, const double number);
CJSON_PUBLIC(cJSON*) cJSON_AddStringToObject(cJSON * const object, const char * const name, const char * const string);
CJSON_PUBLIC(cJSON*) cJSON_AddRawToObject(cJSON * const object, const char * const name, const char * const raw);
CJSON_PUBLIC(cJSON*) cJSON_AddObjectToObject(cJSON * const object, const char * const name);
CJSON_PUBLIC(cJSON*) cJSON_AddArrayToObject(cJSON * const object, const char * const name);
/* When assigning an integer value, it needs to be propagated to valuedouble too. */
#define cJSON_SetIntValue(object, number) ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number))
/* helper for the cJSON_SetNumberValue macro */
CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number);
#define cJSON_SetNumberValue(object, number) ((object != NULL) ? cJSON_SetNumberHelper(object, (double)number) : (number))
/* Change the valuestring of a cJSON_String object, only takes effect when type of object is cJSON_String */
CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring);
/* If the object is not a boolean type this does nothing and returns cJSON_Invalid else it returns the new type*/
#define cJSON_SetBoolValue(object, boolValue) ( \
(object != NULL && ((object)->type & (cJSON_False|cJSON_True))) ? \
(object)->type=((object)->type &(~(cJSON_False|cJSON_True)))|((boolValue)?cJSON_True:cJSON_False) : \
cJSON_Invalid\
)
/* Macro for iterating over an array or object */
#define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? (array)->child : NULL; element != NULL; element = element->next)
/* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */
CJSON_PUBLIC(void *) cJSON_malloc(size_t size);
CJSON_PUBLIC(void) cJSON_free(void *object);
#ifdef __cplusplus
}
#endif
#endif

@@ -0,0 +1 @@
venv

@@ -0,0 +1,44 @@
This tool is similar to redis-cli (but very basic) and allows
specifying arguments that are expanded into vectors by calling
Ollama to get the embedding.
Whatever is passed as !"foo bar" gets expanded into
VALUES ... embedding ...
You must have Ollama running with the mxbai-embed-large model
already installed for this to work.
Example:
redis> KEYS *
1) food_items
2) glove_embeddings_bin
3) many_movies_mxbai-embed-large_BIN
4) many_movies_mxbai-embed-large_NOQUANT
5) word_embeddings
6) word_embeddings_bin
7) glove_embeddings_fp32
redis> VSIM food_items !"drinks with fruit"
1) (Fruit)Juices,Lemonade,100ml,50 cal,210 kJ
2) (Fruit)Juices,Limeade,100ml,128 cal,538 kJ
3) CannedFruit,Canned Fruit Cocktail,100g,81 cal,340 kJ
4) (Fruit)Juices,Energy-Drink,100ml,87 cal,365 kJ
5) Fruits,Lime,100g,30 cal,126 kJ
6) (Fruit)Juices,Coconut Water,100ml,19 cal,80 kJ
7) Fruits,Lemon,100g,29 cal,122 kJ
8) (Fruit)Juices,Clamato,100ml,60 cal,252 kJ
9) Fruits,Fruit salad,100g,50 cal,210 kJ
10) (Fruit)Juices,Capri-Sun,100ml,41 cal,172 kJ
redis> vsim food_items !"barilla"
1) Pasta&Noodles,Spirelli,100g,367 cal,1541 kJ
2) Pasta&Noodles,Farfalle,100g,358 cal,1504 kJ
3) Pasta&Noodles,Capellini,100g,353 cal,1483 kJ
4) Pasta&Noodles,Spaetzle,100g,368 cal,1546 kJ
5) Pasta&Noodles,Cappelletti,100g,164 cal,689 kJ
6) Pasta&Noodles,Penne,100g,351 cal,1474 kJ
7) Pasta&Noodles,Shells,100g,353 cal,1483 kJ
8) Pasta&Noodles,Linguine,100g,357 cal,1499 kJ
9) Pasta&Noodles,Rotini,100g,353 cal,1483 kJ
10) Pasta&Noodles,Rigatoni,100g,353 cal,1483 kJ

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
import redis
import requests
import re
import shlex
from prompt_toolkit import PromptSession
from prompt_toolkit.history import InMemoryHistory

def get_embedding(text):
    """Get embedding from local Ollama API"""
    url = "http://localhost:11434/api/embeddings"
    payload = {
        "model": "mxbai-embed-large",
        "prompt": text
    }
    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        return response.json()['embedding']
    except requests.exceptions.RequestException as e:
        raise Exception(f"Failed to get embedding: {str(e)}")

def process_embedding_patterns(text):
    """Process !"text" and !!"text" patterns in the command"""
    def replace_with_embedding(match):
        text = match.group(1)
        embedding = get_embedding(text)
        return f"VALUES {len(embedding)} {' '.join(map(str, embedding))}"

    def replace_with_embedding_and_text(match):
        text = match.group(1)
        embedding = get_embedding(text)
        # Return both the embedding values and the original text as next argument
        return f'VALUES {len(embedding)} {" ".join(map(str, embedding))} "{text}"'

    # First handle !!"text" pattern (must be done before !"text")
    text = re.sub(r'!!"([^"]*)"', replace_with_embedding_and_text, text)
    # Then handle !"text" pattern
    text = re.sub(r'!"([^"]*)"', replace_with_embedding, text)
    return text

def parse_command(command):
    """Parse command respecting quoted strings"""
    try:
        # Use shlex to properly handle quoted strings
        return shlex.split(command)
    except ValueError as e:
        raise Exception(f"Invalid command syntax: {str(e)}")

def format_response(response):
    """Format the response to match Redis protocol style"""
    if response is None:
        return "(nil)"
    elif isinstance(response, bool):
        return "+OK" if response else "(error) Operation failed"
    elif isinstance(response, (list, set)):
        if not response:
            return "(empty list or set)"
        return "\n".join(f"{i+1}) {item}" for i, item in enumerate(response))
    elif isinstance(response, int):
        return f"(integer) {response}"
    else:
        return str(response)

def main():
    # Default connection to localhost:6379
    r = redis.Redis(host='localhost', port=6379, decode_responses=True)
    try:
        # Test connection
        r.ping()
        print("Connected to Redis. Type your commands (CTRL+D to exit):")
        print("Special syntax:")
        print(" !\"text\" - Replace with embedding")
        print(" !!\"text\" - Replace with embedding and append text as value")
        print(" \"text\" - Quote strings containing spaces")
    except redis.ConnectionError:
        print("Error: Could not connect to Redis server")
        return

    # Setup prompt session with history
    session = PromptSession(history=InMemoryHistory())

    # Main loop
    while True:
        try:
            # Read input with line editing support
            command = session.prompt("redis> ")

            # Skip empty commands
            if not command.strip():
                continue

            # Process any embedding patterns before parsing
            try:
                processed_command = process_embedding_patterns(command)
            except Exception as e:
                print(f"(error) Embedding processing failed: {str(e)}")
                continue

            # Parse the command respecting quoted strings
            try:
                parts = parse_command(processed_command)
            except Exception as e:
                print(f"(error) {str(e)}")
                continue

            if not parts:
                continue

            cmd = parts[0].lower()
            args = parts[1:]

            # Execute command
            try:
                method = getattr(r, cmd, None)
                if method is not None:
                    result = method(*args)
                else:
                    # Use execute_command for unknown commands
                    result = r.execute_command(cmd, *args)
                print(format_response(result))
            except AttributeError:
                print(f"(error) Unknown command '{cmd}'")
        except EOFError:
            print("\nGoodbye!")
            break
        except KeyboardInterrupt:
            continue  # Allow Ctrl+C to clear current line
        except redis.RedisError as e:
            print(f"(error) {str(e)}")
        except Exception as e:
            print(f"(error) {str(e)}")

if __name__ == "__main__":
    main()

View file

@ -0,0 +1,3 @@
wget http://ann-benchmarks.com/glove-100-angular.hdf5
python insert.py
python recall.py (optionally pass --k <count>; default is top-10)

View file

@ -0,0 +1,47 @@
import h5py
import redis
from tqdm import tqdm
# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
def add_to_redis(index, embedding):
"""Add embedding to Redis using VADD command"""
args = ["VADD", "glove_embeddings", "VALUES", "100"] # 100 is vector dimension
args.extend(map(str, embedding))
args.append(f"{index}") # Using index as identifier since we don't have words
args.append("EF")
args.append("200")
# args.append("NOQUANT")
# args.append("BIN")
redis_client.execute_command(*args)
def main():
with h5py.File('glove-100-angular.hdf5', 'r') as f:
# Get the train dataset
train_vectors = f['train']
total_vectors = train_vectors.shape[0]
print(f"Starting to process {total_vectors} vectors...")
# Process in batches to avoid memory issues
batch_size = 1000
for i in tqdm(range(0, total_vectors, batch_size)):
batch_end = min(i + batch_size, total_vectors)
batch = train_vectors[i:batch_end]
for j, vector in enumerate(batch):
try:
current_index = i + j
add_to_redis(current_index, vector)
except Exception as e:
print(f"Error processing vector {current_index}: {str(e)}")
continue
if (i + batch_size) % 10000 == 0:
print(f"Processed {i + batch_size} vectors")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,78 @@
import h5py
import redis
import numpy as np
from tqdm import tqdm
import argparse
# Initialize Redis connection
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
def get_redis_neighbors(query_vector, k):
"""Get nearest neighbors using Redis VSIM command"""
args = ["VSIM", "glove_embeddings_bin", "VALUES", "100"]
args.extend(map(str, query_vector))
args.extend(["COUNT", str(k)])
args.extend(["EF", 100])
if False:
print(args)
exit(1)
results = redis_client.execute_command(*args)
return [int(res) for res in results]
def calculate_recall(ground_truth, predicted, k):
"""Calculate recall@k"""
relevant = set(ground_truth[:k])
retrieved = set(predicted[:k])
return len(relevant.intersection(retrieved)) / len(relevant)
def main():
parser = argparse.ArgumentParser(description='Evaluate Redis VSIM recall')
parser.add_argument('--k', type=int, default=10, help='Number of neighbors to evaluate (default: 10)')
parser.add_argument('--batch', type=int, default=100, help='Progress update frequency (default: 100)')
args = parser.parse_args()
k = args.k
batch_size = args.batch
with h5py.File('glove-100-angular.hdf5', 'r') as f:
test_vectors = f['test'][:]
ground_truth_neighbors = f['neighbors'][:]
num_queries = len(test_vectors)
recalls = []
print(f"Evaluating recall@{k} for {num_queries} test queries...")
for i in tqdm(range(num_queries)):
try:
# Get Redis results
redis_neighbors = get_redis_neighbors(test_vectors[i], k)
# Get ground truth for this query
true_neighbors = ground_truth_neighbors[i]
# Calculate recall
recall = calculate_recall(true_neighbors, redis_neighbors, k)
recalls.append(recall)
if (i + 1) % batch_size == 0:
current_avg_recall = np.mean(recalls)
print(f"Current average recall@{k} after {i+1} queries: {current_avg_recall:.4f}")
except Exception as e:
print(f"Error processing query {i}: {str(e)}")
continue
final_recall = np.mean(recalls)
print("\nFinal Results:")
print(f"Average recall@{k}: {final_recall:.4f}")
print(f"Total queries evaluated: {len(recalls)}")
# Save detailed results
with open(f'recall_evaluation_results_k{k}.txt', 'w') as f:
f.write(f"Average recall@{k}: {final_recall:.4f}\n")
f.write(f"Total queries evaluated: {len(recalls)}\n")
f.write(f"Individual query recalls: {recalls}\n")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,2 @@
mpst_full_data.csv
partition.json

View file

@ -0,0 +1,30 @@
This example maps long-form movie plots to movie titles.
It will create fp32 and binary vectors (the two quantization extremes).
1. Install Ollama, and pull the embedding model "mxbai-embed-large"
2. Download mpst_full_data.csv from https://www.kaggle.com/datasets/cryptexcode/mpst-movie-plot-synopses-with-tags
3. python insert.py
127.0.0.1:6379> VSIM many_movies_mxbai-embed-large_NOQUANT ELE "The Matrix"
1) "The Matrix"
2) "The Matrix Reloaded"
3) "The Matrix Revolutions"
4) "Commando"
5) "Avatar"
6) "Forbidden Planet"
7) "Terminator Salvation"
8) "Mandroid"
9) "The Omega Code"
10) "Coherence"
127.0.0.1:6379> VSIM many_movies_mxbai-embed-large_BIN ELE "The Matrix"
1) "The Matrix"
2) "The Matrix Reloaded"
3) "The Matrix Revolutions"
4) "The Omega Code"
5) "Forbidden Planet"
6) "Avatar"
7) "John Carter"
8) "System Shock 2"
9) "Coherence"
10) "Tomorrowland"

View file

@ -0,0 +1,48 @@
import csv
import requests
import redis
ModelName="mxbai-embed-large"
# Initialize Redis connection, setting encoding to utf-8
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
def get_embedding(text):
"""Get embedding from local API"""
url = "http://localhost:11434/api/embeddings"
payload = {
"model": ModelName,
"prompt": "Represent this movie plot and genre: "+text
}
response = requests.post(url, json=payload)
response.raise_for_status()
return response.json()['embedding']
def add_to_redis(title, embedding, quant_type):
"""Add embedding to Redis using VADD command"""
args = ["VADD", "many_movies_"+ModelName+"_"+quant_type, "VALUES", str(len(embedding))]
args.extend(map(str, embedding))
args.append(title)
args.append(quant_type)
redis_client.execute_command(*args)
def main():
with open('mpst_full_data.csv', 'r', encoding='utf-8') as file:
reader = csv.DictReader(file)
for movie in reader:
try:
text_to_embed = f"{movie['title']} {movie['plot_synopsis']} {movie['tags']}"
print(f"Getting embedding for: {movie['title']}")
embedding = get_embedding(text_to_embed)
add_to_redis(movie['title'], embedding, "BIN")
add_to_redis(movie['title'], embedding, "NOQUANT")
print(f"Successfully processed: {movie['title']}")
except Exception as e:
print(f"Error processing {movie['title']}: {str(e)}")
continue
if __name__ == "__main__":
main()

995
modules/vector-sets/expr.c Normal file
View file

@ -0,0 +1,995 @@
/* Filtering of objects based on simple expressions.
* This powers the FILTER option of Vector Sets, but it is otherwise
* general code to be used when we want to tell if a given object (with fields)
* passes or fails a given test for scalars, strings, ...
*
* Copyright(C) 2024-Present, Redis Ltd. All Rights Reserved.
* Originally authored by: Salvatore Sanfilippo.
*/
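/* Example: given an object whose JSON attributes are
* {"year": 1984, "name": "The Matrix"}, the expression
* ".year > 1980 and .name == 'The Matrix'" evaluates to true. */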
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <string.h>
#include "cJSON.h"
#ifdef TEST_MAIN
#define RedisModule_Alloc malloc
#define RedisModule_Realloc realloc
#define RedisModule_Free free
#define RedisModule_Strdup strdup
#endif
#define EXPR_TOKEN_EOF 0
#define EXPR_TOKEN_NUM 1
#define EXPR_TOKEN_STR 2
#define EXPR_TOKEN_TUPLE 3
#define EXPR_TOKEN_SELECTOR 4
#define EXPR_TOKEN_OP 5
#define EXPR_OP_OPAREN 0 /* ( */
#define EXPR_OP_CPAREN 1 /* ) */
#define EXPR_OP_NOT 2 /* ! */
#define EXPR_OP_POW 3 /* ** */
#define EXPR_OP_MULT 4 /* * */
#define EXPR_OP_DIV 5 /* / */
#define EXPR_OP_MOD 6 /* % */
#define EXPR_OP_SUM 7 /* + */
#define EXPR_OP_DIFF 8 /* - */
#define EXPR_OP_GT 9 /* > */
#define EXPR_OP_GTE 10 /* >= */
#define EXPR_OP_LT 11 /* < */
#define EXPR_OP_LTE 12 /* <= */
#define EXPR_OP_EQ 13 /* == */
#define EXPR_OP_NEQ 14 /* != */
#define EXPR_OP_IN 15 /* in */
#define EXPR_OP_AND 16 /* and */
#define EXPR_OP_OR 17 /* or */
/* This structure represents a token in our expression. It is either
* a literal like 4 or "foo", an operator like "+", "-", "and", or
* a JSON selector, which starts with a dot: ".age", ".properties.somearray[1]" */
typedef struct exprtoken {
int refcount; // Reference counting for memory reclaiming.
int token_type; // Token type of the just parsed token.
int offset; // Chars offset in expression.
union {
double num; // Value for EXPR_TOKEN_NUM.
struct {
char *start; // String pointer for EXPR_TOKEN_STR / SELECTOR.
size_t len; // String len for EXPR_TOKEN_STR / SELECTOR.
char *heapstr; // Non-NULL if we have a private allocation for this
// string. When possible, 'start' just points into the
// expression string we compiled, exprstate->expr.
} str;
int opcode; // Opcode ID for EXPR_TOKEN_OP.
struct {
struct exprtoken **ele;
size_t len;
} tuple; // Tuples are like [1, 2, 3] for "in" operator.
};
} exprtoken;
/* Simple stack of expr tokens. This is used both to represent the stack
* of values and the stack of operands during VM execution. */
typedef struct exprstack {
exprtoken **items;
int numitems;
int allocsize;
} exprstack;
typedef struct exprstate {
char *expr; /* Expression string to compile. Note that
* expression token strings point directly to this
* string. */
char *p; // Current position inside 'expr', while parsing.
// Virtual machine state.
exprstack values_stack;
exprstack ops_stack; // Operator stack used during compilation.
exprstack tokens; // Expression processed into a sequence of tokens.
exprstack program; // Expression compiled into opcodes and values.
} exprstate;
/* Valid operators. */
struct {
char *opname;
int oplen;
int opcode;
int precedence;
int arity;
} ExprOptable[] = {
{"(", 1, EXPR_OP_OPAREN, 7, 0},
{")", 1, EXPR_OP_CPAREN, 7, 0},
{"!", 1, EXPR_OP_NOT, 6, 1},
{"not", 3, EXPR_OP_NOT, 6, 1},
{"**", 2, EXPR_OP_POW, 5, 2},
{"*", 1, EXPR_OP_MULT, 4, 2},
{"/", 1, EXPR_OP_DIV, 4, 2},
{"%", 1, EXPR_OP_MOD, 4, 2},
{"+", 1, EXPR_OP_SUM, 3, 2},
{"-", 1, EXPR_OP_DIFF, 3, 2},
{">", 1, EXPR_OP_GT, 2, 2},
{">=", 2, EXPR_OP_GTE, 2, 2},
{"<", 1, EXPR_OP_LT, 2, 2},
{"<=", 2, EXPR_OP_LTE, 2, 2},
{"==", 2, EXPR_OP_EQ, 2, 2},
{"!=", 2, EXPR_OP_NEQ, 2, 2},
{"in", 2, EXPR_OP_IN, 2, 2},
{"and", 3, EXPR_OP_AND, 1, 2},
{"&&", 2, EXPR_OP_AND, 1, 2},
{"or", 2, EXPR_OP_OR, 0, 2},
{"||", 2, EXPR_OP_OR, 0, 2},
{NULL, 0, 0, 0, 0} // Terminator.
};
#define EXPR_OP_SPECIALCHARS "+-*%/!()<>=|&"
#define EXPR_SELECTOR_SPECIALCHARS "_-"
/* ================================ Expr token ============================== */
/* Return a heap-allocated token of the specified type, setting the
* reference count to 1. */
exprtoken *exprNewToken(int type) {
exprtoken *t = RedisModule_Alloc(sizeof(exprtoken));
memset(t,0,sizeof(*t));
t->token_type = type;
t->refcount = 1;
return t;
}
/* Release a reference to the token. When the reference count drops to
* zero the token is freed, together with any private allocation it
* owns (heap strings, tuple elements). */
void exprTokenRelease(exprtoken *t) {
if (t == NULL) return;
if (t->refcount <= 0) {
printf("exprTokenRelease() against a token with refcount %d!\n"
"Aborting program execution\n",
t->refcount);
exit(1);
}
t->refcount--;
if (t->refcount > 0) return;
// We reached refcount 0: free the object.
if (t->token_type == EXPR_TOKEN_STR) {
if (t->str.heapstr != NULL) RedisModule_Free(t->str.heapstr);
} else if (t->token_type == EXPR_TOKEN_TUPLE) {
for (size_t j = 0; j < t->tuple.len; j++)
exprTokenRelease(t->tuple.ele[j]);
if (t->tuple.ele) RedisModule_Free(t->tuple.ele);
}
RedisModule_Free(t);
}
void exprTokenRetain(exprtoken *t) {
t->refcount++;
}
/* ============================== Stack handling ============================ */
#define EXPR_STACK_INITIAL_SIZE 16
/* Initialize a new expression stack. */
void exprStackInit(exprstack *stack) {
stack->items = RedisModule_Alloc(sizeof(exprtoken*) * EXPR_STACK_INITIAL_SIZE);
stack->numitems = 0;
stack->allocsize = EXPR_STACK_INITIAL_SIZE;
}
/* Push a token pointer onto the stack. Does not increment the refcount
* of the token: it is up to the caller to do so. */
void exprStackPush(exprstack *stack, exprtoken *token) {
/* Check if we need to grow the stack. */
if (stack->numitems == stack->allocsize) {
size_t newsize = stack->allocsize * 2;
exprtoken **newitems =
RedisModule_Realloc(stack->items, sizeof(exprtoken*) * newsize);
stack->items = newitems;
stack->allocsize = newsize;
}
stack->items[stack->numitems] = token;
stack->numitems++;
}
/* Pop a token pointer from the stack. Return NULL if the stack is
* empty. Does NOT decrement the refcount of the token: it's up to the
* caller to do so, as the new owner of the reference. */
exprtoken *exprStackPop(exprstack *stack) {
if (stack->numitems == 0) return NULL;
stack->numitems--;
return stack->items[stack->numitems];
}
/* Just return the last element pushed, without consuming it nor altering
* the reference count. */
exprtoken *exprStackPeek(exprstack *stack) {
if (stack->numitems == 0) return NULL;
return stack->items[stack->numitems-1];
}
/* Free the stack structure state, including the items it contains, that are
* assumed to be heap allocated. The passed pointer itself is not freed. */
void exprStackFree(exprstack *stack) {
for (int j = 0; j < stack->numitems; j++)
exprTokenRelease(stack->items[j]);
RedisModule_Free(stack->items);
}
/* Just reset the stack removing all the items, but leaving it in a state
* that makes it still usable for new elements. */
void exprStackReset(exprstack *stack) {
for (int j = 0; j < stack->numitems; j++)
exprTokenRelease(stack->items[j]);
stack->numitems = 0;
}
/* =========================== Expression compilation ======================= */
void exprConsumeSpaces(exprstate *es) {
while(es->p[0] && isspace(es->p[0])) es->p++;
}
/* Parse an operator, looking for the longest match in the
* operators table. */
exprtoken *exprParseOperator(exprstate *es) {
exprtoken *t = exprNewToken(EXPR_TOKEN_OP);
char *start = es->p;
while(es->p[0] &&
(isalpha(es->p[0]) ||
strchr(EXPR_OP_SPECIALCHARS,es->p[0]) != NULL))
{
es->p++;
}
int matchlen = es->p - start;
int bestlen = 0;
int j;
// Find the longest matching operator.
for (j = 0; ExprOptable[j].opname != NULL; j++) {
if (ExprOptable[j].oplen > matchlen) continue;
if (memcmp(ExprOptable[j].opname, start, ExprOptable[j].oplen) != 0)
{
continue;
}
if (ExprOptable[j].oplen > bestlen) {
t->opcode = ExprOptable[j].opcode;
bestlen = ExprOptable[j].oplen;
}
}
if (bestlen == 0) {
exprTokenRelease(t);
return NULL;
} else {
es->p = start + bestlen;
}
return t;
}
// Valid selector charset.
static int is_selector_char(int c) {
return (isalpha(c) ||
isdigit(c) ||
strchr(EXPR_SELECTOR_SPECIALCHARS,c) != NULL);
}
/* Parse selectors: they start with a dot and can contain alphanumeric
* chars plus a few special chars. */
exprtoken *exprParseSelector(exprstate *es) {
exprtoken *t = exprNewToken(EXPR_TOKEN_SELECTOR);
es->p++; // Skip dot.
char *start = es->p;
while(es->p[0] && is_selector_char(es->p[0])) es->p++;
int matchlen = es->p - start;
t->str.start = start;
t->str.len = matchlen;
return t;
}
exprtoken *exprParseNumber(exprstate *es) {
exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
char num[64];
int idx = 0;
/* Note: a sign is accepted in the middle of a number only right
* after the exponent char, so that forms like 1e-5 parse correctly. */
while(isdigit(es->p[0]) || es->p[0] == '.' || es->p[0] == 'e' ||
es->p[0] == 'E' || (idx == 0 && es->p[0] == '-') ||
((es->p[0] == '-' || es->p[0] == '+') && idx > 0 &&
(num[idx-1] == 'e' || num[idx-1] == 'E')))
{
if (idx >= (int)sizeof(num)-1) {
exprTokenRelease(t);
return NULL;
}
num[idx++] = es->p[0];
es->p++;
}
num[idx] = 0;
char *endptr;
t->num = strtod(num, &endptr);
if (*endptr != '\0') {
exprTokenRelease(t);
return NULL;
}
return t;
}
exprtoken *exprParseString(exprstate *es) {
char quote = es->p[0]; /* Store the quote type (' or "). */
es->p++; /* Skip opening quote. */
exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
t->str.start = es->p;
while(es->p[0] != '\0') {
if (es->p[0] == '\\' && es->p[1] != '\0') {
es->p += 2; // Skip escaped char.
continue;
}
if (es->p[0] == quote) {
t->str.len = es->p - t->str.start;
es->p++; // Skip closing quote.
return t;
}
es->p++;
}
/* If we reach here, string was not terminated. */
exprTokenRelease(t);
return NULL;
}
/* Parse a tuple of the form [1, "foo", 42]. No nested tuples are
* supported. This type is useful mostly to be used with the "IN"
* operator. */
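/* For example, in the expression:
* .year in [1997, 2001, "unknown"]
* the bracketed list is parsed by this function into a tuple token,
* later used as the right operand of the "in" operator. */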
exprtoken *exprParseTuple(exprstate *es) {
exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
t->tuple.ele = NULL;
t->tuple.len = 0;
es->p++; /* Skip opening '['. */
size_t allocated = 0;
while(1) {
exprConsumeSpaces(es);
/* Check for empty tuple or end. */
if (es->p[0] == ']') {
es->p++;
break;
}
/* Grow tuple array if needed. */
if (t->tuple.len == allocated) {
size_t newsize = allocated == 0 ? 4 : allocated * 2;
exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
sizeof(exprtoken*) * newsize);
t->tuple.ele = newele;
allocated = newsize;
}
/* Parse tuple element. */
exprtoken *ele = NULL;
if (isdigit(es->p[0]) || es->p[0] == '-') {
ele = exprParseNumber(es);
} else if (es->p[0] == '"' || es->p[0] == '\'') {
ele = exprParseString(es);
} else {
exprTokenRelease(t);
return NULL;
}
/* Error parsing number/string? */
if (ele == NULL) {
exprTokenRelease(t);
return NULL;
}
/* Store element if no error was detected. */
t->tuple.ele[t->tuple.len] = ele;
t->tuple.len++;
/* Check for next element. */
exprConsumeSpaces(es);
if (es->p[0] == ']') {
es->p++;
break;
}
if (es->p[0] != ',') {
exprTokenRelease(t);
return NULL;
}
es->p++; /* Skip comma. */
}
return t;
}
/* Deallocate the object returned by exprCompile(). */
void exprFree(exprstate *es) {
if (es == NULL) return;
/* Free the original expression string. */
if (es->expr) RedisModule_Free(es->expr);
/* Free all stacks. */
exprStackFree(&es->values_stack);
exprStackFree(&es->ops_stack);
exprStackFree(&es->tokens);
exprStackFree(&es->program);
/* Free the state object itself. */
RedisModule_Free(es);
}
/* Split the provided expression into a stack of tokens. Returns
* 0 on success, 1 on error. */
int exprTokenize(exprstate *es, int *errpos) {
/* Main parsing loop. */
while(1) {
exprConsumeSpaces(es);
/* Set a flag to decide whether a '-' should be considered part of
* a number, or an operator. */
int minus_is_number = 0; // By default it is an operator.
exprtoken *last = exprStackPeek(&es->tokens);
if (last == NULL) {
/* If we are at the start of an expression, the minus is
* considered part of a number. */
minus_is_number = 1;
} else if (last->token_type == EXPR_TOKEN_OP &&
last->opcode != EXPR_OP_CPAREN)
{
/* Also, if the previous token was an operator, the minus
* is considered part of a number, unless the previous operator
* is a closing paren: in a case like (...) - 5 we want to
* emit an operator instead. */
minus_is_number = 1;
}
/* Parse based on the current character. Remember where the token
* starts, for error reporting purposes. */
char *token_start = es->p;
exprtoken *current = NULL;
if (*es->p == '\0') {
current = exprNewToken(EXPR_TOKEN_EOF);
} else if (isdigit(*es->p) ||
(minus_is_number && *es->p == '-' && isdigit(es->p[1])))
{
current = exprParseNumber(es);
} else if (*es->p == '"' || *es->p == '\'') {
current = exprParseString(es);
} else if (*es->p == '.' && is_selector_char(es->p[1])) {
current = exprParseSelector(es);
} else if (isalpha(*es->p) || strchr(EXPR_OP_SPECIALCHARS, *es->p)) {
current = exprParseOperator(es);
} else if (*es->p == '[') {
current = exprParseTuple(es);
}
if (current == NULL) {
if (errpos) *errpos = es->p - es->expr;
return 1; // Syntax Error.
}
/* Store the token offset inside the expression for error
* reporting, then push the token to the tokens stack. */
current->offset = token_start - es->expr;
exprStackPush(&es->tokens, current);
if (current->token_type == EXPR_TOKEN_EOF) break;
}
return 0;
}
/* Helper function to get operator precedence from the operator table. */
int exprGetOpPrecedence(int opcode) {
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
if (ExprOptable[i].opcode == opcode)
return ExprOptable[i].precedence;
}
return -1;
}
/* Helper function to get operator arity from the operator table. */
int exprGetOpArity(int opcode) {
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
if (ExprOptable[i].opcode == opcode)
return ExprOptable[i].arity;
}
return -1;
}
/* Process an operator during compilation. Returns 0 on success, 1 on error.
* This function will retain a reference of the operator 'op' in case it
* is pushed on the operators stack. */
int exprProcessOperator(exprstate *es, exprtoken *op, int *stack_items, int *errpos) {
if (op->opcode == EXPR_OP_OPAREN) {
// This is just a marker for us: push it on the ops stack.
exprStackPush(&es->ops_stack, op);
exprTokenRetain(op);
return 0;
}
if (op->opcode == EXPR_OP_CPAREN) {
/* Process operators until we find the matching opening parenthesis. */
while (1) {
exprtoken *top_op = exprStackPop(&es->ops_stack);
if (top_op == NULL) {
if (errpos) *errpos = op->offset;
return 1;
}
if (top_op->opcode == EXPR_OP_OPAREN) {
/* Open parenthesis found. Our work is finished. */
exprTokenRelease(top_op);
return 0;
}
int arity = exprGetOpArity(top_op->opcode);
if (*stack_items < arity) {
exprTokenRelease(top_op);
if (errpos) *errpos = top_op->offset;
return 1;
}
/* Move the operator on the program stack. */
exprStackPush(&es->program, top_op);
*stack_items = *stack_items - arity + 1;
}
}
int curr_prec = exprGetOpPrecedence(op->opcode);
/* Process operators with higher or equal precedence. */
while (1) {
exprtoken *top_op = exprStackPeek(&es->ops_stack);
if (top_op == NULL || top_op->opcode == EXPR_OP_OPAREN) break;
int top_prec = exprGetOpPrecedence(top_op->opcode);
if (top_prec < curr_prec) break;
/* Special case for **: only pop if precedence is strictly higher
* so that the operator is right associative, that is:
* 2 ** 3 ** 2 is evaluated as 2 ** (3 ** 2) == 512 instead
* of (2 ** 3) ** 2 == 64. */
if (op->opcode == EXPR_OP_POW && top_prec <= curr_prec) break;
/* Pop and add to program. */
top_op = exprStackPop(&es->ops_stack);
int arity = exprGetOpArity(top_op->opcode);
if (*stack_items < arity) {
exprTokenRelease(top_op);
if (errpos) *errpos = top_op->offset;
return 1;
}
/* Move to the program stack. */
exprStackPush(&es->program, top_op);
*stack_items = *stack_items - arity + 1;
}
/* Push current operator. */
exprStackPush(&es->ops_stack, op);
exprTokenRetain(op);
return 0;
}
/* Compile the expression into a sequence of push-value and exec-operator
* operations that exprRun() can execute. The function returns an exprstate
* object that can be used for execution of the program. On error, NULL
* is returned, and optionally the position of the error inside the
* expression is returned by reference. */
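/* For instance, given the precedence rules in ExprOptable, the
* expression "(5+2)*3" compiles into the postfix (RPN) program:
*
* NUM:5 NUM:2 OP:+ NUM:3 OP:*
*
* that the stack machine in exprRun() evaluates to 21. */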
exprstate *exprCompile(char *expr, int *errpos) {
/* Initialize expression state. */
exprstate *es = RedisModule_Alloc(sizeof(exprstate));
es->expr = RedisModule_Strdup(expr);
es->p = es->expr;
/* Initialize all stacks. */
exprStackInit(&es->values_stack);
exprStackInit(&es->ops_stack);
exprStackInit(&es->tokens);
exprStackInit(&es->program);
/* Tokenization. */
if (exprTokenize(es, errpos)) {
exprFree(es);
return NULL;
}
/* Compile the expression into a sequence of operations. */
int stack_items = 0; // Track # of items that would be on the stack
// during execution. This way we can detect arity
// issues at compile time.
/* Process each token. */
for (int i = 0; i < es->tokens.numitems; i++) {
exprtoken *token = es->tokens.items[i];
if (token->token_type == EXPR_TOKEN_EOF) break;
/* Handle values (numbers, strings, selectors). */
if (token->token_type == EXPR_TOKEN_NUM ||
token->token_type == EXPR_TOKEN_STR ||
token->token_type == EXPR_TOKEN_TUPLE ||
token->token_type == EXPR_TOKEN_SELECTOR)
{
exprStackPush(&es->program, token);
exprTokenRetain(token);
stack_items++;
continue;
}
/* Handle operators. */
if (token->token_type == EXPR_TOKEN_OP) {
if (exprProcessOperator(es, token, &stack_items, errpos)) {
exprFree(es);
return NULL;
}
continue;
}
}
/* Process remaining operators on the stack. */
while (es->ops_stack.numitems > 0) {
exprtoken *op = exprStackPop(&es->ops_stack);
if (op->opcode == EXPR_OP_OPAREN) {
if (errpos) *errpos = op->offset;
exprTokenRelease(op);
exprFree(es);
return NULL;
}
int arity = exprGetOpArity(op->opcode);
if (stack_items < arity) {
if (errpos) *errpos = op->offset;
exprTokenRelease(op);
exprFree(es);
return NULL;
}
exprStackPush(&es->program, op);
stack_items = stack_items - arity + 1;
}
/* Verify that exactly one value would remain on the stack after
* execution. We could also check that such value is a number, but this
* would make the code more complex without much gain. */
if (stack_items != 1) {
if (errpos) {
/* Point to the last token's offset for error reporting. */
exprtoken *last = es->tokens.items[es->tokens.numitems - 1];
*errpos = last->offset;
}
exprFree(es);
return NULL;
}
return es;
}
/* ============================ Expression execution ======================== */
/* Convert a token to its numeric value. For strings we attempt to parse them
* as numbers, returning 0 if conversion fails. */
double exprTokenToNum(exprtoken *t) {
char buf[128];
if (t->token_type == EXPR_TOKEN_NUM) {
return t->num;
} else if (t->token_type == EXPR_TOKEN_STR && t->str.len < sizeof(buf)) {
memcpy(buf, t->str.start, t->str.len);
buf[t->str.len] = '\0';
char *endptr;
double val = strtod(buf, &endptr);
return *endptr == '\0' ? val : 0;
} else {
return 0;
}
}
/* Convert an object to true/false (0 or 1). */
double exprTokenToBool(exprtoken *t) {
if (t->token_type == EXPR_TOKEN_NUM) {
return t->num != 0;
} else if (t->token_type == EXPR_TOKEN_STR && t->str.len == 0) {
return 0; // Empty strings are false, like in JavaScript.
} else {
return 1; // Every non numerical type is true.
}
}
/* Compare two tokens. Returns true if they are equal. */
int exprTokensEqual(exprtoken *a, exprtoken *b) {
// If both are strings, do string comparison.
if (a->token_type == EXPR_TOKEN_STR && b->token_type == EXPR_TOKEN_STR) {
return a->str.len == b->str.len &&
memcmp(a->str.start, b->str.start, a->str.len) == 0;
}
// If both are numbers, do numeric comparison.
if (a->token_type == EXPR_TOKEN_NUM && b->token_type == EXPR_TOKEN_NUM) {
return a->num == b->num;
}
// Mixed types - convert to numbers and compare.
return exprTokenToNum(a) == exprTokenToNum(b);
}
/* Convert a json object to an expression token. There is only
* limited support for JSON arrays: they must be composed of
* just numbers and strings. Returns NULL if the JSON object
* cannot be converted. */
exprtoken *exprJsonToToken(cJSON *js) {
if (cJSON_IsNumber(js)) {
exprtoken *obj = exprNewToken(EXPR_TOKEN_NUM);
obj->num = cJSON_GetNumberValue(js);
return obj;
} else if (cJSON_IsString(js)) {
exprtoken *obj = exprNewToken(EXPR_TOKEN_STR);
char *strval = cJSON_GetStringValue(js);
obj->str.heapstr = RedisModule_Strdup(strval);
obj->str.start = obj->str.heapstr;
obj->str.len = strlen(obj->str.heapstr);
return obj;
} else if (cJSON_IsBool(js)) {
exprtoken *obj = exprNewToken(EXPR_TOKEN_NUM);
obj->num = cJSON_IsTrue(js);
return obj;
} else if (cJSON_IsArray(js)) {
// First, scan the array to ensure it only
// contains strings and numbers. Otherwise the
// expression will evaluate to false.
int array_size = cJSON_GetArraySize(js);
for (int j = 0; j < array_size; j++) {
cJSON *item = cJSON_GetArrayItem(js, j);
if (!cJSON_IsNumber(item) && !cJSON_IsString(item)) return NULL;
}
// Create a tuple token for the array.
exprtoken *obj = exprNewToken(EXPR_TOKEN_TUPLE);
obj->tuple.len = array_size;
obj->tuple.ele = NULL;
if (obj->tuple.len == 0) return obj; // No elements, already ok.
obj->tuple.ele =
RedisModule_Alloc(sizeof(exprtoken*) * obj->tuple.len);
// Convert each array element to a token.
for (size_t j = 0; j < obj->tuple.len; j++) {
cJSON *item = cJSON_GetArrayItem(js, j);
if (cJSON_IsNumber(item)) {
exprtoken *eleToken = exprNewToken(EXPR_TOKEN_NUM);
eleToken->num = cJSON_GetNumberValue(item);
obj->tuple.ele[j] = eleToken;
} else if (cJSON_IsString(item)) {
exprtoken *eleToken = exprNewToken(EXPR_TOKEN_STR);
char *strval = cJSON_GetStringValue(item);
eleToken->str.heapstr = RedisModule_Strdup(strval);
eleToken->str.start = eleToken->str.heapstr;
eleToken->str.len = strlen(eleToken->str.heapstr);
obj->tuple.ele[j] = eleToken;
}
}
return obj;
}
return NULL; // No conversion possible for this type.
}
/* Execute the compiled expression program. Returns 1 if the final stack
* value evaluates to true, 0 otherwise. Also returns 0 if any selector
* cannot be resolved against the provided JSON attributes. */
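/* For example, with json set to "{\"year\": 1984}", running the
* compiled expression ".year > 1980" returns 1. */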
int exprRun(exprstate *es, char *json, size_t json_len) {
exprStackReset(&es->values_stack);
cJSON *parsed_json = NULL;
// Execute each instruction in the program.
for (int i = 0; i < es->program.numitems; i++) {
exprtoken *t = es->program.items[i];
// Handle selectors by calling the callback.
if (t->token_type == EXPR_TOKEN_SELECTOR) {
if (json != NULL) {
cJSON *attrib = NULL;
if (parsed_json == NULL) {
parsed_json = cJSON_ParseWithLength(json,json_len);
// Will be left as NULL if the above fails.
}
if (parsed_json) {
char item_name[128];
if (t->str.len > 0 && t->str.len < sizeof(item_name)) {
memcpy(item_name,t->str.start,t->str.len);
item_name[t->str.len] = 0;
attrib = cJSON_GetObjectItem(parsed_json,item_name);
}
/* Fill the token according to the JSON type stored
* at the attribute. */
if (attrib) {
exprtoken *obj = exprJsonToToken(attrib);
if (obj) {
exprStackPush(&es->values_stack, obj);
continue;
}
}
}
}
// Selector not found or JSON object not convertible to
// expression tokens. Evaluate the expression to false.
if (parsed_json) cJSON_Delete(parsed_json);
return 0;
}
// Push non-operator values directly onto the stack.
if (t->token_type != EXPR_TOKEN_OP) {
exprStackPush(&es->values_stack, t);
exprTokenRetain(t);
continue;
}
// Handle operators.
exprtoken *result = exprNewToken(EXPR_TOKEN_NUM);
// Pop operands - we know we have enough from compile-time checks.
exprtoken *b = exprStackPop(&es->values_stack);
exprtoken *a = NULL;
if (exprGetOpArity(t->opcode) == 2) {
a = exprStackPop(&es->values_stack);
}
switch(t->opcode) {
case EXPR_OP_NOT:
result->num = exprTokenToBool(b) == 0 ? 1 : 0;
break;
case EXPR_OP_POW: {
double base = exprTokenToNum(a);
double exp = exprTokenToNum(b);
result->num = pow(base, exp);
break;
}
case EXPR_OP_MULT:
result->num = exprTokenToNum(a) * exprTokenToNum(b);
break;
case EXPR_OP_DIV:
result->num = exprTokenToNum(a) / exprTokenToNum(b);
break;
case EXPR_OP_MOD: {
double va = exprTokenToNum(a);
double vb = exprTokenToNum(b);
result->num = fmod(va, vb);
break;
}
case EXPR_OP_SUM:
result->num = exprTokenToNum(a) + exprTokenToNum(b);
break;
case EXPR_OP_DIFF:
result->num = exprTokenToNum(a) - exprTokenToNum(b);
break;
case EXPR_OP_GT:
result->num = exprTokenToNum(a) > exprTokenToNum(b) ? 1 : 0;
break;
case EXPR_OP_GTE:
result->num = exprTokenToNum(a) >= exprTokenToNum(b) ? 1 : 0;
break;
case EXPR_OP_LT:
result->num = exprTokenToNum(a) < exprTokenToNum(b) ? 1 : 0;
break;
case EXPR_OP_LTE:
result->num = exprTokenToNum(a) <= exprTokenToNum(b) ? 1 : 0;
break;
case EXPR_OP_EQ:
result->num = exprTokensEqual(a, b) ? 1 : 0;
break;
case EXPR_OP_NEQ:
result->num = !exprTokensEqual(a, b) ? 1 : 0;
break;
case EXPR_OP_IN: {
// For 'in' operator, b must be a tuple.
result->num = 0; // Default to false.
if (b->token_type == EXPR_TOKEN_TUPLE) {
for (size_t j = 0; j < b->tuple.len; j++) {
if (exprTokensEqual(a, b->tuple.ele[j])) {
result->num = 1; // Found a match.
break;
}
}
}
break;
}
case EXPR_OP_AND:
result->num =
exprTokenToBool(a) != 0 && exprTokenToBool(b) != 0 ? 1 : 0;
break;
case EXPR_OP_OR:
result->num =
exprTokenToBool(a) != 0 || exprTokenToBool(b) != 0 ? 1 : 0;
break;
default:
// Do nothing: we don't want runtime errors.
break;
}
// Free operands and push result.
if (a) exprTokenRelease(a);
exprTokenRelease(b);
exprStackPush(&es->values_stack, result);
}
if (parsed_json) cJSON_Delete(parsed_json);
// Get final result from stack.
exprtoken *final = exprStackPop(&es->values_stack);
if (final == NULL) return 0;
// Convert result to boolean.
int retval = exprTokenToBool(final);
exprTokenRelease(final);
return retval;
}
/* ============================ Simple test main ============================ */
#ifdef TEST_MAIN
void exprPrintToken(exprtoken *t) {
switch(t->token_type) {
case EXPR_TOKEN_EOF:
printf("EOF");
break;
case EXPR_TOKEN_NUM:
printf("NUM:%g", t->num);
break;
case EXPR_TOKEN_STR:
printf("STR:\"%.*s\"", (int)t->str.len, t->str.start);
break;
case EXPR_TOKEN_SELECTOR:
printf("SEL:%.*s", (int)t->str.len, t->str.start);
break;
case EXPR_TOKEN_OP:
printf("OP:");
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
if (ExprOptable[i].opcode == t->opcode) {
printf("%s", ExprOptable[i].opname);
break;
}
}
break;
default:
printf("UNKNOWN");
break;
}
}
void exprPrintStack(exprstack *stack, const char *name) {
printf("%s (%d items):", name, stack->numitems);
for (int j = 0; j < stack->numitems; j++) {
printf(" ");
exprPrintToken(stack->items[j]);
}
printf("\n");
}
int main(int argc, char **argv) {
char *testexpr = "(5+2)*3 and .year > 1980 and 'foo' == 'foo'";
char *testjson = "{\"year\": 1984, \"name\": \"The Matrix\"}";
if (argc >= 2) testexpr = argv[1];
if (argc >= 3) testjson = argv[2];
printf("Compiling expression: %s\n", testexpr);
int errpos = 0;
exprstate *es = exprCompile(testexpr,&errpos);
if (es == NULL) {
printf("Compilation failed near \"...%s\"\n", testexpr+errpos);
return 1;
}
exprPrintStack(&es->tokens, "Tokens");
exprPrintStack(&es->program, "Program");
printf("Running against object: %s\n", testjson);
int result = exprRun(es,testjson,strlen(testjson));
printf("Result1: %s\n", result ? "True" : "False");
result = exprRun(es,testjson,strlen(testjson));
printf("Result2: %s\n", result ? "True" : "False");
exprFree(es);
return 0;
}
#endif

2718
modules/vector-sets/hnsw.c Normal file

File diff suppressed because it is too large

183
modules/vector-sets/hnsw.h Normal file
View file

@ -0,0 +1,183 @@
/*
* HNSW (Hierarchical Navigable Small World) Implementation
* Based on the paper by Yu. A. Malkov, D. A. Yashunin
*
* Copyright(C) 2024-Present Redis Ltd. All Rights Reserved.
*/
#ifndef HNSW_H
#define HNSW_H
#include <pthread.h>
#include <stdatomic.h>
#define HNSW_DEFAULT_M 16 /* Used when 0 is given at creation time. */
#define HNSW_MIN_M 4 /* Probably even too low already. */
#define HNSW_MAX_M 4096 /* Safeguard sanity limit. */
#define HNSW_MAX_THREADS 32 /* Maximum number of concurrent threads */
/* Quantization types you can enable at creation time in hnsw_new() */
#define HNSW_QUANT_NONE 0 // No quantization.
#define HNSW_QUANT_Q8 1 // Q8 quantization.
#define HNSW_QUANT_BIN 2 // Binary quantization.
/* Layer structure for HNSW nodes. Each node will have from one to a few
* of this depending on its level. */
typedef struct {
struct hnswNode **links; /* Array of neighbors for this layer */
uint32_t num_links; /* Number of used links */
uint32_t max_links; /* Maximum links for this layer. We may
* reallocate the node in very particular
* conditions in order to allow linking of
* new inserted nodes, so this may change
* dynamically and be > M*2 for a small set of
* nodes. */
float worst_distance; /* Distance to the worst neighbor */
uint32_t worst_idx; /* Index of the worst neighbor */
} hnswNodeLayer;
/* Node structure for HNSW graph */
typedef struct hnswNode {
uint32_t level; /* Node's maximum level */
uint64_t id; /* Unique identifier, may be useful in order to
* have a bitmap of visited nodes to use as
* alternative to epoch / visited_epoch.
* Also used in serialization in order to retain
* links specifying IDs. */
void *vector; /* The vector, quantized or not. */
float quants_range; /* Quantization range for this vector:
* min/max values will be in the range
* -quants_range, +quants_range */
float l2; /* L2 before normalization. */
/* Last time (epoch) this node was visited. We need one per thread.
* This avoids having a different data structure where we track
* visited nodes, but costs memory per node. */
uint64_t visited_epoch[HNSW_MAX_THREADS];
void *value; /* Associated value */
struct hnswNode *prev, *next; /* Prev/Next node in the list starting at
* HNSW->head. */
/* Links (and links info) per each layer. Note that this is part
* of the node allocation to be more cache friendly: reliable 3% speedup
* on Apple silicon, and does not make anything more complex. */
hnswNodeLayer layers[];
} hnswNode;
struct HNSW;
/* It is possible to navigate an HNSW with a cursor that guarantees
* visiting all the elements that remain in the HNSW from the start to the
* end of the process (but not the new ones, so that the process will
* eventually finish). Check hnsw_cursor_init(), hnsw_cursor_next() and
* hnsw_cursor_free(). */
typedef struct hnswCursor {
struct HNSW *index; // Reference to the index of this cursor.
hnswNode *current; // Element to report when hnsw_cursor_next() is called.
struct hnswCursor *next; // Next cursor active.
} hnswCursor;
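/* Typical cursor usage (a sketch; depending on concurrent writers,
* hnsw_cursor_acquire_lock() / hnsw_cursor_release_lock() may be
* needed around access to the returned nodes):
*
* hnswCursor *c = hnsw_cursor_init(index);
* hnswNode *n;
* while ((n = hnsw_cursor_next(c)) != NULL) {
* // ... use n ...
* }
* hnsw_cursor_free(c);
*/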
/* Main HNSW index structure */
typedef struct HNSW {
hnswNode *enter_point; /* Entry point for the graph */
uint32_t M; /* M as in the paper: layer 0 has M*2 max
neighbors (M populated at insertion time)
while all the other layers have M neighbors. */
uint32_t max_level; /* Current maximum level in the graph */
uint32_t vector_dim; /* Dimensionality of stored vectors */
uint64_t node_count; /* Total number of nodes */
_Atomic uint64_t last_id; /* Last node ID used */
uint64_t current_epoch[HNSW_MAX_THREADS]; /* Current epoch for visit tracking */
hnswNode *head; /* Linked list of nodes. Last first */
/* We have two locks here:
* 1. A global_lock that is used to perform write operations blocking all
* the readers.
* 2. One mutex per epoch slot, in order for read operations to acquire
* a lock on a specific slot to use epochs tracking of visited nodes. */
pthread_rwlock_t global_lock; /* Global read-write lock */
pthread_mutex_t slot_locks[HNSW_MAX_THREADS]; /* Per-slot locks */
_Atomic uint32_t next_slot; /* Next thread slot to try */
_Atomic uint64_t version; /* Version for optimistic concurrency, this is
* incremented on deletions and entry point
* updates. */
uint32_t quant_type; /* Quantization used. HNSW_QUANT_... */
hnswCursor *cursors;
} HNSW;
/* Serialized node. This structure is used as return value of
* hnsw_serialize_node(). */
typedef struct hnswSerNode {
void *vector;
uint32_t vector_size;
uint64_t *params;
uint32_t params_count;
} hnswSerNode;
/* Insert preparation context */
typedef struct InsertContext InsertContext;
/* Core HNSW functions */
HNSW *hnsw_new(uint32_t vector_dim, uint32_t quant_type, uint32_t m);
void hnsw_free(HNSW *index,void(*free_value)(void*value));
void hnsw_node_free(hnswNode *node);
void hnsw_print_stats(HNSW *index);
hnswNode *hnsw_insert(HNSW *index, const float *vector, const int8_t *qvector,
float qrange, uint64_t id, void *value, int ef);
int hnsw_search(HNSW *index, const float *query, uint32_t k,
hnswNode **neighbors, float *distances, uint32_t slot,
int query_vector_is_normalized);
int hnsw_search_with_filter
(HNSW *index, const float *query_vector, uint32_t k,
hnswNode **neighbors, float *distances, uint32_t slot,
int query_vector_is_normalized,
int (*filter_callback)(void *value, void *privdata),
void *filter_privdata, uint32_t max_candidates);
void hnsw_get_node_vector(HNSW *index, hnswNode *node, float *vec);
int hnsw_delete_node(HNSW *index, hnswNode *node, void(*free_value)(void*value));
hnswNode *hnsw_random_node(HNSW *index, int slot);
/* Thread safety functions. */
int hnsw_acquire_read_slot(HNSW *index);
void hnsw_release_read_slot(HNSW *index, int slot);
/* Optimistic insertion API. */
InsertContext *hnsw_prepare_insert(HNSW *index, const float *vector, const int8_t *qvector, float qrange, uint64_t id, int ef);
hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx, void *value);
void hnsw_free_insert_context(InsertContext *ctx);
/* Serialization. */
hnswSerNode *hnsw_serialize_node(HNSW *index, hnswNode *node);
void hnsw_free_serialized_node(hnswSerNode *sn);
hnswNode *hnsw_insert_serialized(HNSW *index, void *vector, uint64_t *params, uint32_t params_len, void *value);
int hnsw_deserialize_index(HNSW *index);
// Helper function in case the user wants to directly copy
// the vector bytes.
uint32_t hnsw_quants_bytes(HNSW *index);
/* Cursors. */
hnswCursor *hnsw_cursor_init(HNSW *index);
void hnsw_cursor_free(hnswCursor *cursor);
hnswNode *hnsw_cursor_next(hnswCursor *cursor);
int hnsw_cursor_acquire_lock(hnswCursor *cursor);
void hnsw_cursor_release_lock(hnswCursor *cursor);
/* Allocator selection. */
void hnsw_set_allocator(void (*free_ptr)(void*), void *(*malloc_ptr)(size_t),
void *(*realloc_ptr)(void*, size_t));
/* Testing. */
int hnsw_validate_graph(HNSW *index, uint64_t *connected_nodes, int *reciprocal_links);
void hnsw_test_graph_recall(HNSW *index, int test_ef, int verbose);
float hnsw_distance(HNSW *index, hnswNode *a, hnswNode *b);
int hnsw_ground_truth_with_filter
(HNSW *index, const float *query_vector, uint32_t k,
hnswNode **neighbors, float *distances, uint32_t slot,
int query_vector_is_normalized,
int (*filter_callback)(void *value, void *privdata),
void *filter_privdata);
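/* Usage sketch (illustrative only; error checking omitted, 'my_value'
* and 'my_value_free' are placeholder application hooks, and the id
* passed to hnsw_insert() is arbitrary here):
*
* HNSW *idx = hnsw_new(4, HNSW_QUANT_NONE, 0); // 0 selects default M.
* float v[4] = {0.1f, 0.2f, 0.3f, 0.4f};
* hnsw_insert(idx, v, NULL, 0, 1, my_value, 200); // EF = 200.
* int slot = hnsw_acquire_read_slot(idx);
* hnswNode *neighbors[10];
* float distances[10];
* int found = hnsw_search(idx, v, 10, neighbors, distances, slot, 0);
* hnsw_release_read_slot(idx, slot);
* hnsw_free(idx, my_value_free);
*/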
#endif /* HNSW_H */

File diff suppressed because it is too large

225
modules/vector-sets/test.py Executable file
View file

@ -0,0 +1,225 @@
#!/usr/bin/env python3
#
# Vector set tests.
# A Redis instance should be running on the default port.
# Copyright(C) 2024-2025 Salvatore Sanfilippo.
# All Rights Reserved.
import redis
import random
import struct
import math
import time
import sys
import os
import importlib
import inspect
from typing import List, Tuple, Optional
from dataclasses import dataclass
def colored(text: str, color: str) -> str:
colors = {
'red': '\033[91m',
'green': '\033[92m'
}
reset = '\033[0m'
return f"{colors.get(color, '')}{text}{reset}"
@dataclass
class VectorData:
vectors: List[List[float]]
names: List[str]
def find_k_nearest(self, query_vector: List[float], k: int) -> List[Tuple[str, float]]:
"""Find k-nearest neighbors using the same scoring as Redis VSIM WITHSCORES."""
similarities = []
query_norm = math.sqrt(sum(x*x for x in query_vector))
if query_norm == 0:
return []
for i, vec in enumerate(self.vectors):
vec_norm = math.sqrt(sum(x*x for x in vec))
if vec_norm == 0:
continue
dot_product = sum(a*b for a,b in zip(query_vector, vec))
cosine_sim = dot_product / (query_norm * vec_norm)
distance = 1.0 - cosine_sim
redis_similarity = 1.0 - (distance/2.0)
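# Equivalent to (1 + cosine_sim) / 2: maps cosine in [-1, 1] to [0, 1].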
similarities.append((self.names[i], redis_similarity))
similarities.sort(key=lambda x: x[1], reverse=True)
return similarities[:k]
def generate_random_vector(dim: int) -> List[float]:
"""Generate a random normalized vector."""
vec = [random.gauss(0, 1) for _ in range(dim)]
norm = math.sqrt(sum(x*x for x in vec))
return [x/norm for x in vec]
def fill_redis_with_vectors(r: redis.Redis, key: str, count: int, dim: int,
with_reduce: Optional[int] = None) -> VectorData:
"""Fill Redis with random vectors and return a VectorData object for verification."""
vectors = []
names = []
r.delete(key)
for i in range(count):
vec = generate_random_vector(dim)
name = f"{key}:item:{i}"
vectors.append(vec)
names.append(name)
vec_bytes = struct.pack(f'{dim}f', *vec)
args = [key]
if with_reduce:
args.extend(['REDUCE', with_reduce])
args.extend(['FP32', vec_bytes, name])
r.execute_command('VADD', *args)
return VectorData(vectors=vectors, names=names)
class TestCase:
def __init__(self):
self.error_msg = None
self.error_details = None
self.test_key = f"test:{self.__class__.__name__.lower()}"
# Primary Redis instance (default port)
self.redis = redis.Redis()
# Replica Redis instance (port 6380)
self.replica = redis.Redis(port=6380)
# Replication status
self.replication_setup = False
def setup(self):
self.redis.delete(self.test_key)
def teardown(self):
self.redis.delete(self.test_key)
def setup_replication(self) -> bool:
"""
Setup replication between primary and replica Redis instances.
Returns True if replication is successfully established, False otherwise.
"""
# Configure replica to replicate from primary
self.replica.execute_command('REPLICAOF', '127.0.0.1', 6379)
# Wait for replication to be established
max_attempts = 10
for attempt in range(max_attempts):
# Check replication info
repl_info = self.replica.info('replication')
# Check if replication is established
if (repl_info.get('role') == 'slave' and
repl_info.get('master_host') == '127.0.0.1' and
repl_info.get('master_port') == 6379 and
repl_info.get('master_link_status') == 'up'):
self.replication_setup = True
return True
# Wait before next attempt
time.sleep(0.5)
# If we get here, replication wasn't established
self.error_msg = "Failed to establish replication between primary and replica"
return False
def test(self):
raise NotImplementedError("Subclasses must implement test method")
def run(self):
try:
self.setup()
self.test()
return True
except AssertionError as e:
self.error_msg = str(e)
import traceback
self.error_details = traceback.format_exc()
return False
except Exception as e:
self.error_msg = f"Unexpected error: {str(e)}"
import traceback
self.error_details = traceback.format_exc()
return False
finally:
self.teardown()
def getname(self):
"""Each test class should override this to provide its name"""
return self.__class__.__name__
def estimated_runtime(self):
""""Each test class should override this if it takes a significant amount of time to run. Default is 100ms"""
return 0.1
def find_test_classes():
test_classes = []
tests_dir = 'tests'
if not os.path.exists(tests_dir):
return []
for file in os.listdir(tests_dir):
if file.endswith('.py'):
module_name = f"tests.{file[:-3]}"
try:
module = importlib.import_module(module_name)
for name, obj in inspect.getmembers(module):
if inspect.isclass(obj) and obj.__name__ != 'TestCase' and hasattr(obj, 'test'):
test_classes.append(obj())
except Exception as e:
print(f"Error loading {file}: {e}")
return test_classes
def run_tests():
print("================================================\n"+
"Make sure to have Redis running in the localhost\n"+
"with --enable-debug-command yes\n"+
"Both primary (6379) and replica (6380) instances\n"+
"================================================\n")
tests = find_test_classes()
if not tests:
print("No tests found!")
return
# Sort tests by estimated runtime
tests.sort(key=lambda t: t.estimated_runtime())
passed = 0
total = len(tests)
for test in tests:
print(f"{test.getname()}: ", end="")
sys.stdout.flush()
start_time = time.time()
success = test.run()
duration = time.time() - start_time
if success:
print(colored("OK", "green"), f"({duration:.2f}s)")
passed += 1
else:
print(colored("ERR", "red"), f"({duration:.2f}s)")
print(f"Error: {test.error_msg}")
if test.error_details:
print("\nTraceback:")
print(test.error_details)
print("\n" + "="*50)
print(f"\nTest Summary: {passed}/{total} tests passed")
if passed == total:
print(colored("\nALL TESTS PASSED!", "green"))
else:
print(colored(f"\n{total-passed} TESTS FAILED!", "red"))
if __name__ == "__main__":
run_tests()

View file

@ -0,0 +1,21 @@
from test import TestCase, generate_random_vector
import struct
class BasicCommands(TestCase):
def getname(self):
return "VADD, VDIM, VCARD basic usage"
def test(self):
# Test VADD
vec = generate_random_vector(4)
vec_bytes = struct.pack('4f', *vec)
result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
assert result == 1, "VADD should return 1 for first item"
# Test VDIM
dim = self.redis.execute_command('VDIM', self.test_key)
assert dim == 4, f"VDIM should return 4, got {dim}"
# Test VCARD
card = self.redis.execute_command('VCARD', self.test_key)
assert card == 1, f"VCARD should return 1, got {card}"

View file

@ -0,0 +1,35 @@
from test import TestCase
class BasicSimilarity(TestCase):
def getname(self):
return "VSIM reported distance makes sense with 4D vectors"
def test(self):
# Add two very similar vectors, one different
vec1 = [1, 0, 0, 0]
vec2 = [0.99, 0.01, 0, 0]
vec3 = [0.1, 1, -1, 0.5]
# Add vectors using VALUES format
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1], f'{self.test_key}:item:1')
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec2], f'{self.test_key}:item:2')
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec3], f'{self.test_key}:item:3')
# Query similarity with vec1
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1], 'WITHSCORES')
# Convert results to dictionary
results_dict = {}
for i in range(0, len(result), 2):
key = result[i].decode()
score = float(result[i+1])
results_dict[key] = score
# Verify results
assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high"
assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity"
assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity"

View file

@ -0,0 +1,156 @@
from test import TestCase, generate_random_vector
import threading
import time
import struct
class ThreadingStressTest(TestCase):
def getname(self):
return "Concurrent VADD/DEL/VSIM operations stress test"
def estimated_runtime(self):
return 10 # Test runs for 10 seconds
def test(self):
# Constants - easy to modify if needed
NUM_VADD_THREADS = 10
NUM_VSIM_THREADS = 1
NUM_DEL_THREADS = 1
TEST_DURATION = 10 # seconds
VECTOR_DIM = 100
DEL_INTERVAL = 1 # seconds
# Shared flags and state
stop_event = threading.Event()
error_list = []
error_lock = threading.Lock()
def log_error(thread_name, error):
with error_lock:
error_list.append(f"{thread_name}: {error}")
def vadd_worker(thread_id):
"""Thread function to perform VADD operations"""
thread_name = f"VADD-{thread_id}"
try:
vector_count = 0
while not stop_event.is_set():
try:
# Generate random vector
vec = generate_random_vector(VECTOR_DIM)
vec_bytes = struct.pack(f'{VECTOR_DIM}f', *vec)
# Add vector with CAS option
self.redis.execute_command(
'VADD',
self.test_key,
'FP32',
vec_bytes,
f'{self.test_key}:item:{thread_id}:{vector_count}',
'CAS'
)
vector_count += 1
# Small sleep to reduce CPU pressure
if vector_count % 10 == 0:
time.sleep(0.001)
except Exception as e:
log_error(thread_name, f"Error: {str(e)}")
time.sleep(0.1) # Slight backoff on error
except Exception as e:
log_error(thread_name, f"Thread error: {str(e)}")
def del_worker():
"""Thread function that deletes the key periodically"""
thread_name = "DEL"
try:
del_count = 0
while not stop_event.is_set():
try:
# Sleep first, then delete
time.sleep(DEL_INTERVAL)
if stop_event.is_set():
break
self.redis.delete(self.test_key)
del_count += 1
except Exception as e:
log_error(thread_name, f"Error: {str(e)}")
except Exception as e:
log_error(thread_name, f"Thread error: {str(e)}")
def vsim_worker(thread_id):
"""Thread function to perform VSIM operations"""
thread_name = f"VSIM-{thread_id}"
try:
search_count = 0
while not stop_event.is_set():
try:
# Generate query vector
query_vec = generate_random_vector(VECTOR_DIM)
query_str = [str(x) for x in query_vec]
# Perform similarity search
args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM]
args.extend(query_str)
args.extend(['COUNT', 10])
self.redis.execute_command(*args)
search_count += 1
# Small sleep to reduce CPU pressure
if search_count % 10 == 0:
time.sleep(0.005)
except Exception as e:
# Don't log empty array errors, as they're expected when key doesn't exist
if "empty array" not in str(e).lower():
log_error(thread_name, f"Error: {str(e)}")
time.sleep(0.1) # Slight backoff on error
except Exception as e:
log_error(thread_name, f"Thread error: {str(e)}")
# Start all threads
threads = []
# VADD threads
for i in range(NUM_VADD_THREADS):
thread = threading.Thread(target=vadd_worker, args=(i,))
thread.start()
threads.append(thread)
# DEL threads
for _ in range(NUM_DEL_THREADS):
thread = threading.Thread(target=del_worker)
thread.start()
threads.append(thread)
# VSIM threads
for i in range(NUM_VSIM_THREADS):
thread = threading.Thread(target=vsim_worker, args=(i,))
thread.start()
threads.append(thread)
# Let the test run for the specified duration
time.sleep(TEST_DURATION)
# Signal all threads to stop
stop_event.set()
# Wait for threads to finish
for thread in threads:
thread.join(timeout=2.0)
# Check if Redis is still responsive
try:
ping_result = self.redis.ping()
assert ping_result, "Redis did not respond to PING after stress test"
except Exception as e:
assert False, f"Redis connection failed after stress test: {str(e)}"
# Report any errors for diagnosis, but don't fail the test unless PING fails
if error_list:
error_count = len(error_list)
print(f"\nEncountered {error_count} errors during stress test.")
print("First 5 errors:")
for error in error_list[:5]:
print(f"- {error}")

View file

@ -0,0 +1,48 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import threading, time
class ConcurrentVSIMAndDEL(TestCase):
def getname(self):
return "Concurrent VSIM and DEL operations"
def estimated_runtime(self):
return 2
def test(self):
# Fill the key with 5000 random vectors
dim = 128
count = 5000
fill_redis_with_vectors(self.redis, self.test_key, count, dim)
# List to store results from threads
thread_results = []
def vsim_thread():
"""Thread function to perform VSIM operations until the key is deleted"""
while True:
query_vec = generate_random_vector(dim)
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in query_vec], 'COUNT', 10)
if not result:
# Empty array detected, key is deleted
thread_results.append(True)
break
# Start multiple threads to perform VSIM operations
threads = []
for _ in range(4): # Start 4 threads
t = threading.Thread(target=vsim_thread)
t.start()
threads.append(t)
# Delete the key while threads are still running
time.sleep(1)
self.redis.delete(self.test_key)
# Wait for all threads to finish (they will exit once they detect the key is deleted)
for t in threads:
t.join()
# Verify that all threads detected an empty array or error
assert len(thread_results) == len(threads), "Not all threads detected the key deletion"
assert all(thread_results), "Some threads did not detect an empty array or error after DEL"

View file

@ -0,0 +1,39 @@
from test import TestCase, generate_random_vector
import struct
class DebugDigestTest(TestCase):
def getname(self):
return "[regression] DEBUG DIGEST-VALUE with attributes"
def test(self):
# Generate random vectors
vec1 = generate_random_vector(4)
vec2 = generate_random_vector(4)
vec_bytes1 = struct.pack('4f', *vec1)
vec_bytes2 = struct.pack('4f', *vec2)
# Add vectors to the key, one with attribute, one without
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')
# Call DEBUG DIGEST-VALUE on the key
try:
digest1 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value"
# Change attribute and verify digest changes
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}')
digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change"
assert digest1 != digest2, "Digest should change when an attribute is modified"
# Remove attribute and verify digest changes again
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '')
digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal"
assert digest2 != digest3, "Digest should change when an attribute is removed"
        except AssertionError:
            raise
        except Exception as e:
            raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}")

View file

@ -0,0 +1,173 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random
"""
A note about this test:
It was experimentally tried to modify hnsw.c in order to
avoid calling hnsw_reconnect_nodes(). In this case, the test
fails very often with EF set to 250, while it hardly
fails at all with the same parameters if hnsw_reconnect_nodes()
is called.
Note that for the nature of the test (it is very strict) it can
still fail from time to time, without this signaling any
actual bug.
"""
class VREM(TestCase):
def getname(self):
return "Deletion and graph state after deletion"
def estimated_runtime(self):
return 2.0
def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None):
"""Format neighbors with their similarity scores and status indicators"""
if not links_result:
return "No neighbors"
output = []
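        # VLINKS returns one array of neighbors per layer, from the highest
        # layer down to layer 0, hence the reversed level numbering below.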
for level, neighbors in enumerate(links_result):
level_num = len(links_result) - level - 1
output.append(f"Level {level_num}:")
# Get neighbors and scores
neighbors_with_scores = []
for i in range(0, len(neighbors), 2):
neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i]
score = float(neighbors[i+1]) if i+1 < len(neighbors) else None
status = ""
# For old links, mark deleted ones
if items_to_remove and neighbor in items_to_remove:
status = " [lost]"
# For new links, mark newly added ones
elif old_links is not None:
# Check if this neighbor was in the old links at this level
was_present = False
if old_links and level < len(old_links):
old_neighbors = [n.decode() if isinstance(n, bytes) else n
for n in old_links[level]]
was_present = neighbor in old_neighbors
if not was_present:
status = " [gained]"
if score is not None:
neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}")
else:
neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}")
output.extend([" " + n for n in neighbors_with_scores])
return "\n".join(output)
def test(self):
# 1. Fill server with random elements
dim = 128
count = 5000
data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
# 2. Do VSIM to get 200 items
query_vec = generate_random_vector(dim)
results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in query_vec],
'COUNT', 200, 'WITHSCORES')
# Convert results to list of (item, score) pairs, sorted by score
items = []
for i in range(0, len(results), 2):
item = results[i].decode()
score = float(results[i+1])
items.append((item, score))
items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity
# Store the graph structure for all items before deletion
neighbors_before = {}
for item, _ in items:
links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES')
if links: # Some items might not have links
neighbors_before[item] = links
# 3. Remove 100 random items
items_to_remove = set(item for item, _ in random.sample(items, 100))
# Keep track of top 10 non-removed items
top_remaining = []
for item, score in items:
if item not in items_to_remove:
top_remaining.append((item, score))
if len(top_remaining) == 10:
break
# Remove the items
for item in items_to_remove:
result = self.redis.execute_command('VREM', self.test_key, item)
assert result == 1, f"VREM failed to remove {item}"
# 4. Do VSIM again with same vector
new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in query_vec],
'COUNT', 200, 'WITHSCORES',
'EF', 500)
# Convert new results to dict of item -> score
new_scores = {}
for i in range(0, len(new_results), 2):
item = new_results[i].decode()
score = float(new_results[i+1])
new_scores[item] = score
failure = False
failed_item = None
failed_reason = None
# 5. Verify all top 10 non-removed items are still found with similar scores
for item, old_score in top_remaining:
if item not in new_scores:
failure = True
failed_item = item
failed_reason = "missing"
break
new_score = new_scores[item]
if abs(new_score - old_score) >= 0.01:
failure = True
failed_item = item
failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}"
break
if failure:
print("\nTest failed!")
print(f"Problem with item: {failed_item} ({failed_reason})")
print("\nOriginal neighbors (with similarity scores):")
if failed_item in neighbors_before:
print(self.format_neighbors_with_scores(
neighbors_before[failed_item],
items_to_remove=items_to_remove))
else:
print("No neighbors found in original graph")
print("\nCurrent neighbors (with similarity scores):")
current_links = self.redis.execute_command('VLINKS', self.test_key,
failed_item, 'WITHSCORES')
if current_links:
print(self.format_neighbors_with_scores(
current_links,
old_links=neighbors_before.get(failed_item)))
else:
print("No neighbors in current graph")
print("\nOriginal results (top 20):")
for item, score in items[:20]:
deleted = "[deleted]" if item in items_to_remove else ""
print(f"{item}: {score:.6f} {deleted}")
print("\nNew results after removal (top 20):")
new_items = []
for i in range(0, len(new_results), 2):
item = new_results[i].decode()
score = float(new_results[i+1])
new_items.append((item, score))
new_items.sort(key=lambda x: x[1], reverse=True)
for item, score in new_items[:20]:
print(f"{item}: {score:.6f}")
raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.")

View file

@ -0,0 +1,67 @@
from test import TestCase, generate_random_vector
import struct
import redis.exceptions
class DimensionValidation(TestCase):
def getname(self):
return "[regression] Dimension Validation with Projection"
def estimated_runtime(self):
return 0.5
def test(self):
# Test scenario 1: Create a set with projection
original_dim = 100
reduced_dim = 50
# Create the initial vector and set with projection
vec1 = generate_random_vector(original_dim)
vec1_bytes = struct.pack(f'{original_dim}f', *vec1)
# Add first vector with projection
result = self.redis.execute_command('VADD', self.test_key,
'REDUCE', reduced_dim,
'FP32', vec1_bytes, f'{self.test_key}:item:1')
assert result == 1, "First VADD with REDUCE should return 1"
# Check VINFO returns the correct projection information
info = self.redis.execute_command('VINFO', self.test_key)
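        # VINFO replies with a flat array of alternating field names and
        # values, so build a dict from consecutive pairs.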
info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])}
assert 'vector-dim' in info_map, "VINFO should contain vector-dim"
        assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info_map['vector-dim']}"
        assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim"
        assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info_map['projection-input-dim']}"
# Test scenario 2: Try adding a mismatched vector - should fail
wrong_dim = 80
wrong_vec = generate_random_vector(wrong_dim)
wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec)
# This should fail with dimension mismatch error
try:
self.redis.execute_command('VADD', self.test_key,
'REDUCE', reduced_dim,
'FP32', wrong_vec_bytes, f'{self.test_key}:item:2')
assert False, "VADD with wrong dimension should fail"
except redis.exceptions.ResponseError as e:
assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}"
# Test scenario 3: Add a correctly-sized vector
vec2 = generate_random_vector(original_dim)
vec2_bytes = struct.pack(f'{original_dim}f', *vec2)
# This should succeed
result = self.redis.execute_command('VADD', self.test_key,
'REDUCE', reduced_dim,
'FP32', vec2_bytes, f'{self.test_key}:item:3')
assert result == 1, "VADD with correct dimensions should succeed"
# Check VSIM also validates input dimensions
wrong_query = generate_random_vector(wrong_dim)
try:
self.redis.execute_command('VSIM', self.test_key,
'VALUES', wrong_dim, *[str(x) for x in wrong_query],
'COUNT', 10)
assert False, "VSIM with wrong dimension should fail"
except redis.exceptions.ResponseError as e:
assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}"

View file

@ -0,0 +1,27 @@
from test import TestCase, generate_random_vector
import struct
class VREM_LastItemDeletesKey(TestCase):
def getname(self):
return "VREM last item deletes key"
def test(self):
# Generate a random vector
vec = generate_random_vector(4)
vec_bytes = struct.pack('4f', *vec)
# Add the vector to the key
result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
assert result == 1, "VADD should return 1 for first item"
# Verify the key exists
exists = self.redis.exists(self.test_key)
assert exists == 1, "Key should exist after VADD"
# Remove the item
result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1')
assert result == 1, "VREM should return 1 for successful removal"
# Verify the key no longer exists
exists = self.redis.exists(self.test_key)
assert exists == 0, "Key should no longer exist after VREM of last item"

View file

@ -0,0 +1,177 @@
from test import TestCase
class VSIMFilterExpressions(TestCase):
def getname(self):
return "VSIM FILTER expressions basic functionality"
def test(self):
# Create a small set of vectors with different attributes
# Basic vectors for testing - all orthogonal for clear results
vec1 = [1, 0, 0, 0]
vec2 = [0, 1, 0, 0]
vec3 = [0, 0, 1, 0]
vec4 = [0, 0, 0, 1]
vec5 = [0.5, 0.5, 0, 0]
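        # vec5 lies between vec1 and vec2; the other four are mutually
        # orthogonal, so under the 1 - (distance/2) scoring any two of them
        # have a similarity of exactly 0.5.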
# Add vectors with various attributes
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1], f'{self.test_key}:item:1')
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1',
'{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}')
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec2], f'{self.test_key}:item:2')
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2',
'{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}')
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec3], f'{self.test_key}:item:3')
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3',
'{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}')
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec4], f'{self.test_key}:item:4')
# Item 4 has no attribute at all
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
*[str(x) for x in vec5], f'{self.test_key}:item:5')
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5',
'invalid json') # Intentionally malformed JSON
# Test 1: Basic equality with numbers
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age == 25')
assert len(result) == 1, "Expected 1 result for age == 25"
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25"
# Test 2: Greater than
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age > 25')
assert len(result) == 2, "Expected 2 results for age > 25"
# Test 3: Less than or equal
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age <= 30')
assert len(result) == 2, "Expected 2 results for age <= 30"
# Test 4: String equality
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.name == "Alice"')
assert len(result) == 1, "Expected 1 result for name == Alice"
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice"
# Test 5: String inequality
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.name != "Alice"')
assert len(result) == 2, "Expected 2 results for name != Alice"
# Test 6: Boolean value
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.active')
assert len(result) == 1, "Expected 1 result for .active being true"
# Test 7: Logical AND
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age > 20 and .age < 30')
assert len(result) == 1, "Expected 1 result for 20 < age < 30"
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30"
# Test 8: Logical OR
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age < 30 or .age > 35')
assert len(result) == 1, "Expected 1 result for age < 30 or age > 35"
# Test 9: Logical NOT
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '!(.age == 25)')
assert len(result) == 2, "Expected 2 results for NOT(age == 25)"
# Test 10: The "in" operator with array
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age in [25, 35]')
assert len(result) == 2, "Expected 2 results for age in [25, 35]"
# Test 11: The "in" operator with strings in array
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.name in ["Alice", "David"]')
assert len(result) == 1, "Expected 1 result for name in [Alice, David]"
# Test 12: Arithmetic operations - addition
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age + 10 > 40')
assert len(result) == 1, "Expected 1 result for age + 10 > 40"
# Test 13: Arithmetic operations - multiplication
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age * 2 > 60')
assert len(result) == 1, "Expected 1 result for age * 2 > 60"
# Test 14: Arithmetic operations - division
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age / 5 == 5')
assert len(result) == 1, "Expected 1 result for age / 5 == 5"
# Test 15: Arithmetic operations - modulo
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age % 2 == 0')
assert len(result) == 1, "Expected 1 result for age % 2 == 0"
# Test 16: Power operator
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age ** 2 > 900')
assert len(result) == 1, "Expected 1 result for age^2 > 900"
# Test 17: Missing attribute (should exclude items missing that attribute)
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.missing_field == "value"')
assert len(result) == 0, "Expected 0 results for missing_field == value"
# Test 18: No attribute set at all
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.any_field')
assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded"
# Test 19: Malformed JSON
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.any_field')
assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded"
# Test 20: Complex expression combining multiple operators
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")')
assert len(result) == 2, "Expected 2 results for the complex expression"
expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2']
assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression"
# Test 21: Parentheses to control operator precedence
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.age > (20 + 10)')
assert len(result) == 1, "Expected 1 result for age > (20 + 10)"
# Test 22: Array access (arrays evaluate to true)
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
*[str(x) for x in vec1],
'FILTER', '.scores')
assert len(result) == 3, "Expected 3 results for .scores (arrays evaluate to true)"

View file

@ -0,0 +1,668 @@
from test import TestCase, generate_random_vector
import struct
import random
import math
import json
import time
class VSIMFilterAdvanced(TestCase):
def getname(self):
return "VSIM FILTER comprehensive functionality testing"
def estimated_runtime(self):
return 15 # This test might take up to 15 seconds for the large dataset
def setup(self):
super().setup()
self.dim = 32 # Vector dimension
self.count = 5000 # Number of vectors for large tests
self.small_count = 50 # Number of vectors for small/quick tests
# Categories for attributes
self.categories = ["electronics", "furniture", "clothing", "books", "food"]
self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
self.years = list(range(2000, 2025))
def create_attributes(self, index):
"""Create realistic attributes for a vector"""
category = random.choice(self.categories)
city = random.choice(self.cities)
min_price, max_price = random.choice(self.price_ranges)
price = round(random.uniform(min_price, max_price), 2)
year = random.choice(self.years)
in_stock = random.random() > 0.3 # 70% chance of being in stock
rating = round(random.uniform(1, 5), 1)
views = int(random.expovariate(1/1000)) # Exponential distribution for page views
tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
k=random.randint(0, 3))
# Add some specific patterns for testing
# Every 10th item has a specific property combination for testing
is_premium = (index % 10 == 0)
# Create attributes dictionary
attrs = {
"id": index,
"category": category,
"location": city,
"price": price,
"year": year,
"in_stock": in_stock,
"rating": rating,
"views": views,
"tags": tags
}
if is_premium:
attrs["is_premium"] = True
attrs["special_features"] = ["premium", "warranty", "support"]
# Add sub-categories for more complex filters
if category == "electronics":
attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"])
elif category == "furniture":
attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"])
elif category == "clothing":
attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"])
# Add some intentionally missing fields for testing
if random.random() > 0.9: # 10% chance of missing price
del attrs["price"]
# Some items have promotion field
if random.random() > 0.7: # 30% chance of having a promotion
attrs["promotion"] = random.choice(["discount", "bundle", "gift"])
# Create invalid JSON for a small percentage of vectors
if random.random() > 0.98: # 2% chance of having invalid JSON
return "{{invalid json}}"
return json.dumps(attrs)
def create_vectors_with_attributes(self, key, count):
"""Create vectors and add attributes to them"""
vectors = []
names = []
attribute_map = {} # To store attributes for verification
# Create vectors
for i in range(count):
vec = generate_random_vector(self.dim)
vectors.append(vec)
name = f"{key}:item:{i}"
names.append(name)
# Add to Redis
vec_bytes = struct.pack(f'{self.dim}f', *vec)
self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name)
# Create and add attributes
attrs = self.create_attributes(i)
self.redis.execute_command('VSETATTR', key, name, attrs)
# Store attributes for later verification
try:
attribute_map[name] = json.loads(attrs) if '{' in attrs else None
except json.JSONDecodeError:
attribute_map[name] = None
return vectors, names, attribute_map
def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
"""Perform a linear search with filtering for verification"""
similarities = []
query_norm = math.sqrt(sum(x*x for x in query_vector))
if query_norm == 0:
return []
for i, vec in enumerate(vectors):
name = names[i]
attributes = attribute_map.get(name)
# Skip if doesn't match filter
if not self.matches_filter(attributes, filter_expr):
continue
vec_norm = math.sqrt(sum(x*x for x in vec))
if vec_norm == 0:
continue
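            # Map cosine similarity to the score Redis reports:
            # score = 1 - (cosine_distance / 2), so the distance range
            # [0, 2] becomes a [0, 1] similarity, 1 meaning same direction.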
dot_product = sum(a*b for a,b in zip(query_vector, vec))
cosine_sim = dot_product / (query_norm * vec_norm)
distance = 1.0 - cosine_sim
redis_similarity = 1.0 - (distance/2.0)
similarities.append((name, redis_similarity))
similarities.sort(key=lambda x: x[1], reverse=True)
return similarities[:k]
def matches_filter(self, attributes, filter_expr):
"""Filter matching for verification - uses Python eval to handle complex expressions"""
if attributes is None:
return False # No attributes or invalid JSON
# Replace JSON path selectors with Python dictionary access
py_expr = filter_expr
# Handle `.field` notation (replace with attributes['field'])
i = 0
while i < len(py_expr):
if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()):
# Find the end of the selector (stops at operators or whitespace)
j = i + 1
while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'):
j += 1
if j > i + 1: # Found a valid selector
field = py_expr[i+1:j]
# Use a safe access pattern that returns a default value based on context
py_expr = py_expr[:i] + f"attributes.get('{field}')" + py_expr[j:]
i = i + len(f"attributes.get('{field}')")
else:
i += 1
else:
i += 1
        # Convert the "!" operator to Python's "not", taking care not to
        # mangle the "!=" operator.
        py_expr = py_expr.replace('!=', '__NEQ__')
        py_expr = py_expr.replace('!', ' not ')
        py_expr = py_expr.replace('__NEQ__', '!=')
        try:
            # Evaluate the translated expression; a failure caused by missing
            # fields or type mismatches makes the whole expression false.
            result = eval(py_expr, {"attributes": attributes})
            return bool(result)
        except (TypeError, AttributeError):
            # This typically happens when trying to compare None with
            # numbers or other types, or when an attribute doesn't exist
            return False
        except Exception as e:
            print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
            return False
    def safe_decode(self, item):
return item.decode() if isinstance(item, bytes) else item
def calculate_recall(self, redis_results, linear_results, k=10):
"""Calculate recall (percentage of correct results retrieved)"""
redis_set = set(self.safe_decode(item) for item in redis_results)
linear_set = set(item[0] for item in linear_results[:k])
if not linear_set:
return 1.0 # If no linear results, consider it perfect recall
intersection = redis_set.intersection(linear_set)
return len(intersection) / len(linear_set)
def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
"""Test recall for a given filter expression"""
# Create query vector
query_vec = generate_random_vector(self.dim)
# First, get ground truth using linear scan
linear_results = self.filter_linear_search(
self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)
# Calculate true selectivity from ground truth
true_selectivity = len(linear_results) / len(self.names) if self.names else 0
# Perform Redis search with filter
cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
cmd_args.extend([str(x) for x in query_vec])
cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
if filter_ef:
cmd_args.extend(['FILTER-EF', filter_ef])
start_time = time.time()
redis_results = self.redis.execute_command(*cmd_args)
query_time = time.time() - start_time
# Convert Redis results to dict
redis_items = {}
for i in range(0, len(redis_results), 2):
key = redis_results[i].decode() if isinstance(redis_results[i], bytes) else redis_results[i]
score = float(redis_results[i+1])
redis_items[key] = score
# Calculate metrics
recall = self.calculate_recall(redis_items.keys(), linear_results)
selectivity = len(redis_items) / len(self.names) if redis_items else 0
# Compare against the true selectivity from linear scan
assert abs(selectivity - true_selectivity) < 0.1, \
f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"
# We expect high recall for standard parameters
if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
try:
assert recall >= 0.7, \
f"Low recall {recall:.2f} for filter '{filter_expr}'"
except AssertionError as e:
# Get items found in each set
redis_items_set = set(redis_items.keys())
linear_items_set = set(item[0] for item in linear_results)
# Find items in each set
only_in_redis = redis_items_set - linear_items_set
only_in_linear = linear_items_set - redis_items_set
in_both = redis_items_set & linear_items_set
# Build comprehensive debug message
debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})"
debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}"
debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})"
debug += f"\nItems only in Redis: {len(only_in_redis)}"
debug += f"\nItems only in Ground Truth: {len(only_in_linear)}"
# Show some example items from each set with their scores
if only_in_redis:
debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:"
sorted_redis = sorted([(k, v) for k, v in redis_items.items()], key=lambda x: x[1], reverse=True)
for i, (item, score) in enumerate(sorted_redis[:5]):
if item in only_in_redis:
debug += f"\n {i+1}. {item} (Score: {score:.4f})"
# Show attribute that should match filter
attr = self.attribute_map.get(item)
if attr:
debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
if only_in_linear:
debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:"
for i, (item, score) in enumerate(linear_results[:5]):
if item in only_in_linear:
debug += f"\n {i+1}. {item} (Score: {score:.4f})"
# Show attribute that should match filter
attr = self.attribute_map.get(item)
if attr:
debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
# Help identify parsing issues
debug += "\n\nPARSING CHECK:"
debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'"
# Check for WITHSCORES handling issues
if len(redis_results) > 0 and len(redis_results) % 2 == 0:
debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)"
debug += f"\nFirst few results: {redis_results[:4]}"
# Check the filter implementation
debug += "\n\nFILTER IMPLEMENTATION CHECK:"
debug += f"\nFilter expression: '{filter_expr}'"
debug += "\nSample attribute matches from attribute_map:"
count_matching = 0
for i, (name, attrs) in enumerate(self.attribute_map.items()):
if attrs and self.matches_filter(attrs, filter_expr):
count_matching += 1
if i < 3: # Show first 3 matches
debug += f"\n - {name}: {attrs}"
debug += f"\nTotal items matching filter in attribute_map: {count_matching}"
# Check if results array handling could be wrong
debug += "\n\nRESULT ARRAYS CHECK:"
if len(linear_results) >= 1:
debug += f"\nlinear_results[0]: {linear_results[0]}"
if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2:
debug += " (correct tuple format: (name, score))"
else:
debug += " (UNEXPECTED FORMAT!)"
# Debug sort order
debug += "\n\nSORTING CHECK:"
if len(linear_results) >= 2:
debug += f"\nGround truth first item score: {linear_results[0][1]}"
debug += f"\nGround truth second item score: {linear_results[1][1]}"
debug += f"\nCorrectly sorted by similarity? {linear_results[0][1] >= linear_results[1][1]}"
# Re-raise with detailed information
raise AssertionError(str(e) + debug)
return recall, selectivity, query_time, len(redis_items)
def test(self):
print(f"\nRunning comprehensive VSIM FILTER tests...")
# Create a larger dataset for testing
print(f"Creating dataset with {self.count} vectors and attributes...")
self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
self.test_key, self.count)
# ==== 1. Recall and Precision Testing ====
print("Testing recall for various filters...")
# Test basic filters with different selectivity
results = {}
results["category"] = self.test_recall_with_filter('.category == "electronics"')
results["price_high"] = self.test_recall_with_filter('.price > 1000')
results["in_stock"] = self.test_recall_with_filter('.in_stock')
results["rating"] = self.test_recall_with_filter('.rating >= 4')
results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')
print("Filter | Recall | Selectivity | Time (ms) | Results")
print("----------------------------------------------------")
        for name, (recall, selectivity, query_time, count) in results.items():
            print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {query_time*1000:.1f} | {count}")
# ==== 2. Filter Selectivity Performance ====
print("\nTesting filter selectivity performance...")
# High selectivity (very few matches)
high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')
# Medium selectivity
med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')
# Low selectivity (many matches)
low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')
print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")
# ==== 3. FILTER-EF Parameter Testing ====
print("\nTesting FILTER-EF parameter...")
# Test with different FILTER-EF values
filter_expr = '.category == "electronics" and .price > 200'
ef_values = [100, 500, 2000, 5000]
print("FILTER-EF | Recall | Time (ms)")
print("-----------------------------")
for filter_ef in ef_values:
recall, _, query_time, _ = self.test_recall_with_filter(
filter_expr, ef=500, filter_ef=filter_ef)
print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")
# Assert that higher FILTER-EF generally gives better recall
low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)
# This might not always be true due to randomness, but generally holds
# We use a softer assertion to avoid flaky tests
assert high_ef_recall >= low_ef_recall * 0.8, \
f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"
# ==== 4. Complex Filter Expressions ====
print("\nTesting complex filter expressions...")
# Test a variety of complex expressions
complex_filters = [
'.price > 100 and (.category == "electronics" or .category == "furniture")',
'(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
'.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
'(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
'.year > 2010 and !(.price < 100) and .in_stock'
]
print("Expression | Results | Time (ms)")
print("-----------------------------")
for i, expr in enumerate(complex_filters):
try:
_, _, query_time, result_count = self.test_recall_with_filter(expr)
print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
except Exception as e:
print(f"Complex {i+1} | Error: {str(e)}")
# ==== 5. Attribute Type Testing ====
print("\nTesting different attribute types...")
type_filters = [
('.price > 500', "Numeric"),
('.category == "books"', "String equality"),
('.in_stock', "Boolean"),
('.tags in ["sale", "new"]', "Array membership"),
('.rating * 2 > 8', "Arithmetic")
]
for expr, type_name in type_filters:
try:
_, _, query_time, result_count = self.test_recall_with_filter(expr)
print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
except Exception as e:
print(f"{type_name:16} | {expr:30} | Error: {str(e)}")
# ==== 6. Filter + Count Interaction ====
print("\nTesting COUNT parameter with filters...")
filter_expr = '.category == "electronics"'
counts = [5, 20, 100]
for count in counts:
query_vec = generate_random_vector(self.dim)
cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
cmd_args.extend([str(x) for x in query_vec])
cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])
results = self.redis.execute_command(*cmd_args)
result_count = len(results) // 2 # Divide by 2 because WITHSCORES returns pairs
# We expect result count to be at most the requested count
assert result_count <= count, f"Got {result_count} results with COUNT {count}"
print(f"COUNT {count:3} | Got {result_count:3} results")
# ==== 7. Edge Cases ====
print("\nTesting edge cases...")
# Test with no matching items
no_match_expr = '.category == "nonexistent_category"'
results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
*[str(x) for x in generate_random_vector(self.dim)],
'FILTER', no_match_expr)
assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
print(f"No matching items: {len(results)} results (expected 0)")
        # Test with invalid filter syntax
        syntax_error = False
        try:
            self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                       *[str(x) for x in generate_random_vector(self.dim)],
                                       'FILTER', '.category === "books"') # Triple equals is invalid
        except Exception:
            syntax_error = True
        assert syntax_error, "Expected error for invalid filter syntax"
        print("Invalid filter syntax correctly raised an error")
# Test with extremely long complex expression
long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
try:
results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
*[str(x) for x in generate_random_vector(self.dim)],
'FILTER', long_expr)
print(f"Long expression: {len(results)} results")
except Exception as e:
print(f"Long expression error: {str(e)}")
print("\nComprehensive VSIM FILTER tests completed successfully")
class VSIMFilterSelectivityTest(TestCase):
def getname(self):
return "VSIM FILTER selectivity performance benchmark"
def estimated_runtime(self):
return 8 # This test might take up to 8 seconds
def setup(self):
super().setup()
self.dim = 32
self.count = 10000
self.test_key = f"{self.test_key}:selectivity" # Use a different key
def create_vector_with_age_attribute(self, name, age):
"""Create a vector with a specific age attribute"""
vec = generate_random_vector(self.dim)
vec_bytes = struct.pack(f'{self.dim}f', *vec)
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age}))
def test(self):
print("\nRunning VSIM FILTER selectivity benchmark...")
# Create a dataset where we control the exact selectivity
print(f"Creating controlled dataset with {self.count} vectors...")
# Create vectors with age attributes from 1 to 100
for i in range(self.count):
age = (i % 100) + 1 # Ages from 1 to 100
name = f"{self.test_key}:item:{i}"
self.create_vector_with_age_attribute(name, age)
# Create a query vector
query_vec = generate_random_vector(self.dim)
# Test filters with different selectivities
selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99]
print("\nSelectivity | Filter | Results | Time (ms)")
print("--------------------------------------------------")
for target_selectivity in selectivities:
# Calculate age threshold for desired selectivity
# For example, age <= 10 gives 10% selectivity
age_threshold = int(target_selectivity * 100)
filter_expr = f'.age <= {age_threshold}'
# Run query and measure time
start_time = time.time()
cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
cmd_args.extend([str(x) for x in query_vec])
cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr])
results = self.redis.execute_command(*cmd_args)
query_time = time.time() - start_time
            print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}")
# Add assertion to ensure reasonable performance for different selectivities
# For very selective queries (1%), we might need more exploration
if target_selectivity <= 0.05:
# For very selective queries, ensure we can find some results
assert len(results) > 0, f"No results found for {filter_expr}"
else:
# For less selective queries, performance should be reasonable
assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}"
print("\nSelectivity benchmark completed successfully")
class VSIMFilterComparisonTest(TestCase):
def getname(self):
return "VSIM FILTER EF parameter comparison"
def estimated_runtime(self):
return 8 # This test might take up to 8 seconds
def setup(self):
super().setup()
self.dim = 32
self.count = 5000
self.test_key = f"{self.test_key}:efparams" # Use a different key
def create_dataset(self):
"""Create a dataset with specific attribute patterns for testing FILTER-EF"""
vectors = []
names = []
# Create vectors with category and quality score attributes
for i in range(self.count):
vec = generate_random_vector(self.dim)
name = f"{self.test_key}:item:{i}"
# Add vector to Redis
vec_bytes = struct.pack(f'{self.dim}f', *vec)
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
            # Create attributes - we want a very selective filter.
            # Only ~1% of items have category=premium AND quality>90
            # (10% premium times 10% quality above 90).
            category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"])
quality = random.randint(1, 100)
attrs = {
"id": i,
"category": category,
"quality": quality
}
self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
vectors.append(vec)
names.append(name)
return vectors, names
def test(self):
print("\nRunning VSIM FILTER-EF parameter comparison...")
# Create dataset
vectors, names = self.create_dataset()
        # Create a selective filter that matches ~1% of items
filter_expr = '.category == "premium" and .quality > 90'
# Create query vector
query_vec = generate_random_vector(self.dim)
# Test different FILTER-EF values
ef_values = [50, 100, 500, 1000, 5000]
results = []
print("\nFILTER-EF | Results | Time (ms) | Notes")
print("---------------------------------------")
        # Establish the baseline with the highest FILTER-EF first, so that
        # lower EF values can be compared against the most exhaustive search.
        baseline_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        baseline_args.extend([str(x) for x in query_vec])
        baseline_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef_values[-1]])
        baseline_count = len(self.redis.execute_command(*baseline_args))
        for ef in ef_values:
            # Run query and measure time
            start_time = time.time()
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])
            query_results = self.redis.execute_command(*cmd_args)
            query_time = time.time() - start_time
            recall_rate = len(query_results) / baseline_count if baseline_count > 0 else 1.0
            notes = ""
            if ef == ef_values[-1]:
                notes = "Baseline"
            elif recall_rate < 0.5:
                notes = "Low recall!"
print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}")
results.append((ef, len(query_results), query_time))
# If we have enough results at highest EF, check that recall improves with higher EF
if results[-1][1] >= 5: # At least 5 results for highest EF
# Extract result counts
result_counts = [r[1] for r in results]
# The last result (highest EF) should typically find more results than the first (lowest EF)
# but we use a soft assertion to avoid flaky tests
assert result_counts[-1] >= result_counts[0], \
f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"
print("\nFILTER-EF parameter comparison completed successfully")

View file

@ -0,0 +1,56 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random
class LargeScale(TestCase):
def getname(self):
return "Large Scale Comparison"
def estimated_runtime(self):
return 10
def test(self):
dim = 300
count = 20000
k = 50
# Fill Redis and get reference data for comparison
random.seed(42) # Make test deterministic
data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
# Generate query vector
query_vec = generate_random_vector(dim)
# Get results from Redis with good exploration factor
redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in query_vec],
'COUNT', k, 'WITHSCORES', 'EF', 500)
# Convert Redis results to dict
redis_results = {}
for i in range(0, len(redis_raw), 2):
key = redis_raw[i].decode()
score = float(redis_raw[i+1])
redis_results[key] = score
# Get results from linear scan
linear_results = data.find_k_nearest(query_vec, k)
linear_items = {name: score for name, score in linear_results}
# Compare overlap
redis_set = set(redis_results.keys())
linear_set = set(linear_items.keys())
overlap = len(redis_set & linear_set)
# If test fails, print comparison for debugging
if overlap < k * 0.7:
data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)
assert overlap >= k * 0.7, \
f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"
# Verify scores for common items
for item in redis_set & linear_set:
redis_score = redis_results[item]
linear_score = linear_items[item]
assert abs(redis_score - linear_score) < 0.01, \
f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"

View file

@ -0,0 +1,36 @@
from test import TestCase, generate_random_vector
import struct
class MemoryUsageTest(TestCase):
def getname(self):
return "[regression] MEMORY USAGE with attributes"
def test(self):
# Generate random vectors
vec1 = generate_random_vector(4)
vec2 = generate_random_vector(4)
vec_bytes1 = struct.pack('4f', *vec1)
vec_bytes2 = struct.pack('4f', *vec2)
# Add vectors to the key, one with attribute, one without
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')
# Get memory usage for the key
try:
memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
# If we got here without exception, the command worked
assert memory_usage > 0, "MEMORY USAGE should return a positive value"
# Add more attributes to increase complexity
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}')
# Check memory usage again
new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes"
# Memory usage should be higher after adding attributes
            assert new_memory_usage > memory_usage, "Memory usage should increase after adding attributes"
        except AssertionError:
            raise
        except Exception as e:
            raise AssertionError(f"MEMORY USAGE command failed: {str(e)}")

View file

@ -0,0 +1,85 @@
from test import TestCase, generate_random_vector
import struct
import math
import random
class VectorUpdateAndClusters(TestCase):
def getname(self):
return "VADD vector update with cluster relocation"
def estimated_runtime(self):
return 2.0 # Should take around 2 seconds
def generate_cluster_vector(self, base_vec, noise=0.1):
"""Generate a vector that's similar to base_vec with some noise."""
vec = [x + random.gauss(0, noise) for x in base_vec]
# Normalize
norm = math.sqrt(sum(x*x for x in vec))
return [x/norm for x in vec]
def test(self):
dim = 128
vectors_per_cluster = 5000
# Create two very different base vectors for our clusters
cluster1_base = generate_random_vector(dim)
cluster2_base = [-x for x in cluster1_base] # Opposite direction
# Add vectors from first cluster
for i in range(vectors_per_cluster):
vec = self.generate_cluster_vector(cluster1_base)
vec_bytes = struct.pack(f'{dim}f', *vec)
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
f'{self.test_key}:cluster1:{i}')
# Add vectors from second cluster
for i in range(vectors_per_cluster):
vec = self.generate_cluster_vector(cluster2_base)
vec_bytes = struct.pack(f'{dim}f', *vec)
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
f'{self.test_key}:cluster2:{i}')
        # Pick a test item from cluster1 (this is an element name inside
        # the set, not a separate Redis key)
        test_item = f'{self.test_key}:cluster1:0'
# Verify it's in cluster1 using VSIM
initial_vec = self.generate_cluster_vector(cluster1_base)
results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in initial_vec],
'COUNT', 100, 'WITHSCORES')
# Count how many cluster1 items are in top results
cluster1_count = sum(1 for i in range(0, len(results), 2)
if b'cluster1' in results[i])
assert cluster1_count > 80, "Initial clustering check failed"
# Now update the test vector to be in cluster2
new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
vec_bytes = struct.pack(f'{dim}f', *new_vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_item)
# Verify the embedding was actually updated using VEMB
        emb_result = self.redis.execute_command('VEMB', self.test_key, test_item)
updated_vec = [float(x) for x in emb_result]
# Verify updated vector matches what we inserted
dot_product = sum(a*b for a,b in zip(updated_vec, new_vec))
similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) *
math.sqrt(sum(x*x for x in new_vec)))
assert similarity > 0.9, "Vector was not properly updated"
# Verify it's now in cluster2 using VSIM
results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
*[str(x) for x in cluster2_base],
'COUNT', 100, 'WITHSCORES')
# Verify our updated vector is among top results
found = False
for i in range(0, len(results), 2):
            if results[i].decode() == test_item:
found = True
similarity = float(results[i+1])
assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"
break
assert found, "Updated vector not found in cluster2 proximity"

View file

@ -0,0 +1,83 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random
class HNSWPersistence(TestCase):
def getname(self):
return "HNSW Persistence"
def estimated_runtime(self):
return 30
def _verify_results(self, key, dim, query_vec, reduced_dim=None):
"""Run a query and return results dict"""
k = 10
        # Queries always pass vectors in the original input dimension: for
        # keys created with REDUCE, the projection is applied server-side.
        args = ['VSIM', key, 'VALUES', dim]
        args.extend([str(x) for x in query_vec])
        args.extend(['COUNT', k, 'WITHSCORES'])
results = self.redis.execute_command(*args)
results_dict = {}
        for i in range(0, len(results), 2):
            item = results[i].decode()
            score = float(results[i+1])
            results_dict[item] = score
return results_dict
def test(self):
# Setup dimensions
dim = 128
reduced_dim = 32
count = 5000
random.seed(42)
# Create two datasets - one normal and one with dimension reduction
normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim)
projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected",
count, dim, reduced_dim)
# Generate query vectors we'll use before and after reload
query_vec_normal = generate_random_vector(dim)
query_vec_projected = generate_random_vector(dim)
# Get initial results for both sets
initial_normal = self._verify_results(f"{self.test_key}:normal",
dim, query_vec_normal)
initial_projected = self._verify_results(f"{self.test_key}:projected",
dim, query_vec_projected, reduced_dim)
# Force Redis to save and reload the dataset
self.redis.execute_command('DEBUG', 'RELOAD')
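        # (DEBUG RELOAD saves the dataset to RDB and loads it back in place,
        # exercising the module's serialization and deserialization
        # callbacks.)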
# Verify results after reload
reloaded_normal = self._verify_results(f"{self.test_key}:normal",
dim, query_vec_normal)
reloaded_projected = self._verify_results(f"{self.test_key}:projected",
dim, query_vec_projected, reduced_dim)
# Verify normal vectors results
assert len(initial_normal) == len(reloaded_normal), \
"Normal vectors: Result count mismatch before/after reload"
for key in initial_normal:
assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}"
assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \
f"Normal vectors: Score mismatch for {key}: " + \
f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}"
# Verify projected vectors results
assert len(initial_projected) == len(reloaded_projected), \
"Projected vectors: Result count mismatch before/after reload"
for key in initial_projected:
assert key in reloaded_projected, \
f"Projected vectors: Missing item after reload: {key}"
assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \
f"Projected vectors: Score mismatch for {key}: " + \
f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}"

View file

@ -0,0 +1,71 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
class Reduce(TestCase):
def getname(self):
return "Dimension Reduction"
def estimated_runtime(self):
return 0.2
def test(self):
original_dim = 100
reduced_dim = 80
count = 1000
k = 50 # Number of nearest neighbors to check
# Fill Redis with vectors using REDUCE and get reference data
data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)
# Verify dimension is reduced
dim = self.redis.execute_command('VDIM', self.test_key)
assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"
# Generate query vector and get nearest neighbors using Redis
query_vec = generate_random_vector(original_dim)
redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
original_dim, *[str(x) for x in query_vec],
'COUNT', k, 'WITHSCORES')
# Convert Redis results to dict
redis_results = {}
for i in range(0, len(redis_raw), 2):
key = redis_raw[i].decode()
score = float(redis_raw[i+1])
redis_results[key] = score
# Get results from linear scan with original vectors
linear_results = data.find_k_nearest(query_vec, k)
linear_items = {name: score for name, score in linear_results}
# Compare overlap between reduced and non-reduced results
redis_set = set(redis_results.keys())
linear_set = set(linear_items.keys())
overlap = len(redis_set & linear_set)
overlap_ratio = overlap / k
        # With random projection, we expect some loss of accuracy but should
        # maintain at least some similarity structure.
        # Note that the Gaussian distribution is the worst case for this
        # test, so in real-world practice things will be better.
        min_expected_overlap = 0.1 # At least 10% overlap in top-k
        # If the check is about to fail, print a comparison for debugging
        # before raising the assertion.
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")
        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"
        # For items that appear in both results, scores should be reasonably correlated
        common_items = redis_set & linear_set
        for item in common_items:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            # Allow for some deviation due to dimensionality reduction
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"

View file

@ -0,0 +1,92 @@
from test import TestCase, generate_random_vector
import struct
import random
import time
class ComprehensiveReplicationTest(TestCase):
def getname(self):
return "Comprehensive Replication Test with mixed operations"
def estimated_runtime(self):
# This test will take longer than the default 100ms
return 20.0 # 20 seconds estimate
def test(self):
# Setup replication between primary and replica
assert self.setup_replication(), "Failed to setup replication"
# Test parameters
num_vectors = 5000
vector_dim = 8
delete_probability = 0.1
cas_probability = 0.3
# Keep track of added items for potential deletion
added_items = []
# Add vectors and occasionally delete
for i in range(num_vectors):
# Generate a random vector
vec = generate_random_vector(vector_dim)
vec_bytes = struct.pack(f'{vector_dim}f', *vec)
item_name = f"{self.test_key}:item:{i}"
# Decide whether to use CAS or not
use_cas = random.random() < cas_probability
            if use_cas and added_items:
try:
# Add with CAS
result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
item_name, 'CAS')
# Only add to our list if actually added (CAS might fail)
if result == 1:
added_items.append(item_name)
except Exception as e:
print(f" CAS VADD failed: {e}")
else:
try:
# Add without CAS
result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name)
# Only add to our list if actually added
if result == 1:
added_items.append(item_name)
except Exception as e:
print(f" VADD failed: {e}")
# Randomly delete items (with 10% probability)
if random.random() < delete_probability and added_items:
try:
# Select a random item to delete
item_to_delete = random.choice(added_items)
# Delete the item using VREM (not VDEL)
self.redis.execute_command('VREM', self.test_key, item_to_delete)
# Remove from our list
added_items.remove(item_to_delete)
except Exception as e:
print(f" VREM failed: {e}")
# Allow time for replication to complete
time.sleep(2.0)
# Verify final VCARD matches
primary_card = self.redis.execute_command('VCARD', self.test_key)
replica_card = self.replica.execute_command('VCARD', self.test_key)
assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}"
# Verify VDIM matches
primary_dim = self.redis.execute_command('VDIM', self.test_key)
replica_dim = self.replica.execute_command('VDIM', self.test_key)
assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}"
# Verify digests match using DEBUG DIGEST
primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}"
# Print summary
print(f"\n Added and maintained {len(added_items)} vectors with dimension {vector_dim}")
print(f" Final vector count: {primary_card}")
print(f" Final digest: {primary_digest[0].decode()}")

View file

@ -0,0 +1,98 @@
from test import TestCase, generate_random_vector
import threading
import struct
import math
import time
import random
from typing import List, Dict
class ConcurrentCASTest(TestCase):
def getname(self):
return "Concurrent VADD with CAS"
def estimated_runtime(self):
return 1.5
def worker(self, vectors: List[List[float]], start_idx: int, end_idx: int,
dim: int, results: Dict[str, bool]):
"""Worker thread that adds a subset of vectors using VADD CAS"""
for i in range(start_idx, end_idx):
vec = vectors[i]
name = f"{self.test_key}:item:{i}"
vec_bytes = struct.pack(f'{dim}f', *vec)
# Try to add the vector with CAS
try:
result = self.redis.execute_command('VADD', self.test_key, 'FP32',
vec_bytes, name, 'CAS')
results[name] = (result == 1) # Store if it was actually added
except Exception as e:
results[name] = False
print(f"Error adding {name}: {e}")
def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float:
"""Calculate cosine similarity between two vectors"""
dot_product = sum(a*b for a,b in zip(vec1, vec2))
norm1 = math.sqrt(sum(x*x for x in vec1))
norm2 = math.sqrt(sum(x*x for x in vec2))
return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0
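    # Worked example: vec1=[1,0], vec2=[1,1] gives dot=1 with norms 1 and
    # sqrt(2), so the similarity is 1/sqrt(2) ~= 0.7071; identical
    # directions score 1.0, orthogonal ones 0.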
def test(self):
# Test parameters
dim = 128
total_vectors = 5000
num_threads = 8
vectors_per_thread = total_vectors // num_threads
# Generate all vectors upfront
random.seed(42) # For reproducibility
vectors = [generate_random_vector(dim) for _ in range(total_vectors)]
# Prepare threads and results dictionary
threads = []
results = {} # Will store success/failure for each vector
# Launch threads
for i in range(num_threads):
start_idx = i * vectors_per_thread
end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors
thread = threading.Thread(target=self.worker,
args=(vectors, start_idx, end_idx, dim, results))
threads.append(thread)
thread.start()
# Wait for all threads to complete
for thread in threads:
thread.join()
# Verify cardinality
card = self.redis.execute_command('VCARD', self.test_key)
assert card == total_vectors, \
f"Expected {total_vectors} elements, but found {card}"
# Verify each vector
num_verified = 0
for i in range(total_vectors):
name = f"{self.test_key}:item:{i}"
# Verify the item was successfully added
assert results[name], f"Vector {name} was not successfully added"
# Get the stored vector
stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name)
stored_vec = [float(x) for x in stored_vec_raw]
# Verify vector dimensions
assert len(stored_vec) == dim, \
f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}"
# Calculate similarity with original vector
similarity = self.verify_vector_similarity(vectors[i], stored_vec)
assert similarity > 0.99, \
f"Low similarity ({similarity}) for {name}"
num_verified += 1
# Final verification
assert num_verified == total_vectors, \
f"Only verified {num_verified} out of {total_vectors} vectors"

View file

@ -0,0 +1,41 @@
from test import TestCase
import struct
import math
class VEMB(TestCase):
def getname(self):
return "VEMB Command"
def test(self):
dim = 4
# Add same vector in both formats
vec = [1, 0, 0, 0]
norm = math.sqrt(sum(x*x for x in vec))
vec = [x/norm for x in vec] # Normalize the vector
# Add using FP32
vec_bytes = struct.pack(f'{dim}f', *vec)
self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
# Add using VALUES
self.redis.execute_command('VADD', self.test_key, 'VALUES', dim,
*[str(x) for x in vec], f'{self.test_key}:item:2')
# Get both back with VEMB
result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1')
result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2')
retrieved_vec1 = [float(x) for x in result1]
retrieved_vec2 = [float(x) for x in result2]
# Compare both vectors with original (allow for small quantization errors)
for i in range(dim):
assert abs(vec[i] - retrieved_vec1[i]) < 0.01, \
f"FP32 vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec1[i]}"
assert abs(vec[i] - retrieved_vec2[i]) < 0.01, \
f"VALUES vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec2[i]}"
# Test non-existent item
result = self.redis.execute_command('VEMB', self.test_key, 'nonexistent')
assert result is None, "Non-existent item should return nil"
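        # Round-trip sketch for the FP32 blob format exercised above: VADD
        # consumes dim raw 32-bit floats (4*dim bytes, in the native byte
        # order produced by struct.pack in these tests):
        #
        #   blob = struct.pack('4f', 0.1, 0.2, 0.3, 0.4)   # 16 bytes
        #   back = list(struct.unpack('4f', blob))         # approx originals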

View file

@ -0,0 +1,55 @@
from test import TestCase, generate_random_vector, fill_redis_with_vectors
import struct
class VRANDMEMBERTest(TestCase):
def getname(self):
return "VRANDMEMBER basic functionality"
def test(self):
# Test with empty key
result = self.redis.execute_command('VRANDMEMBER', self.test_key)
assert result is None, "VRANDMEMBER on non-existent key should return NULL"
result = self.redis.execute_command('VRANDMEMBER', self.test_key, 5)
assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count on non-existent key should return empty array"
# Fill with vectors
dim = 4
count = 100
data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
# Test single random member
result = self.redis.execute_command('VRANDMEMBER', self.test_key)
assert result is not None, "VRANDMEMBER should return a random member"
assert result.decode() in data.names, "Random member should be in the set"
# Test multiple unique members (positive count)
positive_count = 10
result = self.redis.execute_command('VRANDMEMBER', self.test_key, positive_count)
assert isinstance(result, list), "VRANDMEMBER with positive count should return an array"
assert len(result) == positive_count, f"Should return {positive_count} members"
# Check for uniqueness
decoded_results = [r.decode() for r in result]
assert len(decoded_results) == len(set(decoded_results)), "Results should be unique with positive count"
for item in decoded_results:
assert item in data.names, "All returned items should be in the set"
# Test more members than in the set
result = self.redis.execute_command('VRANDMEMBER', self.test_key, count + 10)
assert len(result) == count, "Should return only the available members when asking for more than exist"
# Test with duplicates (negative count)
negative_count = -20
result = self.redis.execute_command('VRANDMEMBER', self.test_key, negative_count)
assert isinstance(result, list), "VRANDMEMBER with negative count should return an array"
assert len(result) == abs(negative_count), f"Should return {abs(negative_count)} members"
# Check that all returned elements are valid
decoded_results = [r.decode() for r in result]
for item in decoded_results:
assert item in data.names, "All returned items should be in the set"
# Test with count = 0 (edge case)
result = self.redis.execute_command('VRANDMEMBER', self.test_key, 0)
assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count=0 should return empty array"
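        # Count semantics exercised above (mirroring SRANDMEMBER):
        #   no count    -> a single random element, or nil on a missing key
        #   positive N  -> up to N distinct elements, capped at VCARD
        #   negative N  -> exactly |N| elements, duplicates allowed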

1974
modules/vector-sets/vset.c Normal file

File diff suppressed because it is too large

510
modules/vector-sets/w2v.c Normal file
View file

@ -0,0 +1,510 @@
/*
 * word2vec example and stress-test program for the HNSW
 * (Hierarchical Navigable Small World) implementation, which is
 * based on the paper by Yu. A. Malkov, D. A. Yashunin.
 *
 * Copyright(C) 2024-Present, Redis Ltd. All Rights Reserved.
 * Originally authored by: Salvatore Sanfilippo
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <stdint.h>
#include <pthread.h>
#include <stdatomic.h>
#include <math.h>
#include "hnsw.h"
/* Get current time in milliseconds */
uint64_t ms_time(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return (uint64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
}
/* Implementation of the recall test with random vectors. */
void test_recall(HNSW *index, int ef) {
const int num_test_vectors = 10000;
const int k = 100; // Number of nearest neighbors to find.
if (ef < k) ef = k;
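    /* Recall is computed per query as the size of the intersection between
     * the HNSW results and the exact (linear scan) results, divided by k:
     * e.g. 97 of the 100 true neighbors found means 0.97, which lands in
     * the 96%-98% histogram bin below. */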
// Add recall distribution counters (2% bins from 0-100%).
int recall_bins[50] = {0};
// Create array to store vectors for mixing.
int num_source_vectors = 1000; // Enough, since we mix them.
float **source_vectors = malloc(sizeof(float*) * num_source_vectors);
if (!source_vectors) {
printf("Failed to allocate memory for source vectors\n");
return;
}
// Allocate memory for each source vector.
for (int i = 0; i < num_source_vectors; i++) {
source_vectors[i] = malloc(sizeof(float) * 300);
if (!source_vectors[i]) {
printf("Failed to allocate memory for source vector %d\n", i);
// Clean up already allocated vectors.
for (int j = 0; j < i; j++) free(source_vectors[j]);
free(source_vectors);
return;
}
}
/* Populate source vectors from the index, we just scan the
* first N items. */
int source_count = 0;
hnswNode *current = index->head;
while (current && source_count < num_source_vectors) {
hnsw_get_node_vector(index, current, source_vectors[source_count]);
source_count++;
current = current->next;
}
if (source_count < num_source_vectors) {
printf("Warning: Only found %d nodes for source vectors\n",
source_count);
num_source_vectors = source_count;
}
// Allocate memory for test vector.
float *test_vector = malloc(sizeof(float) * 300);
if (!test_vector) {
printf("Failed to allocate memory for test vector\n");
for (int i = 0; i < num_source_vectors; i++) {
free(source_vectors[i]);
}
free(source_vectors);
return;
}
// Allocate memory for results.
hnswNode **hnsw_results = malloc(sizeof(hnswNode*) * ef);
hnswNode **linear_results = malloc(sizeof(hnswNode*) * ef);
float *hnsw_distances = malloc(sizeof(float) * ef);
float *linear_distances = malloc(sizeof(float) * ef);
if (!hnsw_results || !linear_results || !hnsw_distances || !linear_distances) {
printf("Failed to allocate memory for results\n");
if (hnsw_results) free(hnsw_results);
if (linear_results) free(linear_results);
if (hnsw_distances) free(hnsw_distances);
if (linear_distances) free(linear_distances);
for (int i = 0; i < num_source_vectors; i++) free(source_vectors[i]);
free(source_vectors);
free(test_vector);
return;
}
// Initialize random seed.
srand(time(NULL));
// Perform recall test.
printf("\nPerforming recall test with EF=%d on %d random vectors...\n",
ef, num_test_vectors);
double total_recall = 0.0;
for (int t = 0; t < num_test_vectors; t++) {
// Create a random vector by mixing 3 existing vectors.
float weights[3] = {0.0};
int src_indices[3] = {0};
// Generate random weights.
float weight_sum = 0.0;
for (int i = 0; i < 3; i++) {
weights[i] = (float)rand() / RAND_MAX;
weight_sum += weights[i];
src_indices[i] = rand() % num_source_vectors;
}
// Normalize weights.
for (int i = 0; i < 3; i++) weights[i] /= weight_sum;
// Mix vectors.
memset(test_vector, 0, sizeof(float) * 300);
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 300; j++) {
test_vector[j] +=
weights[i] * source_vectors[src_indices[i]][j];
}
}
// Perform HNSW search with the specified EF parameter.
int slot = hnsw_acquire_read_slot(index);
int hnsw_found = hnsw_search(index, test_vector, ef, hnsw_results, hnsw_distances, slot, 0);
// Perform linear search (ground truth).
int linear_found = hnsw_ground_truth_with_filter(index, test_vector, ef, linear_results, linear_distances, slot, 0, NULL, NULL);
hnsw_release_read_slot(index, slot);
// Calculate recall for this query (intersection size / k).
if (hnsw_found > k) hnsw_found = k;
if (linear_found > k) linear_found = k;
int intersection_count = 0;
for (int i = 0; i < linear_found; i++) {
for (int j = 0; j < hnsw_found; j++) {
if (linear_results[i] == hnsw_results[j]) {
intersection_count++;
break;
}
}
}
double recall = (double)intersection_count / linear_found;
total_recall += recall;
// Add to distribution bins (2% steps)
int bin_index = (int)(recall * 50);
if (bin_index >= 50) bin_index = 49; // Handle 100% recall case
recall_bins[bin_index]++;
// Show progress.
if ((t+1) % 1000 == 0 || t == num_test_vectors-1) {
printf("Processed %d/%d queries, current avg recall: %.2f%%\n",
t+1, num_test_vectors, (total_recall / (t+1)) * 100);
}
}
// Calculate and print final average recall.
double avg_recall = (total_recall / num_test_vectors) * 100;
printf("\nRecall Test Results:\n");
printf("Average recall@%d (EF=%d): %.2f%%\n", k, ef, avg_recall);
// Print recall distribution histogram.
printf("\nRecall Distribution (2%% bins):\n");
printf("================================\n");
// Find the maximum bin count for scaling.
int max_count = 0;
for (int i = 0; i < 50; i++) {
if (recall_bins[i] > max_count) max_count = recall_bins[i];
}
// Scale factor for histogram (max 50 chars wide)
const int max_bars = 50;
double scale = (max_count > max_bars) ? (double)max_bars / max_count : 1.0;
// Print the histogram.
for (int i = 0; i < 50; i++) {
int bar_len = (int)(recall_bins[i] * scale);
printf("%3d%%-%-3d%% | %-6d |", i*2, (i+1)*2, recall_bins[i]);
for (int j = 0; j < bar_len; j++) printf("#");
printf("\n");
}
// Cleanup.
free(hnsw_results);
free(linear_results);
free(hnsw_distances);
free(linear_distances);
free(test_vector);
for (int i = 0; i < num_source_vectors; i++) free(source_vectors[i]);
free(source_vectors);
}
/* Example usage in main() */
int w2v_single_thread(int m_param, int quantization, uint64_t numele, int massdel, int self_recall, int recall_ef) {
/* Create index */
HNSW *index = hnsw_new(300, quantization, m_param);
float v[300];
uint16_t wlen;
FILE *fp = fopen("word2vec.bin","rb");
if (fp == NULL) {
perror("word2vec.bin file missing");
exit(1);
}
unsigned char header[8];
    if (fread(header,8,1,fp) != 1) { // Skip the file header.
        fprintf(stderr, "word2vec.bin: short read on header\n");
        exit(1);
    }
uint64_t id = 0;
uint64_t start_time = ms_time();
char *word = NULL;
hnswNode *search_node = NULL;
while(id < numele) {
if (fread(&wlen,2,1,fp) == 0) break;
word = malloc(wlen+1);
fread(word,wlen,1,fp);
word[wlen] = 0;
fread(v,300*sizeof(float),1,fp);
// Plain API that acquires a write lock for the whole time.
hnswNode *added = hnsw_insert(index, v, NULL, 0, id++, word, 200);
if (!strcmp(word,"banana")) search_node = added;
if (!(id % 10000)) printf("%llu added\n", (unsigned long long)id);
}
uint64_t elapsed = ms_time() - start_time;
fclose(fp);
printf("%llu words added (%llu words/sec), last word: %s\n",
(unsigned long long)index->node_count,
(unsigned long long)id*1000/elapsed, word);
/* Search query */
if (search_node == NULL) search_node = index->head;
hnsw_get_node_vector(index,search_node,v);
hnswNode *neighbors[10];
float distances[10];
int found, j;
start_time = ms_time();
for (j = 0; j < 20000; j++)
found = hnsw_search(index, v, 10, neighbors, distances, 0, 0);
elapsed = ms_time() - start_time;
printf("%d searches performed (%llu searches/sec), nodes found: %d\n",
j, (unsigned long long)j*1000/elapsed, found);
if (found > 0) {
printf("Found %d neighbors:\n", found);
for (int i = 0; i < found; i++) {
printf("Node ID: %llu, distance: %f, word: %s\n",
(unsigned long long)neighbors[i]->id,
distances[i], (char*)neighbors[i]->value);
}
}
// Self-recall test (ability to find the node by its own vector).
if (self_recall) {
hnsw_print_stats(index);
hnsw_test_graph_recall(index,200,0);
}
// Recall test with random vectors.
if (recall_ef > 0) {
test_recall(index, recall_ef);
}
uint64_t connected_nodes;
int reciprocal_links;
hnsw_validate_graph(index, &connected_nodes, &reciprocal_links);
if (massdel) {
int remove_perc = 95;
printf("\nRemoving %d%% of nodes...\n", remove_perc);
uint64_t initial_nodes = index->node_count;
hnswNode *current = index->head;
while (current && index->node_count > initial_nodes*(100-remove_perc)/100) {
hnswNode *next = current->next;
hnsw_delete_node(index,current,free);
current = next;
            // To avoid removing only contiguous nodes, skip a node
            // from time to time.
if (current && !(random() % remove_perc)) current = current->next;
}
printf("%llu nodes left\n", (unsigned long long)index->node_count);
// Test again.
hnsw_validate_graph(index, &connected_nodes, &reciprocal_links);
hnsw_test_graph_recall(index,200,0);
}
hnsw_free(index,free);
return 0;
}
struct threadContext {
pthread_mutex_t FileAccessMutex;
uint64_t numele;
_Atomic uint64_t SearchesDone;
_Atomic uint64_t id;
FILE *fp;
HNSW *index;
float *search_vector;
};
// Note that in practical terms inserting with many concurrent threads
// may be *slower* and not faster, because there is a lot of
// contention. So this is more a robustness test than anything else.
//
// The optimistic commit API goal is actually to exploit the ability to
// add faster when there are many concurrent reads.
void *threaded_insert(void *ctxptr) {
struct threadContext *ctx = ctxptr;
char *word;
float v[300];
uint16_t wlen;
while(1) {
        // Hold the file mutex across all three reads: the word length,
        // the word itself and the embedding must be consumed atomically
        // with respect to the other threads sharing the same FILE pointer.
        pthread_mutex_lock(&ctx->FileAccessMutex);
        if (fread(&wlen,2,1,ctx->fp) == 0) {
            pthread_mutex_unlock(&ctx->FileAccessMutex);
            break;
        }
        word = malloc(wlen+1);
        fread(word,wlen,1,ctx->fp);
        word[wlen] = 0;
        fread(v,300*sizeof(float),1,ctx->fp);
        pthread_mutex_unlock(&ctx->FileAccessMutex);
// Check-and-set API that performs the costly scan for similar
// nodes concurrently with other read threads, and finally
// applies the check if the graph wasn't modified.
InsertContext *ic;
uint64_t next_id = ctx->id++;
ic = hnsw_prepare_insert(ctx->index, v, NULL, 0, next_id, 200);
if (hnsw_try_commit_insert(ctx->index, ic, word) == NULL) {
// This time try locking since the start.
hnsw_insert(ctx->index, v, NULL, 0, next_id, word, 200);
}
if (next_id >= ctx->numele) break;
if (!((next_id+1) % 10000))
printf("%llu added\n", (unsigned long long)next_id+1);
}
return NULL;
}
void *threaded_search(void *ctxptr) {
struct threadContext *ctx = ctxptr;
/* Search query */
hnswNode *neighbors[10];
float distances[10];
int found = 0;
uint64_t last_id = 0;
while(ctx->id < 1000000) {
int slot = hnsw_acquire_read_slot(ctx->index);
found = hnsw_search(ctx->index, ctx->search_vector, 10, neighbors, distances, slot, 0);
hnsw_release_read_slot(ctx->index,slot);
last_id = ++ctx->id;
}
if (found > 0 && last_id == 1000000) {
printf("Found %d neighbors:\n", found);
for (int i = 0; i < found; i++) {
printf("Node ID: %llu, distance: %f, word: %s\n",
(unsigned long long)neighbors[i]->id,
distances[i], (char*)neighbors[i]->value);
}
}
return NULL;
}
int w2v_multi_thread(int m_param, int numthreads, int quantization, uint64_t numele) {
/* Create index */
struct threadContext ctx;
ctx.index = hnsw_new(300, quantization, m_param);
ctx.fp = fopen("word2vec.bin","rb");
if (ctx.fp == NULL) {
perror("word2vec.bin file missing");
exit(1);
}
unsigned char header[8];
    if (fread(header,8,1,ctx.fp) != 1) { // Skip the file header.
        fprintf(stderr, "word2vec.bin: short read on header\n");
        exit(1);
    }
pthread_mutex_init(&ctx.FileAccessMutex,NULL);
uint64_t start_time = ms_time();
ctx.id = 0;
ctx.numele = numele;
pthread_t threads[numthreads];
for (int j = 0; j < numthreads; j++)
pthread_create(&threads[j], NULL, threaded_insert, &ctx);
// Wait for all the threads to terminate adding items.
for (int j = 0; j < numthreads; j++)
pthread_join(threads[j],NULL);
uint64_t elapsed = ms_time() - start_time;
fclose(ctx.fp);
// Obtain the last word.
hnswNode *node = ctx.index->head;
char *word = node->value;
// We will search this last inserted word in the next test.
// Let's save its embedding.
ctx.search_vector = malloc(sizeof(float)*300);
hnsw_get_node_vector(ctx.index,node,ctx.search_vector);
printf("%llu words added (%llu words/sec), last word: %s\n",
(unsigned long long)ctx.index->node_count,
(unsigned long long)ctx.id*1000/elapsed, word);
/* Search query */
start_time = ms_time();
ctx.id = 0; // We will use this atomic field to stop at N queries done.
for (int j = 0; j < numthreads; j++)
pthread_create(&threads[j], NULL, threaded_search, &ctx);
// Wait for all the threads to terminate searching.
for (int j = 0; j < numthreads; j++)
pthread_join(threads[j],NULL);
elapsed = ms_time() - start_time;
printf("%llu searches performed (%llu searches/sec)\n",
(unsigned long long)ctx.id,
(unsigned long long)ctx.id*1000/elapsed);
hnsw_print_stats(ctx.index);
uint64_t connected_nodes;
int reciprocal_links;
hnsw_validate_graph(ctx.index, &connected_nodes, &reciprocal_links);
printf("%llu connected nodes. Links all reciprocal: %d\n",
(unsigned long long)connected_nodes, reciprocal_links);
hnsw_free(ctx.index,free);
return 0;
}
int main(int argc, char **argv) {
int quantization = HNSW_QUANT_NONE;
int numthreads = 0;
uint64_t numele = 20000;
int m_param = 0; // Default value (0 means use HNSW_DEFAULT_M)
/* This you can enable in single thread mode for testing: */
int massdel = 0; // If true, does the mass deletion test.
int self_recall = 0; // If true, does the self-recall test.
int recall_ef = 0; // If not 0, does the recall test with this EF value.
for (int j = 1; j < argc; j++) {
int moreargs = argc-j-1;
if (!strcasecmp(argv[j],"--quant")) {
quantization = HNSW_QUANT_Q8;
} else if (!strcasecmp(argv[j],"--bin")) {
quantization = HNSW_QUANT_BIN;
} else if (!strcasecmp(argv[j],"--mass-del")) {
massdel = 1;
} else if (!strcasecmp(argv[j],"--self-recall")) {
self_recall = 1;
} else if (moreargs >= 1 && !strcasecmp(argv[j],"--recall")) {
recall_ef = atoi(argv[j+1]);
j++;
} else if (moreargs >= 1 && !strcasecmp(argv[j],"--threads")) {
numthreads = atoi(argv[j+1]);
j++;
} else if (moreargs >= 1 && !strcasecmp(argv[j],"--numele")) {
numele = strtoll(argv[j+1],NULL,0);
j++;
if (numele < 1) numele = 1;
} else if (moreargs >= 1 && !strcasecmp(argv[j],"--m")) {
m_param = atoi(argv[j+1]);
j++;
} else if (!strcasecmp(argv[j],"--help")) {
printf("%s [--quant] [--bin] [--thread <count>] [--numele <count>] [--m <count>] [--mass-del] [--self-recall] [--recall <ef>]\n", argv[0]);
exit(0);
} else {
printf("Unrecognized option or wrong number of arguments: %s\n", argv[j]);
exit(1);
}
}
if (quantization == HNSW_QUANT_NONE) {
printf("You can enable quantization with --quant\n");
}
if (numthreads > 0) {
w2v_multi_thread(m_param, numthreads, quantization, numele);
} else {
printf("Single thread execution. Use --threads 4 for concurrent API\n");
w2v_single_thread(m_param, quantization, numele, massdel, self_recall, recall_ef);
}
}
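
/* Usage examples (the example target builds as `w2v`, see the Makefile):
 *
 *   ./w2v --numele 100000 --self-recall   # single thread + self-recall test
 *   ./w2v --recall 200                    # recall test vs linear scan, EF=200
 *   ./w2v --threads 8 --quant             # concurrent API, Q8 quantization
 */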