redis/modules/vector-sets/tests/bin_vectorization.py
Martin Dimitrov 0024d5dfde
Some checks failed
CI / test-ubuntu-latest (push) Waiting to run
CI / test-sanitizer-address (push) Waiting to run
CI / build-debian-old (push) Waiting to run
CI / build-macos-latest (push) Waiting to run
CI / build-32bit (push) Waiting to run
CI / build-libc-malloc (push) Waiting to run
CI / build-centos-jemalloc (push) Waiting to run
CI / build-old-chain-jemalloc (push) Waiting to run
Codecov / code-coverage (push) Waiting to run
External Server Tests / test-external-standalone (push) Waiting to run
External Server Tests / test-external-cluster (push) Waiting to run
External Server Tests / test-external-nodebug (push) Waiting to run
Spellcheck / Spellcheck (push) Waiting to run
Reply-schemas linter / reply-schemas-linter (push) Has been cancelled
Vectorize binary quantization path for vectorsets distance calculation (#14492)
This PR adds SIMD vectorization for binary quantization distance
calculation, similar to PR #14474.

---------

Co-authored-by: debing.sun <debing.sun@redis.com>
2026-01-29 19:59:48 +08:00

54 lines
3 KiB
Python

from test import TestCase
class BinVectorization(TestCase):
    """Sanity-check VSIM scores for BIN-quantized vectors at several dimensions.

    Uses ``self.redis`` and ``self.test_key``, which are not defined in this
    class (presumably supplied by ``TestCase`` - confirm against the base).
    """

    def getname(self):
        """Return the human-readable name of this test."""
        return "Binary quantization: verify vectorized vs scalar paths produce consistent results"

    def test(self):
        """Store three BIN vectors per dimension and check their similarity.

        For each dimension a separate vector set is populated with a
        reference vector, a near-copy of it, and its opposite. The set is
        then queried with the reference vector and the returned scores are
        checked against fixed thresholds.

        Raises:
            AssertionError: if any score falls outside its expected range.
        """
        # Dimensions are picked to reach the different distance code paths:
        #   1   - smallest valid dimension (scalar path)
        #   64  - exact alignment boundary, one uint64_t word (scalar path)
        #   128 - below 256, so still the scalar path
        #   384 - AVX2 path if available (>= 256, < 512)
        #   768 - AVX512 path if available (>= 512)
        # dim=0 is left out on purpose: it is invalid input (division by zero).
        for dim in (1, 64, 128, 384, 768):
            set_key = f'{self.test_key}:dim{dim}'
            item_prefix = f'{set_key}:item:'

            # All vectors are constant-valued; only the fill value differs.
            reference = [str(1.0)] * dim    # item 1: the query vector itself
            near_copy = [str(0.99)] * dim   # item 2: very close to item 1
            opposite = [str(-1.0)] * dim    # item 3: opposite direction

            # Store the three vectors, in order, with binary quantization.
            stored = (reference, near_copy, opposite)
            for item_no, components in enumerate(stored, start=1):
                self.redis.execute_command(
                    'VADD', set_key, 'VALUES', dim,
                    *components, f'{item_prefix}{item_no}', 'BIN')

            # Ask for the neighbours of the reference vector, scores included.
            reply = self.redis.execute_command(
                'VSIM', set_key, 'VALUES', dim,
                *reference, 'WITHSCORES')

            # The reply is read as a flat sequence alternating element name
            # (bytes) and score; fold it into a name -> score mapping.
            scores = {
                reply[pos].decode(): float(reply[pos + 1])
                for pos in range(0, len(reply), 2)
            }

            # The query vector itself must score (almost) perfectly; binary
            # quantization is less precise, hence the tolerance.
            self_name = f'{item_prefix}1'
            assert scores[self_name] > 0.99, \
                f"Dim {dim}: Self-similarity too low: {scores[self_name]}"

            # The near-copy should still rank as highly similar despite the
            # precision lost to quantization.
            similar_name = f'{item_prefix}2'
            assert scores[similar_name] > 0.95, \
                f"Dim {dim}: Similar vector similarity too low: {scores[similar_name]}"

            # The opposite vector must come out as very dissimilar.
            opposite_name = f'{item_prefix}3'
            assert scores[opposite_name] < 0.1, \
                f"Dim {dim}: Opposite vector similarity too high: {scores[opposite_name]}"