Add tests for low-level PGLZ [de]compression routines

The goal of this module is to provide an entry point for the coverage of
the low-level compression and decompression PGLZ routines.  The new test
is placed in a new parallel group, and all the existing
compression-related tests are moved into that group as well.

This includes tests for the cases detected by fuzzing that emulate
corrupted compressed data, as fixed by 2b5ba2a0a1:
- Set control bit with read of a match tag, where no data follows.
- Set control bit with read of a match tag, where 1 byte follows.
- Set control bit with match tag where length nibble is 3 bytes
(extended case).

While at it, some tests are added for compress/decompress roundtrips,
and for check_complete=false/true.  Like 2b5ba2a0a1, backpatch to all
the stable branches.

Discussion: https://postgr.es/m/adw647wuGjh1oU6p@paquier.xyz
Backpatch-through: 14
This commit is contained in:
Michael Paquier 2026-04-15 05:09:05 +09:00
parent 66ad764c8d
commit 67d318e704
4 changed files with 190 additions and 1 deletions

View file

@ -0,0 +1,65 @@
--
-- Tests for PGLZ compression
--
-- directory paths and dlsuffix are passed to us in environment variables
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
CREATE FUNCTION test_pglz_compress(bytea)
RETURNS bytea
AS :'regresslib' LANGUAGE C STRICT;
CREATE FUNCTION test_pglz_decompress(bytea, int4, bool)
RETURNS bytea
AS :'regresslib' LANGUAGE C STRICT;
-- Round-trip with pglz: compress then decompress.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 400, false) =
decode(repeat('abcd', 100), 'escape') AS roundtrip_ok;
roundtrip_ok
--------------
t
(1 row)
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 400, true) =
decode(repeat('abcd', 100), 'escape') AS roundtrip_ok;
roundtrip_ok
--------------
t
(1 row)
-- Decompression with rawsize too large, fails to fill the destination
-- buffer.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 500, true);
ERROR: pglz_decompress failed
-- Decompression with rawsize too small, fails with source not fully
-- consumed.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 100, true);
ERROR: pglz_decompress failed
-- Corrupted compressed data. Set control bit with read of a match tag,
-- no data follows.
SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len;
ctrl_only_len
---------------
0
(1 row)
SELECT test_pglz_decompress('\x01'::bytea, 1024, true);
ERROR: pglz_decompress failed
-- Corrupted compressed data. Set control bit with read of a match tag,
-- 1 byte follows.
SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false);
ERROR: pglz_decompress failed
SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true);
ERROR: pglz_decompress failed
-- Corrupted compressed data. Set control bit with match tag where length
-- nibble is 3 bytes (extended length), no data follows.
SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false);
ERROR: pglz_decompress failed
SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true);
ERROR: pglz_decompress failed
-- Clean up
DROP FUNCTION test_pglz_compress;
DROP FUNCTION test_pglz_decompress;

View file

@ -123,7 +123,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# The stats test resets stats, so nothing else needing stats access can be in
# this group.
# ----------
test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls planner_est
test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain memoize stats predicate numa eager_aggregate graph_table_rls planner_est
# ----------
# Another group of parallel tests (compression)
# ----------
test: compression compression_lz4 compression_pglz
# event_trigger depends on create_am and cannot run concurrently with
# any test that runs DDL

View file

@ -27,6 +27,7 @@
#include "catalog/pg_type.h"
#include "commands/sequence.h"
#include "commands/trigger.h"
#include "common/pg_lzcompress.h"
#include "executor/executor.h"
#include "executor/functions.h"
#include "executor/spi.h"
@ -1422,3 +1423,68 @@ test_instr_time(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(true);
}
/*
 * test_pglz_compress
 *
 * SQL-callable wrapper around pglz_compress(), hard-wired to the
 * PGLZ_strategy_always strategy.
 *
 * The only argument is the bytea value to compress.  The result is a bytea
 * holding the compressed bytes, or NULL when pglz_compress() reports a
 * failure through a negative return value.
 */
PG_FUNCTION_INFO_V1(test_pglz_compress);
Datum
test_pglz_compress(PG_FUNCTION_ARGS)
{
	bytea	   *raw = PG_GETARG_BYTEA_PP(0);
	int32		rawlen = VARSIZE_ANY_EXHDR(raw);
	bytea	   *compressed;
	int32		complen;

	/*
	 * PGLZ_MAX_OUTPUT() gives the payload size to reserve for this input;
	 * VARHDRSZ is added on top for the bytea header.
	 */
	compressed = (bytea *) palloc(PGLZ_MAX_OUTPUT(rawlen) + VARHDRSZ);

	complen = pglz_compress(VARDATA_ANY(raw), rawlen,
							VARDATA(compressed), PGLZ_strategy_always);

	if (complen >= 0)
	{
		/* Success: stamp the actual length and hand the value back. */
		SET_VARSIZE(compressed, complen + VARHDRSZ);
		PG_RETURN_BYTEA_P(compressed);
	}

	/* A negative length means pglz_compress() gave up. */
	PG_RETURN_NULL();
}
/*
 * test_pglz_decompress
 *
 * SQL-callable wrapper around pglz_decompress().
 *
 * Arguments:
 *	1. bytea - the compressed data to feed to pglz_decompress().
 *	2. int4  - the expected size of the uncompressed data; the destination
 *			   buffer is allocated with exactly this many payload bytes.
 *	3. bool  - passed through as pglz_decompress()'s check_complete flag.
 *
 * Returns a bytea whose length is the value returned by pglz_decompress().
 * Raises an error if the expected size is negative or if pglz_decompress()
 * returns a negative value.
 */
PG_FUNCTION_INFO_V1(test_pglz_decompress);
Datum
test_pglz_decompress(PG_FUNCTION_ARGS)
{
	bytea	   *input = PG_GETARG_BYTEA_PP(0);
	int32		rawsize = PG_GETARG_INT32(1);
	bool		check_complete = PG_GETARG_BOOL(2);
	char	   *source = VARDATA_ANY(input);
	int32		slen = VARSIZE_ANY_EXHDR(input);
	bytea	   *result;
	int32		dlen;

	if (rawsize < 0)
		elog(ERROR, "rawsize must not be negative");

	/*
	 * Compute the allocation size as a Size, not an int32.  rawsize comes
	 * straight from the SQL caller and may be within VARHDRSZ of the int32
	 * maximum, in which case a plain "rawsize + VARHDRSZ" would overflow a
	 * signed integer.  With the widened arithmetic an oversized request
	 * reaches palloc() as a well-defined value and is left for it to reject.
	 */
	result = (bytea *) palloc((Size) rawsize + VARHDRSZ);

	dlen = pglz_decompress(source, slen, VARDATA(result),
						   rawsize, check_complete);

	if (dlen < 0)
		elog(ERROR, "pglz_decompress failed");

	/* dlen may be smaller than rawsize; report only the bytes produced. */
	SET_VARSIZE(result, dlen + VARHDRSZ);
	PG_RETURN_BYTEA_P(result);
}

View file

@ -0,0 +1,53 @@
--
-- Tests for PGLZ compression
--
-- directory paths and dlsuffix are passed to us in environment variables
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
CREATE FUNCTION test_pglz_compress(bytea)
RETURNS bytea
AS :'regresslib' LANGUAGE C STRICT;
CREATE FUNCTION test_pglz_decompress(bytea, int4, bool)
RETURNS bytea
AS :'regresslib' LANGUAGE C STRICT;
-- Round-trip with pglz: compress then decompress.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 400, false) =
decode(repeat('abcd', 100), 'escape') AS roundtrip_ok;
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 400, true) =
decode(repeat('abcd', 100), 'escape') AS roundtrip_ok;
-- Decompression with rawsize too large, fails to fill the destination
-- buffer.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 500, true);
-- Decompression with rawsize too small, fails with source not fully
-- consumed.
SELECT test_pglz_decompress(test_pglz_compress(
decode(repeat('abcd', 100), 'escape')), 100, true);
-- Corrupted compressed data. Set control bit with read of a match tag,
-- no data follows.
SELECT length(test_pglz_decompress('\x01'::bytea, 1024, false)) AS ctrl_only_len;
SELECT test_pglz_decompress('\x01'::bytea, 1024, true);
-- Corrupted compressed data. Set control bit with read of a match tag,
-- 1 byte follows.
SELECT test_pglz_decompress('\x01ff'::bytea, 1024, false);
SELECT test_pglz_decompress('\x01ff'::bytea, 1024, true);
-- Corrupted compressed data. Set control bit with match tag where length
-- nibble is 3 bytes (extended length), no data follows.
SELECT test_pglz_decompress('\x010f01'::bytea, 1024, false);
SELECT test_pglz_decompress('\x010f01'::bytea, 1024, true);
-- Clean up
DROP FUNCTION test_pglz_compress;
DROP FUNCTION test_pglz_decompress;