From 4da2afd01f938af35c1a52bbf6bc40baa52462f6 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 9 Mar 2026 13:46:27 +0900 Subject: [PATCH] Fix size underestimation of DSA pagemap for odd-sized segments When make_new_segment() creates an odd-sized segment, the pagemap was only sized based on a number of usable_pages entries, forgetting that a segment also contains metadata pages, and that the FreePageManager uses absolute page indices that cover the entire segment. This miscalculation could cause accesses to pagemap entries to be out of bounds. During subsequent reuse of the allocated segment, allocations landing on pages with indices higher than usable_pages could cause out-of-bounds pagemap reads and/or writes. On write, 'span' pointers are stored into the data area, corrupting the allocated objects. On read (aka during a dsa_free), garbage is interpreted as a span pointer, typically crashing the server in dsa_get_address(). The normal geometric path correctly sizes the pagemap for all pages in the segment. The odd-sized path needs to do the same, but it works forward from usable_pages rather than backward from total_size. This commit fixes the sizing of the odd-sized case by adding pagemap entries for the metadata pages after the initial metadata_bytes calculation, using an integer ceiling division to compute the exact number of additional entries needed in one go, avoiding any iteration in the calculation. An assertion is added in the code path for odd-sized segments, ensuring that the pagemap includes the metadata area, and that the result is appropriately sized. This problem would show up depending on the size requested for the allocation of a DSA segment. The reporter has noticed this issue when a parallel hash join makes a DSA allocation large enough to trigger the odd-sized segment path, but it could happen for anything that does a DSA allocation. A regression test is added to test_dsa, down to v17 where the test module has been introduced. This adds a set of cheap tests to check the problem, the new assertion being useful for this purpose. Sami has proposed a test that took a longer time than what I have done here; the test committed is faster and good enough to check the odd-sized allocation path. Author: Paul Bunn Reviewed-by: Sami Imseih Reviewed-by: Chao Li Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/044401dcabac$fe432490$fac96db0$@icloud.com Backpatch-through: 14 --- src/backend/utils/mmgr/dsa.c | 29 ++++++++++++++ .../modules/test_dsa/expected/test_dsa.out | 16 ++++++++ src/test/modules/test_dsa/sql/test_dsa.sql | 7 ++++ src/test/modules/test_dsa/test_dsa--1.0.sql | 4 ++ src/test/modules/test_dsa/test_dsa.c | 40 +++++++++++++++++++ 5 files changed, 96 insertions(+) diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c index ce9ede4c196..4b4f1e1965b 100644 --- a/src/backend/utils/mmgr/dsa.c +++ b/src/backend/utils/mmgr/dsa.c @@ -2196,6 +2196,8 @@ make_new_segment(dsa_area *area, size_t requested_pages) /* See if that is enough... */ if (requested_pages > usable_pages) { + size_t total_requested_pages PG_USED_FOR_ASSERTS_ONLY; + /* * We'll make an odd-sized segment, working forward from the requested * number of pages. @@ -2206,10 +2208,37 @@ make_new_segment(dsa_area *area, size_t requested_pages) MAXALIGN(sizeof(FreePageManager)) + usable_pages * sizeof(dsa_pointer); + /* + * We must also account for pagemap entries needed to cover the + * metadata pages themselves. The pagemap must track all pages in the + * segment, including the pages occupied by metadata. + * + * This formula uses integer ceiling division to compute the exact + * number of additional entries needed. The divisor (FPM_PAGE_SIZE - + * sizeof(dsa_pointer)) accounts for the fact that each metadata page + * consumes one pagemap entry of sizeof(dsa_pointer) bytes, leaving + * only (FPM_PAGE_SIZE - sizeof(dsa_pointer)) net bytes per metadata + * page. + */ + metadata_bytes += + ((metadata_bytes + (FPM_PAGE_SIZE - sizeof(dsa_pointer)) - 1) / + (FPM_PAGE_SIZE - sizeof(dsa_pointer))) * + sizeof(dsa_pointer); + /* Add padding up to next page boundary. */ if (metadata_bytes % FPM_PAGE_SIZE != 0) metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE); total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE; + total_requested_pages = total_size / FPM_PAGE_SIZE; + + /* + * Verify that we allocated enough pagemap entries for metadata and + * usable pages. This reverse-engineers the new calculation of + * "metadata_bytes" done based on the new "requested_pages" for an + * odd-sized segment. + */ + Assert((metadata_bytes - MAXALIGN(sizeof(dsa_segment_header)) - + MAXALIGN(sizeof(FreePageManager))) / sizeof(dsa_pointer) >= total_requested_pages); /* Is that too large for dsa_pointer's addressing scheme? */ if (total_size > DSA_MAX_SEGMENT_SIZE) diff --git a/src/test/modules/test_dsa/expected/test_dsa.out b/src/test/modules/test_dsa/expected/test_dsa.out index 266010e77fe..4b53a7de4a4 100644 --- a/src/test/modules/test_dsa/expected/test_dsa.out +++ b/src/test/modules/test_dsa/expected/test_dsa.out @@ -11,3 +11,19 @@ SELECT test_dsa_resowners(); (1 row) +-- Test allocations across a pre-defined range of pages. This covers enough +-- range to check for the case of odd-sized segments, without making the test +-- too slow. +SELECT test_dsa_allocate(1001, 2000, 100); + test_dsa_allocate +------------------- + +(1 row) + +-- Larger size with odd-sized segment. +SELECT test_dsa_allocate(6501, 6600, 100); + test_dsa_allocate +------------------- + +(1 row) + diff --git a/src/test/modules/test_dsa/sql/test_dsa.sql b/src/test/modules/test_dsa/sql/test_dsa.sql index c3d8db94372..99b4a60dd14 100644 --- a/src/test/modules/test_dsa/sql/test_dsa.sql +++ b/src/test/modules/test_dsa/sql/test_dsa.sql @@ -2,3 +2,10 @@ CREATE EXTENSION test_dsa; SELECT test_dsa_basic(); SELECT test_dsa_resowners(); + +-- Test allocations across a pre-defined range of pages. This covers enough +-- range to check for the case of odd-sized segments, without making the test +-- too slow. +SELECT test_dsa_allocate(1001, 2000, 100); +-- Larger size with odd-sized segment. +SELECT test_dsa_allocate(6501, 6600, 100); diff --git a/src/test/modules/test_dsa/test_dsa--1.0.sql b/src/test/modules/test_dsa/test_dsa--1.0.sql index 2904cb23525..3ee2e44cc00 100644 --- a/src/test/modules/test_dsa/test_dsa--1.0.sql +++ b/src/test/modules/test_dsa/test_dsa--1.0.sql @@ -10,3 +10,7 @@ CREATE FUNCTION test_dsa_basic() CREATE FUNCTION test_dsa_resowners() RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION test_dsa_allocate(int, int, int) + RETURNS pg_catalog.void + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_dsa/test_dsa.c b/src/test/modules/test_dsa/test_dsa.c index ed2a07c962f..edcab105de6 100644 --- a/src/test/modules/test_dsa/test_dsa.c +++ b/src/test/modules/test_dsa/test_dsa.c @@ -16,6 +16,7 @@ #include "storage/dsm_registry.h" #include "storage/lwlock.h" #include "utils/dsa.h" +#include "utils/freepage.h" #include "utils/resowner.h" PG_MODULE_MAGIC; @@ -120,3 +121,42 @@ test_dsa_resowners(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * test_dsa_allocate + * + * Test DSA allocation across a range of sizes to exercise the pagemap + * sizing logic in make_new_segment(). A fresh DSA is created for each + * iteration so that each allocation triggers a new segment creation, + * including the odd-sized segment path. + */ +PG_FUNCTION_INFO_V1(test_dsa_allocate); +Datum +test_dsa_allocate(PG_FUNCTION_ARGS) +{ + int start_num_pages = PG_GETARG_INT32(0); + int end_num_pages = PG_GETARG_INT32(1); + int step = PG_GETARG_INT32(2); + size_t usable_pages; + int *tranche_id; + bool found; + dsa_area *a; + dsa_pointer dp; + + if (start_num_pages > end_num_pages) + elog(ERROR, "incorrect start and end parameters"); + + tranche_id = GetNamedDSMSegment("test_dsa", sizeof(int), + init_tranche, &found, NULL); + + for (usable_pages = start_num_pages; usable_pages < end_num_pages; usable_pages += step) + { + a = dsa_create(*tranche_id); + dp = dsa_allocate(a, usable_pages * FPM_PAGE_SIZE); + + dsa_free(a, dp); + dsa_detach(a); + } + + PG_RETURN_VOID(); +}