Fix size underestimation of DSA pagemap for odd-sized segments

When make_new_segment() creates an odd-sized segment, the pagemap was
only sized based on a number of usable_pages entries, forgetting that a
segment also contains metadata pages, and that the FreePageManager uses
absolute page indices that cover the entire segment.  This
miscalculation could cause accesses to pagemap entries to be out of
bounds.  During subsequent reuse of the allocated segment, allocations
landing on pages with indices higher than usable_pages could cause
out-of-bounds pagemap reads and/or writes.  On write, 'span' pointers
are stored into the data area, corrupting the allocated objects.  On
read (aka during a dsa_free), garbage is interpreted as a span pointer,
typically crashing the server in dsa_get_address().

The normal geometric path correctly sizes the pagemap for all pages in
the segment.  The odd-sized path needs to do the same, but it works
forward from usable_pages rather than backward from total_size.

This commit fixes the sizing of the odd-sized case by adding pagemap
entries for the metadata pages after the initial metadata_bytes
calculation, using an integer ceiling division to compute the exact
number of additional entries needed in one go, avoiding any iteration in
the calculation.

An assertion is added in the code path for odd-sized segments, ensuring
that the pagemap includes the metadata area, and that the result is
appropriately sized.

This problem would show up depending on the size requested for the
allocation of a DSA segment.  The reporter has noticed this issue when a
parallel hash join makes a DSA allocation large enough to trigger the
odd-sized segment path, but it could happen for anything that does a DSA
allocation.

A regression test is added to test_dsa, down to v17 where the test
module has been introduced.  This adds a set of cheap tests to check the
problem, the new assertion being useful for this purpose.  Sami has
proposed a test that took a longer time than what I have done here; the
test committed is faster and good enough to check the odd-sized
allocation path.

Author: Paul Bunn <paul.bunn@icloud.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/044401dcabac$fe432490$fac96db0$@icloud.com
Backpatch-through: 14
This commit is contained in:
Michael Paquier 2026-03-09 13:46:27 +09:00
parent ccd7abaa45
commit 4da2afd01f
5 changed files with 96 additions and 0 deletions

View file

@ -2196,6 +2196,8 @@ make_new_segment(dsa_area *area, size_t requested_pages)
/* See if that is enough... */
if (requested_pages > usable_pages)
{
size_t total_requested_pages PG_USED_FOR_ASSERTS_ONLY;
/*
* We'll make an odd-sized segment, working forward from the requested
* number of pages.
@ -2206,10 +2208,37 @@ make_new_segment(dsa_area *area, size_t requested_pages)
MAXALIGN(sizeof(FreePageManager)) +
usable_pages * sizeof(dsa_pointer);
/*
* We must also account for pagemap entries needed to cover the
* metadata pages themselves. The pagemap must track all pages in the
* segment, including the pages occupied by metadata.
*
* This formula uses integer ceiling division to compute the exact
* number of additional entries needed. The divisor (FPM_PAGE_SIZE -
* sizeof(dsa_pointer)) accounts for the fact that each metadata page
* consumes one pagemap entry of sizeof(dsa_pointer) bytes, leaving
* only (FPM_PAGE_SIZE - sizeof(dsa_pointer)) net bytes per metadata
* page.
*/
metadata_bytes +=
((metadata_bytes + (FPM_PAGE_SIZE - sizeof(dsa_pointer)) - 1) /
(FPM_PAGE_SIZE - sizeof(dsa_pointer))) *
sizeof(dsa_pointer);
/* Add padding up to next page boundary. */
if (metadata_bytes % FPM_PAGE_SIZE != 0)
metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
total_requested_pages = total_size / FPM_PAGE_SIZE;
/*
* Verify that we allocated enough pagemap entries for metadata and
* usable pages. This reverse-engineers the new calculation of
* "metadata_bytes" done based on the new "requested_pages" for an
* odd-sized segment.
*/
Assert((metadata_bytes - MAXALIGN(sizeof(dsa_segment_header)) -
MAXALIGN(sizeof(FreePageManager))) / sizeof(dsa_pointer) >= total_requested_pages);
/* Is that too large for dsa_pointer's addressing scheme? */
if (total_size > DSA_MAX_SEGMENT_SIZE)

View file

@ -11,3 +11,19 @@ SELECT test_dsa_resowners();
(1 row)
-- Test allocations across a pre-defined range of pages. This covers enough
-- range to check for the case of odd-sized segments, without making the test
-- too slow.
SELECT test_dsa_allocate(1001, 2000, 100);
test_dsa_allocate
-------------------
(1 row)
-- Larger size with odd-sized segment.
SELECT test_dsa_allocate(6501, 6600, 100);
test_dsa_allocate
-------------------
(1 row)

View file

@ -2,3 +2,10 @@ CREATE EXTENSION test_dsa;
SELECT test_dsa_basic();
SELECT test_dsa_resowners();
-- Test allocations across a pre-defined range of pages. This covers enough
-- range to check for the case of odd-sized segments, without making the test
-- too slow.
SELECT test_dsa_allocate(1001, 2000, 100);
-- Larger size with odd-sized segment.
SELECT test_dsa_allocate(6501, 6600, 100);

View file

@ -10,3 +10,7 @@ CREATE FUNCTION test_dsa_basic()
CREATE FUNCTION test_dsa_resowners()
RETURNS pg_catalog.void
AS 'MODULE_PATHNAME' LANGUAGE C;
CREATE FUNCTION test_dsa_allocate(int, int, int)
RETURNS pg_catalog.void
AS 'MODULE_PATHNAME' LANGUAGE C;

View file

@ -16,6 +16,7 @@
#include "storage/dsm_registry.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
#include "utils/freepage.h"
#include "utils/resowner.h"
PG_MODULE_MAGIC;
@ -120,3 +121,42 @@ test_dsa_resowners(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
/*
* test_dsa_allocate
*
* Test DSA allocation across a range of sizes to exercise the pagemap
* sizing logic in make_new_segment(). A fresh DSA is created for each
* iteration so that each allocation triggers a new segment creation,
* including the odd-sized segment path.
*/
PG_FUNCTION_INFO_V1(test_dsa_allocate);
Datum
test_dsa_allocate(PG_FUNCTION_ARGS)
{
int start_num_pages = PG_GETARG_INT32(0);
int end_num_pages = PG_GETARG_INT32(1);
int step = PG_GETARG_INT32(2);
size_t usable_pages;
int *tranche_id;
bool found;
dsa_area *a;
dsa_pointer dp;
if (start_num_pages > end_num_pages)
elog(ERROR, "incorrect start and end parameters");
tranche_id = GetNamedDSMSegment("test_dsa", sizeof(int),
init_tranche, &found, NULL);
for (usable_pages = start_num_pages; usable_pages < end_num_pages; usable_pages += step)
{
a = dsa_create(*tranche_id);
dp = dsa_allocate(a, usable_pages * FPM_PAGE_SIZE);
dsa_free(a, dp);
dsa_detach(a);
}
PG_RETURN_VOID();
}