Harden our regex engine against integer overflow in size calculations.

The number of NFA states, number of NFA arcs, and number of colors
are all bounded to reasonably small values.  However, there are
places where we try to allocate arrays sized by products of those
quantities, and those calculations could overflow, enabling
buffer-overrun attacks.  In practice there's no problem on 64-bit
machines, but there are some live scenarios on 32-bit machines.

A related problem is that citerdissect() and creviterdissect()
allocate arrays based on the length of the input string, which
potentially could overflow.

To fix, invent MALLOC_ARRAY and REALLOC_ARRAY macros that rely on
palloc_array_extended and repalloc_array_extended with the NO_OOM
option, similarly to the existing MALLOC and REALLOC macros.
(Like those, they'll throw an error rather than return a NULL result
for oversize requests.  This doesn't really fit into the regex code's
view of error handling, but it'll do for now.  We can consider
whether to change that behavior in a non-security follow-up patch.)

I installed similar defenses in the colormap construction code.
It's not entirely clear whether integer overflow is possible
there, but analyzing the behavior in detail seems not worth
the trouble, as the risky spots are not in hot code paths.

I left a bunch of calls as-is after verifying that they can't
overflow given reasonable limits on nstates and narcs.  Those
limits were already enforced via REG_MAX_COMPILE_SPACE, but I
added commentary to document the interactions.

In passing, also fix a related edge case, which is that the
special color numbers used in LACON carcs could overflow the
"color" data type, if ncolors is close to MAX_COLOR.

In v14 and v15, the regex engine calls malloc() directly instead
of using palloc(), so MALLOC_ARRAY and REALLOC_ARRAY do likewise.

Reported-by: Xint Code
Author: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Backpatch-through: 14
Security: CVE-2026-6473
This commit is contained in:
Tom Lane 2026-05-11 05:13:48 -07:00 committed by Noah Misch
parent fe2720c450
commit e3a2bea41c
8 changed files with 59 additions and 22 deletions

View file

@ -218,6 +218,7 @@ newcolor(struct colormap *cm)
n = cm->ncds * 2;
if (n > MAX_COLOR + 1)
n = MAX_COLOR + 1;
/* the MAX_COLOR+1 limit ensures these alloc sizes can't overflow: */
if (cm->cd == cm->cdspace)
{
newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
@ -434,9 +435,8 @@ newhicolorrow(struct colormap *cm,
CERR(REG_ESPACE);
return 0;
}
newarray = (color *) REALLOC(cm->hicolormap,
cm->maxarrayrows * 2 *
cm->hiarraycols * sizeof(color));
newarray = REALLOC_ARRAY(cm->hicolormap, color,
cm->maxarrayrows * 2 * cm->hiarraycols);
if (newarray == NULL)
{
CERR(REG_ESPACE);
@ -477,9 +477,8 @@ newhicolorcols(struct colormap *cm)
CERR(REG_ESPACE);
return;
}
newarray = (color *) REALLOC(cm->hicolormap,
cm->maxarrayrows *
cm->hiarraycols * 2 * sizeof(color));
newarray = REALLOC_ARRAY(cm->hicolormap, color,
cm->maxarrayrows * cm->hiarraycols * 2);
if (newarray == NULL)
{
CERR(REG_ESPACE);
@ -652,8 +651,7 @@ subcoloronechr(struct vars *v,
* Potentially, we could need two more colormapranges than we have now, if
* the given chr is in the middle of some existing range.
*/
newranges = (colormaprange *)
MALLOC((cm->numcmranges + 2) * sizeof(colormaprange));
newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges + 2);
if (newranges == NULL)
{
CERR(REG_ESPACE);
@ -766,8 +764,7 @@ subcoloronerange(struct vars *v,
* Potentially, if we have N non-adjacent ranges, we could need as many as
* 2N+1 result ranges (consider case where new range spans 'em all).
*/
newranges = (colormaprange *)
MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange));
newranges = MALLOC_ARRAY(colormaprange, cm->numcmranges * 2 + 1);
if (newranges == NULL)
{
CERR(REG_ESPACE);

View file

@ -40,6 +40,9 @@
/*
* newcvec - allocate a new cvec
*
* Note: in current usage, nchrs and nranges are never so large that we risk
* integer overflow in these size calculations, even with 32-bit size_t.
*/
static struct cvec *
newcvec(int nchrs, /* to hold this many chrs... */

View file

@ -3523,6 +3523,10 @@ compact(struct nfa *nfa,
assert(!NISERR());
/*
* The REG_MAX_COMPILE_SPACE restriction ensures that integer overflow
* can't occur in this loop nor in the allocation requests below.
*/
nstates = 0;
narcs = 0;
for (s = nfa->states; s != NULL; s = s->next)
@ -3575,6 +3579,12 @@ compact(struct nfa *nfa,
case LACON:
assert(s->no != cnfa->pre);
assert(a->co >= 0);
/* make sure the modified color number will fit */
if (a->co > MAX_COLOR - cnfa->ncolors)
{
NERR(REG_ECOLORS);
return;
}
ca->co = (color) (cnfa->ncolors + a->co);
ca->to = a->to->no;
ca++;

View file

@ -561,6 +561,7 @@ moresubs(struct vars *v,
assert(wanted > 0 && (size_t) wanted >= v->nsubs);
n = (size_t) wanted * 3 / 2 + 1;
/* n is bounded by the number of states, so no chance of overflow here */
if (v->subs == v->sub10)
{
p = (struct subre **) MALLOC(n * sizeof(struct subre *));
@ -2405,8 +2406,8 @@ newlacon(struct vars *v,
else
{
n = v->nlacons;
newlacons = (struct subre *) REALLOC(v->lacons,
(n + 1) * sizeof(struct subre));
/* better use REALLOC_ARRAY here, as struct subre is big */
newlacons = REALLOC_ARRAY(v->lacons, struct subre, n + 1);
}
if (newlacons == NULL)
{

View file

@ -640,20 +640,29 @@ newdfa(struct vars *v,
}
else
{
/*
* Restrict the ranges of nstates and ncolors enough that the arrays
* we allocate here have no more than INT_MAX members. This protects
* not only the allocation calculations just below, but later indexing
* into these arrays.
*/
if (wordsper >= INT_MAX / (nss + WORK) ||
cnfa->ncolors >= INT_MAX / nss)
{
ERR(REG_ETOOBIG);
return NULL;
}
d = (struct dfa *) MALLOC(sizeof(struct dfa));
if (d == NULL)
{
ERR(REG_ESPACE);
return NULL;
}
d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
sizeof(unsigned));
d->ssets = MALLOC_ARRAY(struct sset, nss);
d->statesarea = MALLOC_ARRAY(unsigned, (nss + WORK) * wordsper);
d->work = &d->statesarea[nss * wordsper];
d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
sizeof(struct sset *));
d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
sizeof(struct arcp));
d->outsarea = MALLOC_ARRAY(struct sset *, nss * cnfa->ncolors);
d->incarea = MALLOC_ARRAY(struct arcp, nss * cnfa->ncolors);
d->ismalloced = true;
d->arraysmalloced = true;
/* now freedfa() will behave sanely */

View file

@ -231,7 +231,7 @@ pg_regexec(regex_t *re,
if (v->nmatch <= LOCALMAT)
v->pmatch = mat;
else
v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
v->pmatch = MALLOC_ARRAY(regmatch_t, v->nmatch);
if (v->pmatch == NULL)
return REG_ESPACE;
zapallsubs(v->pmatch, v->nmatch);
@ -265,6 +265,7 @@ pg_regexec(regex_t *re,
v->subdfas = subdfas;
else
{
/* ntree is surely less than the number of states, so this is safe: */
v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
if (v->subdfas == NULL)
{
@ -279,6 +280,7 @@ pg_regexec(regex_t *re,
n = (size_t) v->g->nlacons;
if (n > 0)
{
/* nlacons is surely less than the number of arcs, so this is safe: */
v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
if (v->ladfas == NULL)
{
@ -1163,7 +1165,7 @@ citerdissect(struct vars *v,
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;
endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
endpts = MALLOC_ARRAY(chr *, max_matches + 1);
if (endpts == NULL)
return REG_ESPACE;
endpts[0] = begin;
@ -1370,7 +1372,7 @@ creviterdissect(struct vars *v,
max_matches = t->max;
if (max_matches < min_matches)
max_matches = min_matches;
endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
endpts = MALLOC_ARRAY(chr *, max_matches + 1);
if (endpts == NULL)
return REG_ESPACE;
endpts[0] = begin;

View file

@ -52,6 +52,8 @@
#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM)
#define FREE(p) pfree(VS(p))
#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM)
#define MALLOC_ARRAY(type, n) palloc_array_extended(type, n, MCXT_ALLOC_NO_OOM)
#define REALLOC_ARRAY(p, type, n) repalloc_array_extended(p, type, n, MCXT_ALLOC_NO_OOM)
#define INTERRUPT(re) CHECK_FOR_INTERRUPTS()
#define assert(x) Assert(x)

View file

@ -76,6 +76,14 @@
#ifndef FREE
#define FREE(p) free(VS(p))
#endif
#ifndef MALLOC_ARRAY
/*
 * Allocate an array of n elements of the given type, yielding a "type *".
 *
 * calloc() is used for its check that n * sizeof(type) does not overflow,
 * not for its zero-filling, which callers do not depend on.  The arguments
 * are passed in calloc's documented order: element count, then element size.
 */
#define MALLOC_ARRAY(type, n) ((type *) calloc((n), sizeof(type)))
#endif
#ifndef REALLOC_ARRAY
/*
 * Resize p to hold n elements of the given type, yielding a "type *".
 *
 * If sizeof(type) * n would wrap around size_t, yield NULL without calling
 * REALLOC, so that the caller's existing out-of-memory path handles it
 * instead of receiving an undersized buffer.  A negative n converts to a
 * very large size_t, so it is rejected as well whenever sizeof(type) > 1.
 *
 * Note: n is evaluated twice, so it must not have side effects.
 */
#define REALLOC_ARRAY(p, type, n) \
	((size_t) (n) > ((size_t) -1) / sizeof(type) ? (type *) NULL : \
	 (type *) REALLOC(p, sizeof(type) * (size_t) (n)))
#endif
/* interruption */
#ifndef INTERRUPT
@ -446,6 +454,11 @@ struct cnfa
* (the compacted NFA and the colormap).
* The scaling here is based on an empirical measurement that very large
* NFAs tend to have about 4 arcs/state.
*
* Do not raise this so high as to allow more than INT_MAX/8 states or arcs,
* or you risk integer overflows in various space allocation requests.
* (We could be more defensive in those places, but that's so far beyond the
* practical range of NFA sizes that it doesn't seem worth additional code.)
*/
#ifndef REG_MAX_COMPILE_SPACE
#define REG_MAX_COMPILE_SPACE \