mirror of
https://github.com/postgres/postgres.git
synced 2026-02-12 23:33:27 -05:00
While the preceding commit prevented such attachments from occurring in future, this one aims to prevent further abuse of any already-created operator that exposes _int_matchsel to the wrong data types. (No other contrib module has a vulnerable selectivity estimator.) We need only check that the Const we've found in the query is indeed of the type we expect (query_int), but there's a difficulty: as an extension type, query_int doesn't have a fixed OID that we could hard-code into the estimator. Therefore, the bulk of this patch consists of infrastructure to let an extension function securely look up the OID of a datatype belonging to the same extension. (Extension authors have requested such functionality before, so we anticipate that this code will have additional non-security uses, and may soon be extended to allow looking up other kinds of SQL objects.) This is done by first finding the extension that owns the calling function (there can be only one), and then thumbing through the objects owned by that extension to find a type that has the desired name. This is relatively expensive, especially for large extensions, so a simple cache is put in front of these lookups. Reported-by: Daniel Firer as part of zeroday.cloud Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Noah Misch <noah@leadboat.com> Security: CVE-2026-2004 Backpatch-through: 14
350 lines
8.9 KiB
C
350 lines
8.9 KiB
C
/*-------------------------------------------------------------------------
 *
 * _int_selfuncs.c
 *	  Functions for selectivity estimation of intarray operators
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  contrib/intarray/_int_selfuncs.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "_int.h"
|
|
#include "access/htup_details.h"
|
|
#include "catalog/pg_operator.h"
|
|
#include "catalog/pg_statistic.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "commands/extension.h"
|
|
#include "miscadmin.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/selfuncs.h"
|
|
#include "utils/syscache.h"
|
|
|
|
PG_FUNCTION_INFO_V1(_int_overlap_sel);
|
|
PG_FUNCTION_INFO_V1(_int_contains_sel);
|
|
PG_FUNCTION_INFO_V1(_int_contained_sel);
|
|
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
|
|
PG_FUNCTION_INFO_V1(_int_matchsel);
|
|
|
|
|
|
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
|
|
int nmcelems, float4 minfreq);
|
|
static int compare_val_int4(const void *a, const void *b);
|
|
|
|
/*
 * Wrappers around the default array selectivity estimation functions.
 *
 * The default array selectivity operators for the @>, && and <@ operators
 * work fine for integer arrays. However, if we tried to just use arraycontsel
 * and arraycontjoinsel directly as the cost estimator functions for our
 * operators, they would not work as intended, because they look at the
 * operator's OID. Our operators behave exactly like the built-in anyarray
 * versions, but we must tell the cost estimator functions which built-in
 * operators they correspond to. These wrappers just replace the operator
 * OID with the corresponding built-in operator's OID, and call the built-in
 * function.
 */
|
|
|
|
Datum
|
|
_int_overlap_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_contains_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_contained_sel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3)));
|
|
}
|
|
|
|
Datum
|
|
_int_overlap_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
Datum
|
|
_int_contains_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
Datum
|
|
_int_contained_joinsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
|
|
PG_GETARG_DATUM(0),
|
|
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
|
|
PG_GETARG_DATUM(2),
|
|
PG_GETARG_DATUM(3),
|
|
PG_GETARG_DATUM(4)));
|
|
}
|
|
|
|
|
|
/*
|
|
* _int_matchsel -- restriction selectivity function for intarray @@ query_int
|
|
*/
|
|
Datum
|
|
_int_matchsel(PG_FUNCTION_ARGS)
|
|
{
|
|
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
|
|
|
|
List *args = (List *) PG_GETARG_POINTER(2);
|
|
int varRelid = PG_GETARG_INT32(3);
|
|
VariableStatData vardata;
|
|
Node *other;
|
|
bool varonleft;
|
|
Selectivity selec;
|
|
QUERYTYPE *query;
|
|
Datum *mcelems = NULL;
|
|
float4 *mcefreqs = NULL;
|
|
int nmcelems = 0;
|
|
float4 minfreq = 0.0;
|
|
float4 nullfrac = 0.0;
|
|
AttStatsSlot sslot;
|
|
|
|
/*
|
|
* If expression is not "variable @@ something" or "something @@ variable"
|
|
* then punt and return a default estimate.
|
|
*/
|
|
if (!get_restriction_variable(root, args, varRelid,
|
|
&vardata, &other, &varonleft))
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
|
|
/*
|
|
* Variable should be int[]. We don't support cases where variable is
|
|
* query_int.
|
|
*/
|
|
if (vardata.vartype != INT4ARRAYOID)
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
|
|
/*
|
|
* Can't do anything useful if the something is not a constant, either.
|
|
*/
|
|
if (!IsA(other, Const))
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
}
|
|
|
|
/*
|
|
* The "@@" operator is strict, so we can cope with NULL right away.
|
|
*/
|
|
if (((Const *) other)->constisnull)
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(0.0);
|
|
}
|
|
|
|
/*
|
|
* Verify that the Const is a query_int, else return a default estimate.
|
|
* (This could only fail if someone attached this estimator to the wrong
|
|
* operator.)
|
|
*/
|
|
if (((Const *) other)->consttype !=
|
|
get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
|
|
}
|
|
|
|
query = DatumGetQueryTypeP(((Const *) other)->constvalue);
|
|
|
|
/* Empty query matches nothing */
|
|
if (query->size == 0)
|
|
{
|
|
ReleaseVariableStats(vardata);
|
|
PG_RETURN_FLOAT8(0.0);
|
|
}
|
|
|
|
/*
|
|
* Get the statistics for the intarray column.
|
|
*
|
|
* We're interested in the Most-Common-Elements list, and the NULL
|
|
* fraction.
|
|
*/
|
|
if (HeapTupleIsValid(vardata.statsTuple))
|
|
{
|
|
Form_pg_statistic stats;
|
|
|
|
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
|
|
nullfrac = stats->stanullfrac;
|
|
|
|
/*
|
|
* For an int4 array, the default array type analyze function will
|
|
* collect a Most Common Elements list, which is an array of int4s.
|
|
*/
|
|
if (get_attstatsslot(&sslot, vardata.statsTuple,
|
|
STATISTIC_KIND_MCELEM, InvalidOid,
|
|
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
|
|
{
|
|
Assert(sslot.valuetype == INT4OID);
|
|
|
|
/*
|
|
* There should be three more Numbers than Values, because the
|
|
* last three (for intarray) cells are taken for minimal, maximal
|
|
* and nulls frequency. Punt if not.
|
|
*/
|
|
if (sslot.nnumbers == sslot.nvalues + 3)
|
|
{
|
|
/* Grab the lowest frequency. */
|
|
minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
|
|
|
|
mcelems = sslot.values;
|
|
mcefreqs = sslot.numbers;
|
|
nmcelems = sslot.nvalues;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
memset(&sslot, 0, sizeof(sslot));
|
|
|
|
/* Process the logical expression in the query, using the stats */
|
|
selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
|
|
mcelems, mcefreqs, nmcelems, minfreq);
|
|
|
|
/* MCE stats count only non-null rows, so adjust for null rows. */
|
|
selec *= (1.0 - nullfrac);
|
|
|
|
free_attstatsslot(&sslot);
|
|
ReleaseVariableStats(vardata);
|
|
|
|
CLAMP_PROBABILITY(selec);
|
|
|
|
PG_RETURN_FLOAT8((float8) selec);
|
|
}
|
|
|
|
/*
|
|
* Estimate selectivity of single intquery operator
|
|
*/
|
|
static Selectivity
|
|
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
|
|
int nmcelems, float4 minfreq)
|
|
{
|
|
Selectivity selec;
|
|
|
|
/* since this function recurses, it could be driven to stack overflow */
|
|
check_stack_depth();
|
|
|
|
if (item->type == VAL)
|
|
{
|
|
Datum *searchres;
|
|
|
|
if (mcelems == NULL)
|
|
return (Selectivity) DEFAULT_EQ_SEL;
|
|
|
|
searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
|
|
sizeof(Datum), compare_val_int4);
|
|
if (searchres)
|
|
{
|
|
/*
|
|
* The element is in MCELEM. Return precise selectivity (or at
|
|
* least as precise as ANALYZE could find out).
|
|
*/
|
|
selec = mcefreqs[searchres - mcelems];
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The element is not in MCELEM. Punt, but assume that the
|
|
* selectivity cannot be more than minfreq / 2.
|
|
*/
|
|
selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
|
|
}
|
|
}
|
|
else if (item->type == OPR)
|
|
{
|
|
/* Current query node is an operator */
|
|
Selectivity s1,
|
|
s2;
|
|
|
|
s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
|
|
minfreq);
|
|
switch (item->val)
|
|
{
|
|
case (int32) '!':
|
|
selec = 1.0 - s1;
|
|
break;
|
|
|
|
case (int32) '&':
|
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
|
|
nmcelems, minfreq);
|
|
selec = s1 * s2;
|
|
break;
|
|
|
|
case (int32) '|':
|
|
s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
|
|
nmcelems, minfreq);
|
|
selec = s1 + s2 - s1 * s2;
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unrecognized operator: %d", item->val);
|
|
selec = 0; /* keep compiler quiet */
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
elog(ERROR, "unrecognized int query item type: %u", item->type);
|
|
selec = 0; /* keep compiler quiet */
|
|
}
|
|
|
|
/* Clamp intermediate results to stay sane despite roundoff error */
|
|
CLAMP_PROBABILITY(selec);
|
|
|
|
return selec;
|
|
}
|
|
|
|
/*
|
|
* Comparison function for binary search in mcelem array.
|
|
*/
|
|
static int
|
|
compare_val_int4(const void *a, const void *b)
|
|
{
|
|
int32 key = *(int32 *) a;
|
|
int32 value = DatumGetInt32(*(const Datum *) b);
|
|
|
|
if (key < value)
|
|
return -1;
|
|
else if (key > value)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|