mirror of
https://github.com/postgres/postgres.git
synced 2026-05-25 02:40:42 -04:00
Optimize sort and deduplication in ginExtractEntries()
Remove NULLs from the array first, and use qsort to deduplicate only the non-NULL items. This simplifies the comparison function. Also replace qsort_arg() with a templated version so that the comparison function can be inlined. These changes make ginExtractEntries() a little faster especially for simple datatypes like integers. Author: David Geier <geidav.pg@gmail.com> Discussion: https://www.postgresql.org/message-id/6d16b6bd-a1ff-4469-aefb-a1c8274e561a@iki.fi
This commit is contained in:
parent
b6ccd30d8f
commit
6f5ad00ab7
2 changed files with 68 additions and 85 deletions
|
|
@ -28,6 +28,7 @@
|
||||||
#include "utils/index_selfuncs.h"
|
#include "utils/index_selfuncs.h"
|
||||||
#include "utils/rel.h"
|
#include "utils/rel.h"
|
||||||
#include "utils/typcache.h"
|
#include "utils/typcache.h"
|
||||||
|
#include "lib/qunique.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -389,18 +390,9 @@ GinInitMetabuffer(Buffer b)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Support for sorting key datums in ginExtractEntries
|
* Support for sorting key datums and detecting duplicates in
|
||||||
*
|
* ginExtractEntries
|
||||||
* Note: we only have to worry about null and not-null keys here;
|
|
||||||
* ginExtractEntries never generates more than one placeholder null,
|
|
||||||
* so it doesn't have to sort those.
|
|
||||||
*/
|
*/
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
Datum datum;
|
|
||||||
bool isnull;
|
|
||||||
} keyEntryData;
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
FmgrInfo *cmpDatumFunc;
|
FmgrInfo *cmpDatumFunc;
|
||||||
|
|
@ -411,24 +403,14 @@ typedef struct
|
||||||
static int
|
static int
|
||||||
cmpEntries(const void *a, const void *b, void *arg)
|
cmpEntries(const void *a, const void *b, void *arg)
|
||||||
{
|
{
|
||||||
const keyEntryData *aa = (const keyEntryData *) a;
|
const Datum *aa = (const Datum *) a;
|
||||||
const keyEntryData *bb = (const keyEntryData *) b;
|
const Datum *bb = (const Datum *) b;
|
||||||
cmpEntriesArg *data = (cmpEntriesArg *) arg;
|
cmpEntriesArg *data = (cmpEntriesArg *) arg;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
if (aa->isnull)
|
res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
|
||||||
{
|
data->collation,
|
||||||
if (bb->isnull)
|
*aa, *bb));
|
||||||
res = 0; /* NULL "=" NULL */
|
|
||||||
else
|
|
||||||
res = 1; /* NULL ">" not-NULL */
|
|
||||||
}
|
|
||||||
else if (bb->isnull)
|
|
||||||
res = -1; /* not-NULL "<" NULL */
|
|
||||||
else
|
|
||||||
res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
|
|
||||||
data->collation,
|
|
||||||
aa->datum, bb->datum));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Detect if we have any duplicates. If there are equal keys, qsort must
|
* Detect if we have any duplicates. If there are equal keys, qsort must
|
||||||
|
|
@ -441,6 +423,14 @@ cmpEntries(const void *a, const void *b, void *arg)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ST_SORT qsort_arg_entries
|
||||||
|
#define ST_ELEMENT_TYPE Datum
|
||||||
|
#define ST_COMPARE_ARG_TYPE cmpEntriesArg
|
||||||
|
#define ST_COMPARE(a, b, arg) cmpEntries(a, b, arg)
|
||||||
|
#define ST_SCOPE static
|
||||||
|
#define ST_DEFINE
|
||||||
|
#define ST_DECLARE
|
||||||
|
#include "lib/sort_template.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Extract the index key values from an indexable item
|
* Extract the index key values from an indexable item
|
||||||
|
|
@ -451,11 +441,13 @@ cmpEntries(const void *a, const void *b, void *arg)
|
||||||
Datum *
|
Datum *
|
||||||
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
||||||
Datum value, bool isNull,
|
Datum value, bool isNull,
|
||||||
int32 *nentries, GinNullCategory **categories)
|
int32 *nentries_p, GinNullCategory **categories_p)
|
||||||
{
|
{
|
||||||
Datum *entries;
|
Datum *entries;
|
||||||
bool *nullFlags;
|
bool *nullFlags;
|
||||||
int32 i;
|
GinNullCategory *categories;
|
||||||
|
bool hasNull;
|
||||||
|
int32 nentries;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't call the extractValueFn on a null item. Instead generate a
|
* We don't call the extractValueFn on a null item. Instead generate a
|
||||||
|
|
@ -463,42 +455,60 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
||||||
*/
|
*/
|
||||||
if (isNull)
|
if (isNull)
|
||||||
{
|
{
|
||||||
*nentries = 1;
|
*nentries_p = 1;
|
||||||
entries = palloc_object(Datum);
|
entries = palloc_object(Datum);
|
||||||
entries[0] = (Datum) 0;
|
entries[0] = (Datum) 0;
|
||||||
*categories = palloc_object(GinNullCategory);
|
*categories_p = palloc_object(GinNullCategory);
|
||||||
(*categories)[0] = GIN_CAT_NULL_ITEM;
|
(*categories_p)[0] = GIN_CAT_NULL_ITEM;
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* OK, call the opclass's extractValueFn */
|
/* OK, call the opclass's extractValueFn */
|
||||||
nullFlags = NULL; /* in case extractValue doesn't set it */
|
nullFlags = NULL; /* in case extractValue doesn't set it */
|
||||||
|
nentries = 0;
|
||||||
entries = (Datum *)
|
entries = (Datum *)
|
||||||
DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
|
DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
|
||||||
ginstate->supportCollation[attnum - 1],
|
ginstate->supportCollation[attnum - 1],
|
||||||
value,
|
value,
|
||||||
PointerGetDatum(nentries),
|
PointerGetDatum(&nentries),
|
||||||
PointerGetDatum(&nullFlags)));
|
PointerGetDatum(&nullFlags)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate a placeholder if the item contained no keys.
|
* Generate a placeholder if the item contained no keys.
|
||||||
*/
|
*/
|
||||||
if (entries == NULL || *nentries <= 0)
|
if (entries == NULL || nentries <= 0)
|
||||||
{
|
{
|
||||||
*nentries = 1;
|
*nentries_p = 1;
|
||||||
entries = palloc_object(Datum);
|
entries = palloc_object(Datum);
|
||||||
entries[0] = (Datum) 0;
|
entries[0] = (Datum) 0;
|
||||||
*categories = palloc_object(GinNullCategory);
|
*categories_p = palloc_object(GinNullCategory);
|
||||||
(*categories)[0] = GIN_CAT_EMPTY_ITEM;
|
(*categories_p)[0] = GIN_CAT_EMPTY_ITEM;
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the extractValueFn didn't create a nullFlags array, create one,
|
* Scan the items for any NULLs. All NULLs are considered equal, so we
|
||||||
* assuming that everything's non-null.
|
* just need to check and remember if there are any. We remove them from
|
||||||
|
* the array here, and after deduplication, put back one NULL entry to
|
||||||
|
* represent them all.
|
||||||
*/
|
*/
|
||||||
if (nullFlags == NULL)
|
hasNull = false;
|
||||||
nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
|
if (nullFlags)
|
||||||
|
{
|
||||||
|
int32 numNonNulls = 0;
|
||||||
|
|
||||||
|
for (int32 i = 0; i < nentries; i++)
|
||||||
|
{
|
||||||
|
if (nullFlags[i])
|
||||||
|
hasNull = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
entries[numNonNulls] = entries[i];
|
||||||
|
numNonNulls++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nentries = numNonNulls;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there's more than one key, sort and unique-ify.
|
* If there's more than one key, sort and unique-ify.
|
||||||
|
|
@ -507,63 +517,36 @@ ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
||||||
* pretty bad too. For small numbers of keys it'd likely be better to use
|
* pretty bad too. For small numbers of keys it'd likely be better to use
|
||||||
* a simple insertion sort.
|
* a simple insertion sort.
|
||||||
*/
|
*/
|
||||||
if (*nentries > 1)
|
if (nentries > 1)
|
||||||
{
|
{
|
||||||
keyEntryData *keydata;
|
|
||||||
cmpEntriesArg arg;
|
cmpEntriesArg arg;
|
||||||
|
|
||||||
keydata = palloc_array(keyEntryData, *nentries);
|
|
||||||
for (i = 0; i < *nentries; i++)
|
|
||||||
{
|
|
||||||
keydata[i].datum = entries[i];
|
|
||||||
keydata[i].isnull = nullFlags[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
|
arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
|
||||||
arg.collation = ginstate->supportCollation[attnum - 1];
|
arg.collation = ginstate->supportCollation[attnum - 1];
|
||||||
arg.haveDups = false;
|
arg.haveDups = false;
|
||||||
qsort_arg(keydata, *nentries, sizeof(keyEntryData),
|
|
||||||
cmpEntries, &arg);
|
qsort_arg_entries(entries, nentries, &arg);
|
||||||
|
|
||||||
if (arg.haveDups)
|
if (arg.haveDups)
|
||||||
{
|
nentries = qunique_arg(entries, nentries, sizeof(Datum), cmpEntries, &arg);
|
||||||
/* there are duplicates, must get rid of 'em */
|
|
||||||
int32 j;
|
|
||||||
|
|
||||||
entries[0] = keydata[0].datum;
|
|
||||||
nullFlags[0] = keydata[0].isnull;
|
|
||||||
j = 1;
|
|
||||||
for (i = 1; i < *nentries; i++)
|
|
||||||
{
|
|
||||||
if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
|
|
||||||
{
|
|
||||||
entries[j] = keydata[i].datum;
|
|
||||||
nullFlags[j] = keydata[i].isnull;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*nentries = j;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* easy, no duplicates */
|
|
||||||
for (i = 0; i < *nentries; i++)
|
|
||||||
{
|
|
||||||
entries[i] = keydata[i].datum;
|
|
||||||
nullFlags[i] = keydata[i].isnull;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pfree(keydata);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create GinNullCategory representation from nullFlags.
|
* Create GinNullCategory representation.
|
||||||
*/
|
*/
|
||||||
*categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory));
|
StaticAssertStmt(GIN_CAT_NORM_KEY == 0, "Assuming GIN_CAT_NORM_KEY=0");
|
||||||
for (i = 0; i < *nentries; i++)
|
categories = palloc0_array(GinNullCategory, nentries + (hasNull ? 1 : 0));
|
||||||
(*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY);
|
|
||||||
|
|
||||||
|
/* Put back a NULL entry, if there were any */
|
||||||
|
if (hasNull)
|
||||||
|
{
|
||||||
|
entries[nentries] = (Datum) 0;
|
||||||
|
categories[nentries] = GIN_CAT_NULL_KEY;
|
||||||
|
nentries++;
|
||||||
|
}
|
||||||
|
|
||||||
|
*nentries_p = nentries;
|
||||||
|
*categories_p = categories;
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ extern void GinInitPage(Page page, uint32 f, Size pageSize);
|
||||||
extern void GinInitMetabuffer(Buffer b);
|
extern void GinInitMetabuffer(Buffer b);
|
||||||
extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
|
||||||
Datum value, bool isNull,
|
Datum value, bool isNull,
|
||||||
int32 *nentries, GinNullCategory **categories);
|
int32 *nentries_p, GinNullCategory **categories_p);
|
||||||
|
|
||||||
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
|
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
|
||||||
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
|
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue