postgresql/src/backend/access/common/detoast.c
Tom Lane aeb1631ed2 Mostly-cosmetic adjustments of TOAST-related macros.
The authors of bbe0a81db hadn't quite got the idea that macros named
like SOMETHING_4B_C were only meant for internal endianness-related
details in postgres.h.  Choose more legible names for macros that are
intended to be used elsewhere.  Rearrange postgres.h a bit to clarify
the separation between those internal macros and ones intended for
wider use.

Also, avoid using the term "rawsize" for true decompressed size;
we've used "extsize" for that, because "rawsize" generally denotes
total Datum size including header.  This choice seemed particularly
unfortunate in tests that were comparing one of these meanings to
the other.

This patch includes a couple of not-purely-cosmetic changes: be
sure that the shifts aligning compression methods are unsigned
(not critical today, but will be when compression method 2 exists),
and fix broken definition of VARATT_EXTERNAL_GET_COMPRESSION (now
VARATT_EXTERNAL_GET_COMPRESS_METHOD), whose callers worked only
accidentally.

Discussion: https://postgr.es/m/574197.1616428079@sss.pgh.pa.us
2021-03-22 13:43:10 -04:00

635 lines
16 KiB
C

/*-------------------------------------------------------------------------
*
* detoast.c
* Retrieve compressed or external variable size attributes.
*
* Copyright (c) 2000-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/access/common/detoast.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/detoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/toast_internals.h"
#include "common/int.h"
#include "common/pg_lzcompress.h"
#include "utils/expandeddatum.h"
#include "utils/rel.h"
/*
 * Forward declarations of the file-local helpers defined further down in
 * this file: two fetch routines that read from the toast relation and two
 * decompression dispatchers.
 */
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
/* ----------
* detoast_external_attr -
*
* Public entry point to get back a toasted value from
* external source (possibly still in compressed format).
*
* This will return a datum that contains all the data internally, ie, not
* relying on external storage or memory, but it can still be compressed or
* have a short header. Note some callers assume that if the input is an
* EXTERNAL datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_external_attr(struct varlena *attr)
{
struct varlena *result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse if value is still external in some other way */
if (VARATT_IS_EXTERNAL(attr))
return detoast_external_attr(attr);
/*
* Copy into the caller's memory context, in case caller tries to
* pfree the result.
*/
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
ExpandedObjectHeader *eoh;
Size resultsize;
eoh = DatumGetEOHP(PointerGetDatum(attr));
resultsize = EOH_get_flat_size(eoh);
result = (struct varlena *) palloc(resultsize);
EOH_flatten_into(eoh, (void *) result, resultsize);
}
else
{
/*
* This is a plain value inside of the main tuple - why am I called?
*/
result = attr;
}
return result;
}
/* ----------
* detoast_attr -
*
* Public entry point to get back a toasted value from compression
* or external storage. The result is always non-extended varlena form.
*
* Note some callers assume that if the input is an EXTERNAL or COMPRESSED
* datum, the result will be a pfree'able chunk.
* ----------
*/
struct varlena *
detoast_attr(struct varlena *attr)
{
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/*
* This is an externally stored datum --- fetch it back from there
*/
attr = toast_fetch_datum(attr);
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
struct varlena *tmp = attr;
attr = toast_decompress_datum(tmp);
pfree(tmp);
}
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
/*
* This is an indirect pointer --- dereference it
*/
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
attr = (struct varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
/* recurse in case value is still extended in some other way */
attr = detoast_attr(attr);
/* if it isn't, we'd better copy it */
if (attr == (struct varlena *) redirect.pointer)
{
struct varlena *result;
result = (struct varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
attr = result;
}
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/*
* This is an expanded-object pointer --- get flat format
*/
attr = detoast_external_attr(attr);
/* flatteners are not allowed to produce compressed/short output */
Assert(!VARATT_IS_EXTENDED(attr));
}
else if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
*/
attr = toast_decompress_datum(attr);
}
else if (VARATT_IS_SHORT(attr))
{
/*
* This is a short-header varlena --- convert to 4-byte header format
*/
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
struct varlena *new_attr;
new_attr = (struct varlena *) palloc(new_size);
SET_VARSIZE(new_attr, new_size);
memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
attr = new_attr;
}
return attr;
}
/* ----------
 * detoast_attr_slice -
 *
 * Public entry point to get back part of a toasted value
 * from compression or external storage.
 *
 * sliceoffset is where to start (zero or more); a negative value
 * raises an ERROR.
 * If slicelength < 0, return everything beyond sliceoffset.
 *
 * The request is clamped to the data actually available: a sliceoffset
 * at or past the end of the value produces an empty (header-only) result,
 * and a slice running past the end is truncated.
 *
 * On every path the result is a newly palloc'd varlena; intermediate
 * copies made here are freed before returning, while the caller's input
 * datum is never freed.
 * ----------
 */
struct varlena *
detoast_attr_slice(struct varlena *attr,
int32 sliceoffset, int32 slicelength)
{
/* datum (possibly compressed or short) from which the slice is cut */
struct varlena *preslice;
struct varlena *result;
/* start of the payload bytes of preslice */
char *attrdata;
/* sliceoffset + slicelength, or -1 meaning "up to the end of the value" */
int32 slicelimit;
/* payload size of preslice, excluding its header */
int32 attrsize;
if (sliceoffset < 0)
elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
/*
 * Compute slicelimit = offset + length, or -1 if we must fetch all of the
 * value. In case of integer overflow, we must fetch all.
 */
if (slicelength < 0)
slicelimit = -1;
else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
slicelength = slicelimit = -1;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
struct varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/* fast path for non-compressed external datums */
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
/*
 * For compressed values, we need to fetch enough slices to decompress
 * at least the requested part (when a prefix is requested).
 * Otherwise, just fetch all slices.
 */
if (slicelimit >= 0)
{
/* default: fetch the entire stored (compressed) value */
int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
/*
 * Determine maximum amount of compressed data needed for a prefix
 * of a given length (after decompression).
 *
 * At least for now, if it's LZ4 data, we'll have to fetch the
 * whole thing, because there doesn't seem to be an API call to
 * determine how much compressed data we need to be sure of being
 * able to decompress the required slice.
 */
if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
TOAST_PGLZ_COMPRESSION_ID)
max_size = pglz_maximum_compressed_size(slicelimit, max_size);
/*
 * Fetch enough compressed slices (compressed marker will get set
 * automatically).
 */
preslice = toast_fetch_datum_slice(attr, 0, max_size);
}
else
preslice = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
struct varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
/*
 * Slice the referenced datum instead.  Note slicelength was already
 * reset to -1 above if offset + length overflowed.
 */
return detoast_attr_slice(redirect.pointer,
sliceoffset, slicelength);
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
{
/* pass it off to detoast_external_attr to flatten */
preslice = detoast_external_attr(attr);
}
else
preslice = attr;
/* Every external form has been resolved (or returned) by now. */
Assert(!VARATT_IS_EXTERNAL(preslice));
if (VARATT_IS_COMPRESSED(preslice))
{
struct varlena *tmp = preslice;
/* Decompress enough to encompass the slice and the offset */
if (slicelimit >= 0)
preslice = toast_decompress_datum_slice(tmp, slicelimit);
else
preslice = toast_decompress_datum(tmp);
/*
 * Free the compressed copy only if it was obtained above; when it is
 * the caller's own inline datum (tmp == attr) it must be left alone.
 */
if (tmp != attr)
pfree(tmp);
}
/* Locate the payload, allowing for either header width. */
if (VARATT_IS_SHORT(preslice))
{
attrdata = VARDATA_SHORT(preslice);
attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
}
else
{
attrdata = VARDATA(preslice);
attrsize = VARSIZE(preslice) - VARHDRSZ;
}
/* slicing of datum for compressed cases and plain value */
if (sliceoffset >= attrsize)
{
/* start is beyond the available data: produce an empty slice */
sliceoffset = 0;
slicelength = 0;
}
else if (slicelength < 0 || slicelimit > attrsize)
/* "to the end" was requested, or the slice overruns: clamp it */
slicelength = attrsize - sliceoffset;
/* Build the result with a regular 4-byte header and copy the bytes. */
result = (struct varlena *) palloc(slicelength + VARHDRSZ);
SET_VARSIZE(result, slicelength + VARHDRSZ);
memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
/* Release the working copy unless it is the caller's own datum. */
if (preslice != attr)
pfree(preslice);
return result;
}
/* ----------
 * toast_fetch_datum -
 *
 *  Rebuild a complete in-memory datum from the chunks stored in the
 *  toast relation that the given on-disk toast pointer refers to.
 *
 *  The result is palloc'd and its header is marked compressed when the
 *  toast pointer says the stored data is compressed; no decompression is
 *  done here.  Raises an ERROR if attr is not an on-disk toast pointer.
 * ----------
 */
static struct varlena *
toast_fetch_datum(struct varlena *attr)
{
    struct varatt_external toast_pointer;
    struct varlena *result;
    Relation    toastrel;
    int32       attrsize;
    int32       totalsize;

    if (!VARATT_IS_EXTERNAL_ONDISK(attr))
        elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");

    /* Must copy to access aligned fields */
    VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

    attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
    totalsize = attrsize + VARHDRSZ;

    result = (struct varlena *) palloc(totalsize);

    /* Stamp the header so that it reflects the stored representation. */
    if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
        SET_VARSIZE_COMPRESSED(result, totalsize);
    else
        SET_VARSIZE(result, totalsize);

    /*
     * A zero-length value probably shouldn't happen, but if it does there
     * is nothing to read, so skip touching the toast relation altogether.
     */
    if (attrsize != 0)
    {
        toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);

        /* Read every chunk of the value into the result buffer. */
        table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
                                         attrsize, 0, attrsize, result);

        table_close(toastrel, AccessShareLock);
    }

    return result;
}
/* ----------
 * toast_fetch_datum_slice -
 *
 *  Rebuild part of a datum from the chunks stored in the toast relation.
 *
 *  Works for uncompressed external datums with any offset, and for
 *  compressed external datums only when the requested slice is a prefix
 *  (sliceoffset == 0).  A negative slicelength, or one that runs past the
 *  end, is clamped to the end of the stored value.
 *
 *  The result is palloc'd; its header is marked compressed when the toast
 *  pointer is.  Raises an ERROR if attr is not an on-disk toast pointer.
 * ----------
 */
static struct varlena *
toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
                        int32 slicelength)
{
    struct varatt_external toast_pointer;
    struct varlena *result;
    Relation    toastrel;
    int32       attrsize;
    bool        is_compressed;

    if (!VARATT_IS_EXTERNAL_ONDISK(attr))
        elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");

    /* Must copy to access aligned fields */
    VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
    is_compressed = VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer);

    /*
     * Only a prefix of a compressed datum is meaningful: an interior slice
     * of compressed bytes could not be decompressed by anyone.
     */
    Assert(!is_compressed || 0 == sliceoffset);

    attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);

    /* A start position beyond the stored data yields an empty result. */
    if (sliceoffset >= attrsize)
    {
        sliceoffset = 0;
        slicelength = 0;
    }

    /*
     * A compressed value begins with va_tcinfo, stored as an int32; a
     * prefix request must cover those bytes as well.
     */
    if (is_compressed && slicelength > 0)
        slicelength += sizeof(int32);

    /*
     * Clamp the request to the stored size.  (Our sole caller,
     * detoast_attr_slice, guards against sliceoffset + slicelength
     * overflowing.)
     */
    if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
        slicelength = attrsize - sliceoffset;

    result = (struct varlena *) palloc(slicelength + VARHDRSZ);

    if (is_compressed)
        SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
    else
        SET_VARSIZE(result, slicelength + VARHDRSZ);

    /* With nothing to read we can skip opening the toast relation. */
    if (slicelength != 0)
    {
        toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);

        /* Read the chunks that cover the requested byte range. */
        table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
                                         attrsize, sliceoffset, slicelength,
                                         result);

        table_close(toastrel, AccessShareLock);
    }

    return result;
}
/* ----------
* toast_decompress_datum -
*
* Decompress a compressed version of a varlena datum
*/
static struct varlena *
toast_decompress_datum(struct varlena *attr)
{
ToastCompressionId cmid;
Assert(VARATT_IS_COMPRESSED(attr));
/*
* Fetch the compression method id stored in the compression header and
* decompress the data using the appropriate decompression routine.
*/
cmid = TOAST_COMPRESS_METHOD(attr);
switch (cmid)
{
case TOAST_PGLZ_COMPRESSION_ID:
return pglz_decompress_datum(attr);
case TOAST_LZ4_COMPRESSION_ID:
return lz4_decompress_datum(attr);
default:
elog(ERROR, "invalid compression method id %d", cmid);
return NULL; /* keep compiler quiet */
}
}
/* ----------
 * toast_decompress_datum_slice -
 *
 *  Decompress only the leading part of a compressed varlena datum,
 *  dispatching on the compression method id recorded in its compression
 *  header.  slicelength is passed through to the method-specific routine.
 *
 *  Offsets are not handled here: detoast_attr_slice asks for a prefix
 *  long enough to cover offset plus length and trims the front itself.
 *
 *  Raises an ERROR for an unrecognized method id.
 * ----------
 */
static struct varlena *
toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
{
    ToastCompressionId method;

    Assert(VARATT_IS_COMPRESSED(attr));

    /* The compression header says which algorithm produced the data. */
    method = TOAST_COMPRESS_METHOD(attr);

    if (method == TOAST_PGLZ_COMPRESSION_ID)
        return pglz_decompress_datum_slice(attr, slicelength);

    if (method == TOAST_LZ4_COMPRESSION_ID)
        return lz4_decompress_datum_slice(attr, slicelength);

    elog(ERROR, "invalid compression method id %d", method);
    return NULL;                /* keep compiler quiet */
}
/* ----------
 * toast_raw_datum_size -
 *
 *  Report the raw (fully detoasted) size of a varlena datum, counted with
 *  a regular VARHDRSZ header, without actually detoasting it.
 * ----------
 */
Size
toast_raw_datum_size(Datum value)
{
    struct varlena *datum = (struct varlena *) DatumGetPointer(value);

    if (VARATT_IS_EXTERNAL_ONDISK(datum))
    {
        struct varatt_external ondisk;

        /* va_rawsize is the size of the original datum -- including header */
        VARATT_EXTERNAL_GET_POINTER(ondisk, datum);
        return ondisk.va_rawsize;
    }

    if (VARATT_IS_EXTERNAL_INDIRECT(datum))
    {
        struct varatt_indirect indirect;

        VARATT_EXTERNAL_GET_POINTER(indirect, datum);

        /* nested indirect Datums aren't allowed */
        Assert(!VARATT_IS_EXTERNAL_INDIRECT(indirect.pointer));

        /* report the size of the datum being pointed at */
        return toast_raw_datum_size(PointerGetDatum(indirect.pointer));
    }

    if (VARATT_IS_EXTERNAL_EXPANDED(datum))
        return EOH_get_flat_size(DatumGetEOHP(value));

    /*
     * For inline compressed data the stored size counts payload bytes only,
     * so the header has to be added back.
     */
    if (VARATT_IS_COMPRESSED(datum))
        return VARDATA_COMPRESSED_GET_EXTSIZE(datum) + VARHDRSZ;

    /*
     * Short-header datums are reported as if they had a VARHDRSZ header, or
     * else the callers of this function will be confused.
     */
    if (VARATT_IS_SHORT(datum))
        return VARSIZE_SHORT(datum) - VARHDRSZ_SHORT + VARHDRSZ;

    /* plain untoasted datum */
    return VARSIZE(datum);
}
/* ----------
 * toast_datum_size
 *
 *  Return the physical storage size (possibly compressed) of a varlena
 *  datum, i.e. the number of bytes the value occupies in the form in which
 *  it is currently stored, without detoasting it.
 *
 *  For an on-disk toast pointer this is the external size of the stored
 *  data; for an indirect pointer it is the size of the referenced datum;
 *  for an expanded object it is the flat size; for inline datums it is the
 *  datum's own size including its (short or regular) header.
 * ----------
 */
Size
toast_datum_size(Datum value)
{
    struct varlena *attr = (struct varlena *) DatumGetPointer(value);
    Size        result;

    if (VARATT_IS_EXTERNAL_ONDISK(attr))
    {
        /*
         * Attribute is stored externally - return the extsize whether
         * compressed or not.  We do not count the size of the toast pointer
         * ... should we?
         */
        struct varatt_external toast_pointer;

        VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
        result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
    }
    else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
    {
        struct varatt_indirect toast_pointer;

        VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

        /*
         * Nested indirect Datums aren't allowed.  The check must be made on
         * the datum being pointed at: attr itself is known to be indirect
         * in this branch, so testing attr would always fail.
         */
        Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));

        return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
    }
    else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
    {
        result = EOH_get_flat_size(DatumGetEOHP(value));
    }
    else if (VARATT_IS_SHORT(attr))
    {
        /* short-header datum: its size as stored, 1-byte header included */
        result = VARSIZE_SHORT(attr);
    }
    else
    {
        /*
         * Attribute is stored inline either compressed or not, just calculate
         * the size of the datum in either case.
         */
        result = VARSIZE(attr);
    }

    return result;
}