/* postgresql/src/include/nodes/queryjumble.h */


/*-------------------------------------------------------------------------
*
* queryjumble.h
* Query normalization and fingerprinting.
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/nodes/queryjumble.h
*
*-------------------------------------------------------------------------
*/
#ifndef QUERYJUMBLE_H
#define QUERYJUMBLE_H
#include "nodes/parsenodes.h"
/*
 * Struct for tracking locations/lengths of constants during normalization.
 *
 * One entry describes a single span of the query text.  JumbleState keeps an
 * array of these in its clocations field.
 */
typedef struct LocationLen
{
	int			location;		/* start offset in query text */
	int			length;			/* length in bytes, or -1 to ignore */

	/* Does this location represent a squashed list? */
	bool		squashed;

	/* Is this location a PARAM_EXTERN parameter? */
	bool		extern_param;
} LocationLen;
/*
* Working state for computing a query jumble and producing a normalized
* query string.
*
* The struct bundles three things: the jumble byte buffer itself, the list of
* constant locations collected for normalization, and bookkeeping for
* parameter numbering and pending NULL nodes.
*/
typedef struct JumbleState
{
/* Jumble of current query tree (byte buffer) */
unsigned char *jumble;
/* Number of bytes used in jumble[] */
Size jumble_len;
/* Array of locations of constants that should be removed */
LocationLen *clocations;
/* Allocated length of clocations array, in entries */
int clocations_buf_size;
/* Current number of valid entries in clocations array */
int clocations_count;
/*
* ID of the highest PARAM_EXTERN parameter we've seen in the query; used
* to start normalization correctly. However, if there are any squashed
* lists in the query, we disregard query-supplied parameter numbers and
* renumber everything. This is to avoid possible gaps caused by
* squashing in case any params are in squashed lists.
*/
int highest_extern_param_id;
/* Whether squashable lists are present (see highest_extern_param_id) */
bool has_squashed_lists;
/*
* Count of the number of NULL nodes seen since last appending a value.
* These are flushed out to the jumble buffer before subsequent appends
* and before performing the final jumble hash.
*/
unsigned int pending_nulls;
#ifdef USE_ASSERT_CHECKING
/*
* The total number of bytes added to the jumble buffer.
*
* This field exists only when USE_ASSERT_CHECKING is defined, so the size
* of JumbleState differs between assert-enabled and regular builds.
*/
Size total_jumble_len;
#endif
} JumbleState;
/*
 * Values for the compute_query_id GUC.
 *
 * IsQueryIdEnabled() treats OFF and ON as definitive answers; for every other
 * value it defers to the query_id_enabled flag.
 */
enum ComputeQueryIdType
{
	COMPUTE_QUERY_ID_OFF = 0,	/* never compute a query identifier */
	COMPUTE_QUERY_ID_ON = 1,	/* always compute a query identifier */
	COMPUTE_QUERY_ID_AUTO = 2,	/* compute only if a module enabled it */
	COMPUTE_QUERY_ID_REGRESS = 3	/* handled like AUTO by IsQueryIdEnabled() */
};
/* GUC parameters */
/* Holds one of the ComputeQueryIdType values; read by IsQueryIdEnabled() */
extern PGDLLIMPORT int compute_query_id;
/*
* Query jumbling routines, defined outside this header.
* NOTE(review): location and len are passed by pointer, so CleanQuerytext
* presumably updates them in place -- confirm against the implementation.
*/
extern const char *CleanQuerytext(const char *query, int *location, int *len);
/* Takes a Query and returns a pointer to a JumbleState */
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
/*
* Flag consulted by IsQueryIdEnabled() when compute_query_id is neither
* COMPUTE_QUERY_ID_OFF nor COMPUTE_QUERY_ID_ON.
* NOTE(review): presumably set via EnableQueryId() -- confirm.
*/
extern PGDLLIMPORT bool query_id_enabled;
/*
 * Reports whether query identifier computation is currently enabled.
 *
 * An explicit OFF or ON setting of compute_query_id decides the answer
 * directly.  Any other setting (e.g. 'auto') yields true only if a module
 * has requested query IDs, as recorded in query_id_enabled.
 */
static inline bool
IsQueryIdEnabled(void)
{
	switch (compute_query_id)
	{
		case COMPUTE_QUERY_ID_OFF:
			return false;
		case COMPUTE_QUERY_ID_ON:
			return true;
		default:
			/* not forced either way: defer to what modules asked for */
			return query_id_enabled;
	}
}
#endif /* QUERYJUMBLE_H */