1996-07-09 02:22:35 -04:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
|
*
|
1999-02-13 18:22:53 -05:00
|
|
|
* bootstrap.c
|
1997-09-07 01:04:48 -04:00
|
|
|
* routines to support running postgres in 'bootstrap' mode
|
|
|
|
|
* bootstrap mode is used to create the initial template database
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
2018-01-02 23:30:12 -05:00
|
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2000-01-26 00:58:53 -05:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
|
|
|
|
* IDENTIFICATION
|
2010-09-20 16:08:53 -04:00
|
|
|
* src/backend/bootstrap/bootstrap.c
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
|
*/
|
2000-11-04 07:43:24 -05:00
|
|
|
#include "postgres.h"
|
|
|
|
|
|
1999-07-15 23:14:30 -04:00
|
|
|
#include <unistd.h>
|
1996-10-23 03:42:13 -04:00
|
|
|
#include <signal.h>
|
1998-04-26 00:12:15 -04:00
|
|
|
|
2012-08-30 16:15:44 -04:00
|
|
|
#include "access/htup_details.h"
|
2017-04-14 17:51:25 -04:00
|
|
|
#include "access/xact.h"
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment sizes are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particular load. Therefore change it to an initdb time setting.
This includes a breaking change to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Haas, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 01:03:48 -04:00
|
|
|
#include "access/xlog_internal.h"
|
1998-04-26 00:12:15 -04:00
|
|
|
#include "bootstrap/bootstrap.h"
|
|
|
|
|
#include "catalog/index.h"
|
2011-02-08 16:04:18 -05:00
|
|
|
#include "catalog/pg_collation.h"
|
1998-04-26 00:12:15 -04:00
|
|
|
#include "catalog/pg_type.h"
|
|
|
|
|
#include "libpq/pqsignal.h"
|
1999-07-16 01:00:38 -04:00
|
|
|
#include "miscadmin.h"
|
2005-04-14 16:03:27 -04:00
|
|
|
#include "nodes/makefuncs.h"
|
2014-02-15 14:31:30 -05:00
|
|
|
#include "pg_getopt.h"
|
2016-03-10 12:44:09 -05:00
|
|
|
#include "pgstat.h"
|
2004-05-29 18:48:23 -04:00
|
|
|
#include "postmaster/bgwriter.h"
|
2011-11-02 10:25:01 -04:00
|
|
|
#include "postmaster/startup.h"
|
2007-07-24 00:54:09 -04:00
|
|
|
#include "postmaster/walwriter.h"
|
2010-01-15 04:19:10 -05:00
|
|
|
#include "replication/walreceiver.h"
|
2008-05-11 20:00:54 -04:00
|
|
|
#include "storage/bufmgr.h"
|
2013-04-30 07:27:12 -04:00
|
|
|
#include "storage/bufpage.h"
|
2016-11-22 14:26:40 -05:00
|
|
|
#include "storage/condition_variable.h"
|
2002-05-04 20:03:29 -04:00
|
|
|
#include "storage/ipc.h"
|
2011-09-04 01:13:16 -04:00
|
|
|
#include "storage/proc.h"
|
1998-04-26 00:12:15 -04:00
|
|
|
#include "tcop/tcopprot.h"
|
|
|
|
|
#include "utils/builtins.h"
|
2000-05-28 13:56:29 -04:00
|
|
|
#include "utils/fmgroids.h"
|
2005-05-06 13:24:55 -04:00
|
|
|
#include "utils/memutils.h"
|
2004-05-28 01:13:32 -04:00
|
|
|
#include "utils/ps_status.h"
|
2011-02-23 12:18:09 -05:00
|
|
|
#include "utils/rel.h"
|
2010-02-07 15:48:13 -05:00
|
|
|
#include "utils/relmapper.h"
|
2008-03-26 17:10:39 -04:00
|
|
|
#include "utils/tqual.h"
|
2000-06-27 23:33:33 -04:00
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 15:18:54 -04:00
|
|
|
/*
 * Data checksum version to use for bootstrap.  It starts at 0, meaning no
 * checksums; the -k command-line switch handled in AuxiliaryProcessMain
 * replaces it with PG_DATA_CHECKSUM_VERSION.
 */
uint32		bootstrap_data_checksum_version = 0;
|
2013-03-22 09:54:07 -04:00
|
|
|
|
1996-10-21 04:31:23 -04:00
|
|
|
|
2016-08-30 18:22:43 -04:00
|
|
|
/*
 * ALLOC(t, c)
 *		Obtain room for "c" objects of type "t" from TopMemoryContext by way
 *		of MemoryContextAllocZero, and hand the result back as a "t *".
 *		The count is converted to unsigned before being scaled by sizeof(t).
 */
#define ALLOC(t, c) \
	((t *) MemoryContextAllocZero(TopMemoryContext, sizeof(t) * (unsigned) (c)))
|
1996-07-09 02:22:35 -04:00
|
|
|
|
2007-03-07 08:35:03 -05:00
|
|
|
static void CheckerModeMain(void);
|
|
|
|
|
static void BootstrapModeMain(void);
|
2004-05-29 18:48:23 -04:00
|
|
|
static void bootstrap_signals(void);
|
2007-03-07 08:35:03 -05:00
|
|
|
static void ShutdownAuxiliaryProcess(int code, Datum arg);
|
1998-08-31 23:29:17 -04:00
|
|
|
static Form_pg_attribute AllocateAttribute(void);
|
1998-08-24 15:04:04 -04:00
|
|
|
static Oid gettype(char *type);
|
1997-09-07 22:41:22 -04:00
|
|
|
static void cleanup(void);
|
1996-10-23 03:42:13 -04:00
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* global variables
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
2002-04-27 17:24:34 -04:00
|
|
|
|
2013-05-29 16:58:43 -04:00
|
|
|
/*
 * Kind of auxiliary process we are; the declaration is in miscadmin.h.
 * The initial value is NotAnAuxProcess.  AuxiliaryProcessMain first sets it
 * to CheckerProcess and then to whatever value the -x switch supplies.
 */
AuxProcType MyAuxProcType = NotAnAuxProcess;
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
|
2002-04-27 17:24:34 -04:00
|
|
|
Relation boot_reldesc; /* current relation descriptor */
|
|
|
|
|
|
2009-09-26 21:32:11 -04:00
|
|
|
Form_pg_attribute attrtypes[MAXATTR]; /* points to attribute info */
|
|
|
|
|
int numattr; /* number of attributes for cur. rel */
|
1996-07-09 02:22:35 -04:00
|
|
|
|
|
|
|
|
|
2009-09-26 21:32:11 -04:00
|
|
|
/*
|
1996-07-09 02:22:35 -04:00
|
|
|
* Basic information associated with each type. This is used before
|
2010-09-02 21:34:55 -04:00
|
|
|
* pg_type is filled, so it has to cover the datatypes used as column types
|
|
|
|
|
* in the core "bootstrapped" catalogs.
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
1997-09-07 01:04:48 -04:00
|
|
|
* XXX several of these input/output functions do catalog scans
|
2014-05-06 12:12:18 -04:00
|
|
|
* (e.g., F_REGPROCIN scans pg_proc). This obviously creates some
|
1997-09-07 01:04:48 -04:00
|
|
|
* order dependencies in the catalog creation process.
|
1996-07-09 02:22:35 -04:00
|
|
|
*/
|
1997-09-07 01:04:48 -04:00
|
|
|
struct typinfo
|
|
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
char name[NAMEDATALEN];
|
|
|
|
|
Oid oid;
|
|
|
|
|
Oid elem;
|
|
|
|
|
int16 len;
|
2004-04-01 16:28:47 -05:00
|
|
|
bool byval;
|
|
|
|
|
char align;
|
|
|
|
|
char storage;
|
2011-02-08 16:04:18 -05:00
|
|
|
Oid collation;
|
1997-09-07 22:41:22 -04:00
|
|
|
Oid inproc;
|
|
|
|
|
Oid outproc;
|
1996-07-09 02:22:35 -04:00
|
|
|
};
|
|
|
|
|
|
2004-04-01 16:28:47 -05:00
|
|
|
static const struct typinfo TypInfo[] = {
|
2011-02-08 16:04:18 -05:00
|
|
|
{"bool", BOOLOID, 0, 1, true, 'c', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_BOOLIN, F_BOOLOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"bytea", BYTEAOID, 0, -1, false, 'i', 'x', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_BYTEAIN, F_BYTEAOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"char", CHAROID, 0, 1, true, 'c', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_CHARIN, F_CHAROUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"int2", INT2OID, 0, 2, true, 's', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_INT2IN, F_INT2OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"int4", INT4OID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_INT4IN, F_INT4OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"float4", FLOAT4OID, 0, 4, FLOAT4PASSBYVAL, 'i', 'p', InvalidOid,
|
2007-01-21 20:35:23 -05:00
|
|
|
F_FLOAT4IN, F_FLOAT4OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"name", NAMEOID, CHAROID, NAMEDATALEN, false, 'c', 'p', InvalidOid,
|
2007-01-21 20:35:23 -05:00
|
|
|
F_NAMEIN, F_NAMEOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"regclass", REGCLASSOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_REGCLASSIN, F_REGCLASSOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"regproc", REGPROCOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2007-01-21 20:35:23 -05:00
|
|
|
F_REGPROCIN, F_REGPROCOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"regtype", REGTYPEOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_REGTYPEIN, F_REGTYPEOUT},
|
2015-05-09 13:06:49 -04:00
|
|
|
{"regrole", REGROLEOID, 0, 4, true, 'i', 'p', InvalidOid,
|
|
|
|
|
F_REGROLEIN, F_REGROLEOUT},
|
2015-05-09 13:36:52 -04:00
|
|
|
{"regnamespace", REGNAMESPACEOID, 0, 4, true, 'i', 'p', InvalidOid,
|
|
|
|
|
F_REGNAMESPACEIN, F_REGNAMESPACEOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"text", TEXTOID, 0, -1, false, 'i', 'x', DEFAULT_COLLATION_OID,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_TEXTIN, F_TEXTOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"oid", OIDOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_OIDIN, F_OIDOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"tid", TIDOID, 0, 6, false, 's', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_TIDIN, F_TIDOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"xid", XIDOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_XIDIN, F_XIDOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"cid", CIDOID, 0, 4, true, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_CIDIN, F_CIDOUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"pg_node_tree", PGNODETREEOID, 0, -1, false, 'i', 'x', DEFAULT_COLLATION_OID,
|
2010-09-02 21:34:55 -04:00
|
|
|
F_PG_NODE_TREE_IN, F_PG_NODE_TREE_OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"int2vector", INT2VECTOROID, INT2OID, -1, false, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_INT2VECTORIN, F_INT2VECTOROUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"oidvector", OIDVECTOROID, OIDOID, -1, false, 'i', 'p', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_OIDVECTORIN, F_OIDVECTOROUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"_int4", INT4ARRAYOID, INT4OID, -1, false, 'i', 'x', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_ARRAY_IN, F_ARRAY_OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"_text", 1009, TEXTOID, -1, false, 'i', 'x', DEFAULT_COLLATION_OID,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_ARRAY_IN, F_ARRAY_OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"_oid", 1028, OIDOID, -1, false, 'i', 'x', InvalidOid,
|
2005-03-29 14:44:23 -05:00
|
|
|
F_ARRAY_IN, F_ARRAY_OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"_char", 1002, CHAROID, -1, false, 'i', 'x', InvalidOid,
|
2005-03-29 14:44:23 -05:00
|
|
|
F_ARRAY_IN, F_ARRAY_OUT},
|
2011-02-08 16:04:18 -05:00
|
|
|
{"_aclitem", 1034, ACLITEMOID, -1, false, 'i', 'x', InvalidOid,
|
2004-08-29 01:07:03 -04:00
|
|
|
F_ARRAY_IN, F_ARRAY_OUT}
|
1996-07-09 02:22:35 -04:00
|
|
|
};
|
|
|
|
|
|
2004-08-29 01:07:03 -04:00
|
|
|
/* number of entries in TypInfo[], derived from the array's own size */
static const int n_types = sizeof(TypInfo) / sizeof(TypInfo[0]);
|
1996-07-09 02:22:35 -04:00
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
struct typmap
|
|
|
|
|
{ /* a hack */
|
1997-09-07 22:41:22 -04:00
|
|
|
Oid am_oid;
|
1998-08-31 23:29:17 -04:00
|
|
|
FormData_pg_type am_typ;
|
1996-07-09 02:22:35 -04:00
|
|
|
};
|
|
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
static struct typmap **Typ = NULL;
|
|
|
|
|
static struct typmap *Ap = NULL;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2009-09-26 21:32:11 -04:00
|
|
|
static Datum values[MAXATTR]; /* current row's attribute values */
|
2008-11-01 21:45:28 -04:00
|
|
|
static bool Nulls[MAXATTR];
|
1997-09-07 01:04:48 -04:00
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 15:18:54 -04:00
|
|
|
/* the special "no-gc" memory context; NULL until it is set up */
static MemoryContext nogc = NULL;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
/*
|
1997-09-07 01:04:48 -04:00
|
|
|
* At bootstrap time, we first declare all the indices to be built, and
|
|
|
|
|
* then build them. The IndexList structure stores enough information
|
|
|
|
|
* to allow us to build the indices after they've been declared.
|
1996-07-09 02:22:35 -04:00
|
|
|
*/
|
|
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
typedef struct _IndexList
|
|
|
|
|
{
|
2002-03-26 14:17:02 -05:00
|
|
|
Oid il_heap;
|
|
|
|
|
Oid il_ind;
|
2000-07-14 18:18:02 -04:00
|
|
|
IndexInfo *il_info;
|
1997-09-07 01:04:48 -04:00
|
|
|
struct _IndexList *il_next;
|
1997-09-08 17:56:23 -04:00
|
|
|
} IndexList;
|
1996-07-09 02:22:35 -04:00
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
/* first node of the IndexList chain; NULL while the chain is empty */
static IndexList *ILHead = NULL;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
|
2004-05-28 01:13:32 -04:00
|
|
|
/*
|
2007-03-07 08:35:03 -05:00
|
|
|
* AuxiliaryProcessMain
|
2004-05-28 01:13:32 -04:00
|
|
|
*
|
2007-03-07 08:35:03 -05:00
|
|
|
* The main entry point for auxiliary processes, such as the bgwriter,
|
2010-01-15 04:19:10 -05:00
|
|
|
* walwriter, walreceiver, bootstrapper and the shared memory checker code.
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
2007-03-07 08:35:03 -05:00
|
|
|
* This code is here just for historical reasons.
|
1996-07-09 02:22:35 -04:00
|
|
|
*/
|
2007-03-07 08:35:03 -05:00
|
|
|
void
|
|
|
|
|
AuxiliaryProcessMain(int argc, char *argv[])
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
2005-01-14 16:08:44 -05:00
|
|
|
char *progname = argv[0];
|
1997-09-07 22:41:22 -04:00
|
|
|
int flag;
|
2004-10-07 21:36:36 -04:00
|
|
|
char *userDoption = NULL;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2001-03-22 01:16:21 -05:00
|
|
|
/*
|
2015-01-13 07:12:37 -05:00
|
|
|
* Initialize process environment (already done if under postmaster, but
|
|
|
|
|
* not if standalone).
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
2015-01-13 07:12:37 -05:00
|
|
|
if (!IsUnderPostmaster)
|
|
|
|
|
InitStandaloneProcess(argv[0]);
|
2004-07-31 13:57:11 -04:00
|
|
|
|
2001-03-22 01:16:21 -05:00
|
|
|
/*
|
|
|
|
|
* process command arguments
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
|
|
|
|
|
2016-01-28 02:47:36 -05:00
|
|
|
/* Set defaults, to be overridden by explicit options below */
|
2003-12-24 22:52:51 -05:00
|
|
|
if (!IsUnderPostmaster)
|
2002-05-16 21:19:19 -04:00
|
|
|
InitializeGUCOptions();
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2006-06-18 11:38:37 -04:00
|
|
|
/* Ignore the initial --boot argument, if present */
|
|
|
|
|
if (argc > 1 && strcmp(argv[1], "--boot") == 0)
|
2004-05-28 01:13:32 -04:00
|
|
|
{
|
|
|
|
|
argv++;
|
|
|
|
|
argc--;
|
|
|
|
|
}
|
|
|
|
|
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
/* If no -x argument, we are a CheckerProcess */
|
|
|
|
|
MyAuxProcType = CheckerProcess;
|
|
|
|
|
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment sizes are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particular load. Therefore change it to an initdb time setting.
This includes a breaking change to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Haas, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 01:03:48 -04:00
|
|
|
while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:x:X:-:")) != -1)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
|
|
|
|
switch (flag)
|
|
|
|
|
{
|
2006-01-05 05:07:46 -05:00
|
|
|
case 'B':
|
|
|
|
|
SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
|
|
|
|
|
break;
|
1997-09-07 22:41:22 -04:00
|
|
|
case 'D':
|
2016-08-30 18:22:43 -04:00
|
|
|
userDoption = pstrdup(optarg);
|
1997-09-07 22:41:22 -04:00
|
|
|
break;
|
|
|
|
|
case 'd':
|
2002-09-04 16:31:48 -04:00
|
|
|
{
|
|
|
|
|
/* Turn on debugging for the bootstrap process. */
|
2013-10-13 00:09:18 -04:00
|
|
|
char *debugstr;
|
2002-09-04 16:31:48 -04:00
|
|
|
|
2013-10-13 00:09:18 -04:00
|
|
|
debugstr = psprintf("debug%s", optarg);
|
2002-11-14 18:53:27 -05:00
|
|
|
SetConfigOption("log_min_messages", debugstr,
|
2002-09-04 16:31:48 -04:00
|
|
|
PGC_POSTMASTER, PGC_S_ARGV);
|
|
|
|
|
SetConfigOption("client_min_messages", debugstr,
|
|
|
|
|
PGC_POSTMASTER, PGC_S_ARGV);
|
|
|
|
|
pfree(debugstr);
|
|
|
|
|
}
|
1997-09-07 22:41:22 -04:00
|
|
|
break;
|
|
|
|
|
case 'F':
|
2002-02-22 20:31:37 -05:00
|
|
|
SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
|
1997-09-07 22:41:22 -04:00
|
|
|
break;
|
2013-03-22 09:54:07 -04:00
|
|
|
case 'k':
|
2013-04-30 07:27:12 -04:00
|
|
|
bootstrap_data_checksum_version = PG_DATA_CHECKSUM_VERSION;
|
2013-03-22 09:54:07 -04:00
|
|
|
break;
|
2006-01-05 05:07:46 -05:00
|
|
|
case 'r':
|
2007-02-10 09:58:55 -05:00
|
|
|
strlcpy(OutputFileName, optarg, MAXPGPATH);
|
2001-08-24 20:31:17 -04:00
|
|
|
break;
|
1999-10-06 17:58:18 -04:00
|
|
|
case 'x':
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
MyAuxProcType = atoi(optarg);
|
1999-10-06 17:58:18 -04:00
|
|
|
break;
|
Make WAL segment size configurable at initdb time.
For performance reasons a larger segment size than the default 16MB
can be useful. A larger segment size has two main benefits: Firstly,
in setups using archiving, it makes it easier to write scripts that
can keep up with higher amounts of WAL, secondly, the WAL has to be
written and synced to disk less frequently.
But at the same time large segment sizes are disadvantageous for
smaller databases. So far the segment size had to be configured at
compile time, often making it unrealistic to choose one fitting to a
particular load. Therefore change it to an initdb time setting.
This includes a breaking change to the xlogreader.h API, which now
requires the current segment size to be configured. For that and
similar reasons a number of binaries had to be taught how to recognize
the current segment size.
Author: Beena Emerson, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael
Paquier, Peter Eisentraut, Robert Haas, Tushar Ahuja
Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com
2017-09-20 01:03:48 -04:00
|
|
|
case 'X':
|
|
|
|
|
{
|
|
|
|
|
int WalSegSz = strtoul(optarg, NULL, 0);
|
|
|
|
|
|
|
|
|
|
if (!IsValidWalSegSize(WalSegSz))
|
|
|
|
|
ereport(ERROR,
|
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
|
errmsg("-X requires a power of 2 value between 1MB and 1GB")));
|
|
|
|
|
SetConfigOption("wal_segment_size", optarg, PGC_INTERNAL,
|
|
|
|
|
PGC_S_OVERRIDE);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2003-07-14 20:11:14 -04:00
|
|
|
case 'c':
|
|
|
|
|
case '-':
|
|
|
|
|
{
|
|
|
|
|
char *name,
|
|
|
|
|
*value;
|
|
|
|
|
|
|
|
|
|
ParseLongOption(optarg, &name, &value);
|
|
|
|
|
if (!value)
|
|
|
|
|
{
|
|
|
|
|
if (flag == '-')
|
2003-07-22 19:30:39 -04:00
|
|
|
ereport(ERROR,
|
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
|
errmsg("--%s requires a value",
|
|
|
|
|
optarg)));
|
2003-07-14 20:11:14 -04:00
|
|
|
else
|
2003-07-22 19:30:39 -04:00
|
|
|
ereport(ERROR,
|
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
|
errmsg("-c %s requires a value",
|
|
|
|
|
optarg)));
|
2003-07-14 20:11:14 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
|
|
|
|
|
free(name);
|
|
|
|
|
if (value)
|
|
|
|
|
free(value);
|
|
|
|
|
break;
|
|
|
|
|
}
|
1997-09-07 22:41:22 -04:00
|
|
|
default:
|
2006-06-18 11:38:37 -04:00
|
|
|
write_stderr("Try \"%s --help\" for more information.\n",
|
|
|
|
|
progname);
|
|
|
|
|
proc_exit(1);
|
1997-09-07 22:41:22 -04:00
|
|
|
break;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
2003-05-06 19:34:56 -04:00
|
|
|
}
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2007-02-15 21:10:07 -05:00
|
|
|
if (argc != optind)
|
2006-06-18 11:38:37 -04:00
|
|
|
{
|
|
|
|
|
write_stderr("%s: invalid command-line arguments\n", progname);
|
|
|
|
|
proc_exit(1);
|
|
|
|
|
}
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2004-05-28 01:13:32 -04:00
|
|
|
/*
|
|
|
|
|
* Identify myself via ps
|
|
|
|
|
*/
|
2003-09-02 15:04:12 -04:00
|
|
|
if (IsUnderPostmaster)
|
|
|
|
|
{
|
2004-05-28 01:13:32 -04:00
|
|
|
const char *statmsg;
|
2003-12-24 22:52:51 -05:00
|
|
|
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
switch (MyAuxProcType)
|
2004-05-28 01:13:32 -04:00
|
|
|
{
|
2007-03-07 08:35:03 -05:00
|
|
|
case StartupProcess:
|
2017-09-19 21:15:09 -04:00
|
|
|
statmsg = pgstat_get_backend_desc(B_STARTUP);
|
2004-05-28 01:13:32 -04:00
|
|
|
break;
|
2007-03-07 08:35:03 -05:00
|
|
|
case BgWriterProcess:
|
2017-09-19 21:15:09 -04:00
|
|
|
statmsg = pgstat_get_backend_desc(B_BG_WRITER);
|
2004-05-28 01:13:32 -04:00
|
|
|
break;
|
2011-11-01 13:14:47 -04:00
|
|
|
case CheckpointerProcess:
|
2017-09-19 21:15:09 -04:00
|
|
|
statmsg = pgstat_get_backend_desc(B_CHECKPOINTER);
|
2011-11-01 13:14:47 -04:00
|
|
|
break;
|
2007-07-24 00:54:09 -04:00
|
|
|
case WalWriterProcess:
|
2017-09-19 21:15:09 -04:00
|
|
|
statmsg = pgstat_get_backend_desc(B_WAL_WRITER);
|
2007-07-24 00:54:09 -04:00
|
|
|
break;
|
2010-01-15 04:19:10 -05:00
|
|
|
case WalReceiverProcess:
|
2017-09-19 21:15:09 -04:00
|
|
|
statmsg = pgstat_get_backend_desc(B_WAL_RECEIVER);
|
2010-01-15 04:19:10 -05:00
|
|
|
break;
|
2004-05-28 01:13:32 -04:00
|
|
|
default:
|
2004-05-29 18:48:23 -04:00
|
|
|
statmsg = "??? process";
|
2004-05-28 01:13:32 -04:00
|
|
|
break;
|
|
|
|
|
}
|
2006-06-27 18:16:44 -04:00
|
|
|
init_ps_display(statmsg, "", "", "");
|
2003-09-02 15:04:12 -04:00
|
|
|
}
|
2004-05-28 01:13:32 -04:00
|
|
|
|
|
|
|
|
/* Acquire configuration parameters, unless inherited from postmaster */
|
|
|
|
|
if (!IsUnderPostmaster)
|
|
|
|
|
{
|
2005-01-14 16:08:44 -05:00
|
|
|
if (!SelectConfigFiles(userDoption, progname))
|
2004-10-07 21:36:36 -04:00
|
|
|
proc_exit(1);
|
2004-05-28 01:13:32 -04:00
|
|
|
}
|
2004-05-21 01:08:06 -04:00
|
|
|
|
2004-10-07 21:36:36 -04:00
|
|
|
/* Validate we have been given a reasonable-looking DataDir */
|
|
|
|
|
Assert(DataDir);
|
|
|
|
|
ValidatePgVersion(DataDir);
|
|
|
|
|
|
2005-07-04 00:51:52 -04:00
|
|
|
/* Change into DataDir (if under postmaster, should be done already) */
|
|
|
|
|
if (!IsUnderPostmaster)
|
|
|
|
|
ChangeToDataDir();
|
|
|
|
|
|
2004-05-29 18:48:23 -04:00
|
|
|
/* If standalone, create lockfile for data directory */
|
|
|
|
|
if (!IsUnderPostmaster)
|
2005-07-04 00:51:52 -04:00
|
|
|
CreateDataDirLockFile(false);
|
1999-10-06 17:58:18 -04:00
|
|
|
|
XLOG (and related) changes:
* Store two past checkpoint locations, not just one, in pg_control.
On startup, we fall back to the older checkpoint if the newer one
is unreadable. Also, a physical copy of the newest checkpoint record
is kept in pg_control for possible use in disaster recovery (ie,
complete loss of pg_xlog). Also add a version number for pg_control
itself. Remove archdir from pg_control; it ought to be a GUC
parameter, not a special case (not that it's implemented yet anyway).
* Suppress successive checkpoint records when nothing has been entered
in the WAL log since the last one. This is not so much to avoid I/O
as to make it actually useful to keep track of the last two
checkpoints. If the things are right next to each other then there's
not a lot of redundancy gained...
* Change CRC scheme to a true 64-bit CRC, not a pair of 32-bit CRCs
on alternate bytes. Polynomial borrowed from ECMA DLT1 standard.
* Fix XLOG record length handling so that it will work at BLCKSZ = 32k.
* Change XID allocation to work more like OID allocation. (This is of
dubious necessity, but I think it's a good idea anyway.)
* Fix a number of minor bugs, such as off-by-one logic for XLOG file
wraparound at the 4 gig mark.
* Add documentation and clean up some coding infelicities; move file
format declarations out to include files where planned contrib
utilities can get at them.
* Checkpoint will now occur every CHECKPOINT_SEGMENTS log segments or
every CHECKPOINT_TIMEOUT seconds, whichever comes first. It is also
possible to force a checkpoint by sending SIGUSR1 to the postmaster
(undocumented feature...)
* Defend against kill -9 postmaster by storing shmem block's key and ID
in postmaster.pid lockfile, and checking at startup to ensure that no
processes are still connected to old shmem block (if it still exists).
* Switch backends to accept SIGQUIT rather than SIGUSR1 for emergency
stop, for symmetry with postmaster and xlog utilities. Clean up signal
handling in bootstrap.c so that xlog utilities launched by postmaster
will react to signals better.
* Standalone bootstrap now grabs lockfile in target directory, as added
insurance against running it in parallel with live postmaster.
2001-03-12 20:17:06 -05:00
|
|
|
SetProcessingMode(BootstrapProcessing);
|
2006-01-05 05:07:46 -05:00
|
|
|
IgnoreSystemIndexes = true;
|
XLOG (and related) changes:
* Store two past checkpoint locations, not just one, in pg_control.
On startup, we fall back to the older checkpoint if the newer one
is unreadable. Also, a physical copy of the newest checkpoint record
is kept in pg_control for possible use in disaster recovery (ie,
complete loss of pg_xlog). Also add a version number for pg_control
itself. Remove archdir from pg_control; it ought to be a GUC
parameter, not a special case (not that it's implemented yet anyway).
* Suppress successive checkpoint records when nothing has been entered
in the WAL log since the last one. This is not so much to avoid I/O
as to make it actually useful to keep track of the last two
checkpoints. If the things are right next to each other then there's
not a lot of redundancy gained...
* Change CRC scheme to a true 64-bit CRC, not a pair of 32-bit CRCs
on alternate bytes. Polynomial borrowed from ECMA DLT1 standard.
* Fix XLOG record length handling so that it will work at BLCKSZ = 32k.
* Change XID allocation to work more like OID allocation. (This is of
dubious necessity, but I think it's a good idea anyway.)
* Fix a number of minor bugs, such as off-by-one logic for XLOG file
wraparound at the 4 gig mark.
* Add documentation and clean up some coding infelicities; move file
format declarations out to include files where planned contrib
utilities can get at them.
* Checkpoint will now occur every CHECKPOINT_SEGMENTS log segments or
every CHECKPOINT_TIMEOUT seconds, whichever comes first. It is also
possible to force a checkpoint by sending SIGUSR1 to the postmaster
(undocumented feature...)
* Defend against kill -9 postmaster by storing shmem block's key and ID
in postmaster.pid lockfile, and checking at startup to ensure that no
processes are still connected to old shmem block (if it still exists).
* Switch backends to accept SIGQUIT rather than SIGUSR1 for emergency
stop, for symmetry with postmaster and xlog utilities. Clean up signal
handling in bootstrap.c so that xlog utilities launched by postmaster
will react to signals better.
* Standalone bootstrap now grabs lockfile in target directory, as added
insurance against running it in parallel with live postmaster.
2001-03-12 20:17:06 -05:00
|
|
|
|
2013-01-02 16:39:20 -05:00
|
|
|
/* Initialize MaxBackends (if under postmaster, was done already) */
|
|
|
|
|
if (!IsUnderPostmaster)
|
|
|
|
|
InitializeMaxBackends();
|
|
|
|
|
|
XLOG (and related) changes:
* Store two past checkpoint locations, not just one, in pg_control.
On startup, we fall back to the older checkpoint if the newer one
is unreadable. Also, a physical copy of the newest checkpoint record
is kept in pg_control for possible use in disaster recovery (ie,
complete loss of pg_xlog). Also add a version number for pg_control
itself. Remove archdir from pg_control; it ought to be a GUC
parameter, not a special case (not that it's implemented yet anyway).
* Suppress successive checkpoint records when nothing has been entered
in the WAL log since the last one. This is not so much to avoid I/O
as to make it actually useful to keep track of the last two
checkpoints. If the things are right next to each other then there's
not a lot of redundancy gained...
* Change CRC scheme to a true 64-bit CRC, not a pair of 32-bit CRCs
on alternate bytes. Polynomial borrowed from ECMA DLT1 standard.
* Fix XLOG record length handling so that it will work at BLCKSZ = 32k.
* Change XID allocation to work more like OID allocation. (This is of
dubious necessity, but I think it's a good idea anyway.)
* Fix a number of minor bugs, such as off-by-one logic for XLOG file
wraparound at the 4 gig mark.
* Add documentation and clean up some coding infelicities; move file
format declarations out to include files where planned contrib
utilities can get at them.
* Checkpoint will now occur every CHECKPOINT_SEGMENTS log segments or
every CHECKPOINT_TIMEOUT seconds, whichever comes first. It is also
possible to force a checkpoint by sending SIGUSR1 to the postmaster
(undocumented feature...)
* Defend against kill -9 postmaster by storing shmem block's key and ID
in postmaster.pid lockfile, and checking at startup to ensure that no
processes are still connected to old shmem block (if it still exists).
* Switch backends to accept SIGQUIT rather than SIGUSR1 for emergency
stop, for symmetry with postmaster and xlog utilities. Clean up signal
handling in bootstrap.c so that xlog utilities launched by postmaster
will react to signals better.
* Standalone bootstrap now grabs lockfile in target directory, as added
insurance against running it in parallel with live postmaster.
2001-03-12 20:17:06 -05:00
|
|
|
BaseInit();
|
|
|
|
|
|
2005-08-07 23:12:16 -04:00
|
|
|
/*
|
2007-03-07 08:35:03 -05:00
|
|
|
* When we are an auxiliary process, we aren't going to do the full
|
2006-10-03 20:30:14 -04:00
|
|
|
* InitPostgres pushups, but there are a couple of things that need to get
|
2007-03-07 08:35:03 -05:00
|
|
|
* lit up even in an auxiliary process.
|
2005-08-07 23:12:16 -04:00
|
|
|
*/
|
2002-09-25 16:31:40 -04:00
|
|
|
if (IsUnderPostmaster)
|
2003-11-19 10:55:08 -05:00
|
|
|
{
|
2006-01-04 16:06:32 -05:00
|
|
|
/*
|
|
|
|
|
* Create a PGPROC so we can use LWLocks. In the EXEC_BACKEND case,
|
|
|
|
|
* this was already done by SubPostmasterMain().
|
|
|
|
|
*/
|
|
|
|
|
#ifndef EXEC_BACKEND
|
2007-03-07 08:35:03 -05:00
|
|
|
InitAuxiliaryProcess();
|
2006-01-04 16:06:32 -05:00
|
|
|
#endif
|
2005-08-07 23:12:16 -04:00
|
|
|
|
2009-07-31 16:26:23 -04:00
|
|
|
/*
|
2014-05-06 12:12:18 -04:00
|
|
|
* Assign the ProcSignalSlot for an auxiliary process. Since it
|
2009-07-31 16:26:23 -04:00
|
|
|
* doesn't have a BackendId, the slot is statically allocated based on
|
2014-05-06 12:12:18 -04:00
|
|
|
* the auxiliary process type (MyAuxProcType). Backends use slots
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
* indexed in the range from 1 to MaxBackends (inclusive), so we use
|
2009-07-31 16:26:23 -04:00
|
|
|
* MaxBackends + AuxProcType + 1 as the index of the slot for an
|
|
|
|
|
* auxiliary process.
|
|
|
|
|
*
|
|
|
|
|
* This will need rethinking if we ever want more than one of a
|
|
|
|
|
* particular auxiliary process type.
|
|
|
|
|
*/
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
ProcSignalInit(MaxBackends + MyAuxProcType + 1);
|
2009-07-31 16:26:23 -04:00
|
|
|
|
2005-08-07 23:12:16 -04:00
|
|
|
/* finish setting up bufmgr.c */
|
|
|
|
|
InitBufferPoolBackend();
|
2006-06-08 19:55:48 -04:00
|
|
|
|
2017-03-26 22:02:22 -04:00
|
|
|
/* Initialize backend status information */
|
|
|
|
|
pgstat_initialize();
|
|
|
|
|
pgstat_bestart();
|
|
|
|
|
|
2013-12-18 12:57:20 -05:00
|
|
|
/* register a before-shutdown callback for LWLock cleanup */
|
|
|
|
|
before_shmem_exit(ShutdownAuxiliaryProcess, 0);
|
2003-11-19 10:55:08 -05:00
|
|
|
}
|
2002-09-25 16:31:40 -04:00
|
|
|
|
1999-10-06 17:58:18 -04:00
|
|
|
/*
|
2000-11-09 06:26:00 -05:00
|
|
|
* XLOG operations
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
2000-11-21 04:39:57 -05:00
|
|
|
SetProcessingMode(NormalProcessing);
|
2001-09-29 00:02:27 -04:00
|
|
|
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
switch (MyAuxProcType)
|
2000-11-21 04:39:57 -05:00
|
|
|
{
|
2007-03-07 08:35:03 -05:00
|
|
|
case CheckerProcess:
|
2010-04-19 21:38:52 -04:00
|
|
|
/* don't set signals, they're useless here */
|
2007-03-07 08:35:03 -05:00
|
|
|
CheckerModeMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
2001-09-29 00:02:27 -04:00
|
|
|
|
2007-03-07 08:35:03 -05:00
|
|
|
case BootstrapProcess:
|
2015-05-23 21:35:49 -04:00
|
|
|
|
Protect against multixact members wraparound
Multixact member files are subject to early wraparound overflow and
removal: if the average multixact size is above a certain threshold (see
note below) the protections against offset overflow are not enough:
during multixact truncation at checkpoint time, some
pg_multixact/members files would be removed because the server considers
them to be old and not needed anymore. This leads to loss of files that
are critical to interpret existing tuples' Xmax values.
To protect against this, since we don't have enough info in pg_control
and we can't modify it in old branches, we maintain shared memory state
about the oldest value that we need to keep; we use this during new
multixact creation to abort if an old still-needed file would get
overwritten. This value is kept up to date by checkpoints, which makes
it not completely accurate but should be good enough. We start emitting
warnings sometime earlier, so that the eventual multixact-shutdown
doesn't take DBAs completely by surprise (more precisely: once 20
members SLRU segments are remaining before shutdown.)
On troublesome average multixact size: The threshold size depends on the
multixact freeze parameters. The oldest age is related to the greater of
multixact_freeze_table_age and multixact_freeze_min_age: anything
older than that should be removed promptly by autovacuum. If autovacuum
is keeping up with multixact freezing, the troublesome multixact average
size is
(2^32-1) / Max(freeze table age, freeze min age)
or around 28 members per multixact. Having an average multixact size
larger than that will eventually cause new multixact data to overwrite
the data area for older multixacts. (If autovacuum is not able to keep
up, or there are errors in vacuuming, the actual maximum is
multixact_freeze_max_age instead, at which point multixact generation
is stopped completely. The default value for this limit is 400 million,
which means that the multixact size that would cause trouble is about 10
members).
Initial bug report by Timothy Garnett, bug #12990
Backpatch to 9.3, where the problem was introduced.
Authors: Álvaro Herrera, Thomas Munro
Reviews: Thomas Munro, Amit Kapila, Robert Haas, Kevin Grittner
2015-04-28 10:32:53 -04:00
|
|
|
/*
|
|
|
|
|
* There was a brief instant during which mode was Normal; this is
|
|
|
|
|
* okay. We need to be in bootstrap mode during BootStrapXLOG for
|
|
|
|
|
* the sake of multixact initialization.
|
|
|
|
|
*/
|
|
|
|
|
SetProcessingMode(BootstrapProcessing);
|
2004-05-29 18:48:23 -04:00
|
|
|
bootstrap_signals();
|
2001-09-29 00:02:27 -04:00
|
|
|
BootStrapXLOG();
|
2007-03-07 08:35:03 -05:00
|
|
|
BootstrapModeMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
2001-09-29 00:02:27 -04:00
|
|
|
|
2007-03-07 08:35:03 -05:00
|
|
|
case StartupProcess:
|
Start background writer during archive recovery. Background writer now performs
its usual buffer cleaning duties during archive recovery, and it's responsible
for performing restartpoints.
This requires some changes in postmaster. When the startup process has done
all the initialization and is ready to start WAL redo, it signals the
postmaster to launch the background writer. The postmaster is signaled again
when the point in recovery is reached where we know that the database is in
consistent state. Postmaster isn't interested in that at the moment, but
that's the point where we could let other backends in to perform read-only
queries. The postmaster is signaled a third time when the recovery has ended,
so that postmaster knows that it's safe to start accepting connections.
The startup process now traps SIGTERM, and performs a "clean" shutdown. If
you do a fast shutdown during recovery, a shutdown restartpoint is performed,
like a shutdown checkpoint, and postmaster kills the processes cleanly. You
still have to continue the recovery at next startup, though.
Currently, the background writer is only launched during archive recovery.
We could launch it during crash recovery as well, but it seems better to keep
that codepath as simple as possible, for the sake of robustness. And it
couldn't do any restartpoints during crash recovery anyway, so it wouldn't be
that useful.
log_restartpoints is gone. Use log_checkpoints instead. This is yet to be
documented.
This whole operation is a pre-requisite for Hot Standby, but has some value of
its own whether the hot standby patch makes 8.4 or not.
Simon Riggs, with lots of modifications by me.
2009-02-18 10:58:41 -05:00
|
|
|
/* don't set signals, startup process has its own agenda */
|
|
|
|
|
StartupProcessMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
2001-09-29 00:02:27 -04:00
|
|
|
|
2007-03-07 08:35:03 -05:00
|
|
|
case BgWriterProcess:
|
2004-05-29 18:48:23 -04:00
|
|
|
/* don't set signals, bgwriter has its own agenda */
|
|
|
|
|
BackgroundWriterMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
2007-07-24 00:54:09 -04:00
|
|
|
|
2011-11-01 13:14:47 -04:00
|
|
|
case CheckpointerProcess:
|
|
|
|
|
/* don't set signals, checkpointer has its own agenda */
|
|
|
|
|
CheckpointerMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
|
|
|
|
|
2007-07-24 00:54:09 -04:00
|
|
|
case WalWriterProcess:
|
|
|
|
|
/* don't set signals, walwriter has its own agenda */
|
|
|
|
|
InitXLOGAccess();
|
|
|
|
|
WalWriterMain();
|
|
|
|
|
proc_exit(1); /* should never return */
|
2007-11-15 16:14:46 -05:00
|
|
|
|
2010-01-15 04:19:10 -05:00
|
|
|
case WalReceiverProcess:
|
|
|
|
|
/* don't set signals, walreceiver has its own agenda */
|
2010-01-20 04:16:24 -05:00
|
|
|
WalReceiverMain();
|
2010-01-15 04:19:10 -05:00
|
|
|
proc_exit(1); /* should never return */
|
|
|
|
|
|
2001-09-29 00:02:27 -04:00
|
|
|
default:
|
Fix management of pendingOpsTable in auxiliary processes.
mdinit() was misusing IsBootstrapProcessingMode() to decide whether to
create an fsync pending-operations table in the current process. This led
to creating a table not only in the startup and checkpointer processes as
intended, but also in the bgwriter process, not to mention other auxiliary
processes such as walwriter and walreceiver. Creation of the table in the
bgwriter is fatal, because it absorbs fsync requests that should have gone
to the checkpointer; instead they just sit in bgwriter local memory and are
never acted on. So writes performed by the bgwriter were not being fsync'd
which could result in data loss after an OS crash. I think there is no
live bug with respect to walwriter and walreceiver because those never
perform any writes of shared buffers; but the potential is there for
future breakage in those processes too.
To fix, make AuxiliaryProcessMain() export the current process's
AuxProcType as a global variable, and then make mdinit() test directly for
the types of aux process that should have a pendingOpsTable. Having done
that, we might as well also get rid of the random bool flags such as
am_walreceiver that some of the aux processes had grown. (Note that we
could not have fixed the bug by examining those variables in mdinit(),
because it's called from BaseInit() which is run by AuxiliaryProcessMain()
before entering any of the process-type-specific code.)
Back-patch to 9.2, where the problem was introduced by the split-up of
bgwriter and checkpointer processes. The bogus pendingOpsTable exists
in walwriter and walreceiver processes in earlier branches, but absent
any evidence that it causes actual problems there, I'll leave the older
branches alone.
2012-07-18 15:28:10 -04:00
|
|
|
elog(PANIC, "unrecognized process type: %d", (int) MyAuxProcType);
|
2004-05-29 18:48:23 -04:00
|
|
|
proc_exit(1);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
2007-03-07 08:35:03 -05:00
|
|
|
}
|
2001-09-29 00:02:27 -04:00
|
|
|
|
2007-03-07 08:35:03 -05:00
|
|
|
/*
 * CheckerModeMain
 *
 * Entry point for shared-memory checker mode.  The sole purpose of this mode
 * is to prove that shared memory and semaphores can be created with the
 * current GUC settings.  BaseInit() has already done exactly that before we
 * are called, so nothing remains except to report success to the caller by
 * exiting with status 0.
 *
 * Does not return.
 */
static void
CheckerModeMain(void)
{
	const int	exit_status = 0;	/* success: the check is already done */

	proc_exit(exit_status);
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The main entry point for running the backend in bootstrap mode
|
|
|
|
|
*
|
|
|
|
|
* The bootstrap mode is used to initialize the template database.
|
|
|
|
|
* The bootstrap backend doesn't speak SQL, but instead expects
|
|
|
|
|
* commands in a special bootstrap language.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
BootstrapModeMain(void)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
Assert(!IsUnderPostmaster);
|
Protect against multixact members wraparound
Multixact member files are subject to early wraparound overflow and
removal: if the average multixact size is above a certain threshold (see
note below) the protections against offset overflow are not enough:
during multixact truncation at checkpoint time, some
pg_multixact/members files would be removed because the server considers
them to be old and not needed anymore. This leads to loss of files that
are critical to interpret existing tuples's Xmax values.
To protect against this, since we don't have enough info in pg_control
and we can't modify it in old branches, we maintain shared memory state
about the oldest value that we need to keep; we use this during new
multixact creation to abort if an old still-needed file would get
overwritten. This value is kept up to date by checkpoints, which makes
it not completely accurate but should be good enough. We start emitting
warnings sometime earlier, so that the eventual multixact-shutdown
doesn't take DBAs completely by surprise (more precisely: once 20
members SLRU segments are remaining before shutdown.)
On troublesome average multixact size: The threshold size depends on the
multixact freeze parameters. The oldest age is related to the greater of
multixact_freeze_table_age and multixact_freeze_min_age: anything
older than that should be removed promptly by autovacuum. If autovacuum
is keeping up with multixact freezing, the troublesome multixact average
size is
(2^32-1) / Max(freeze table age, freeze min age)
or around 28 members per multixact. Having an average multixact size
larger than that will eventually cause new multixact data to overwrite
the data area for older multixacts. (If autovacuum is not able to keep
up, or there are errors in vacuuming, the actual maximum is
multixact_freeeze_max_age instead, at which point multixact generation
is stopped completely. The default value for this limit is 400 million,
which means that the multixact size that would cause trouble is about 10
members).
Initial bug report by Timothy Garnett, bug #12990
Backpatch to 9.3, where the problem was introduced.
Authors: Álvaro Herrera, Thomas Munro
Reviews: Thomas Munro, Amit Kapila, Robert Haas, Kevin Grittner
2015-04-28 10:32:53 -04:00
|
|
|
Assert(IsBootstrapProcessingMode());
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2003-07-14 20:11:14 -04:00
|
|
|
/*
|
2007-03-07 08:35:03 -05:00
|
|
|
* Do backend-like initialization for bootstrap mode
|
2003-07-14 20:11:14 -04:00
|
|
|
*/
|
2007-03-07 08:35:03 -05:00
|
|
|
InitProcess();
|
2010-04-19 21:38:52 -04:00
|
|
|
|
2015-02-02 16:23:59 -05:00
|
|
|
InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL);
|
2003-07-14 20:11:14 -04:00
|
|
|
|
|
|
|
|
/* Initialize stuff for bootstrap-file processing */
|
1997-09-07 01:04:48 -04:00
|
|
|
for (i = 0; i < MAXATTR; i++)
|
|
|
|
|
{
|
2004-01-07 13:56:30 -05:00
|
|
|
attrtypes[i] = NULL;
|
2008-11-01 21:45:28 -04:00
|
|
|
Nulls[i] = false;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
|
2001-03-22 01:16:21 -05:00
|
|
|
/*
|
2003-07-14 20:11:14 -04:00
|
|
|
* Process bootstrap input.
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
2017-04-14 17:51:25 -04:00
|
|
|
StartTransactionCommand();
|
2006-03-06 20:03:12 -05:00
|
|
|
boot_yyparse();
|
2017-04-14 17:51:25 -04:00
|
|
|
CommitTransactionCommand();
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2010-02-07 15:48:13 -05:00
|
|
|
/*
|
2010-02-25 21:01:40 -05:00
|
|
|
* We should now know about all mapped relations, so it's okay to write
|
|
|
|
|
* out the initial relation mapping files.
|
2010-02-07 15:48:13 -05:00
|
|
|
*/
|
|
|
|
|
RelationMapFinishBootstrap();
|
|
|
|
|
|
2003-07-14 20:11:14 -04:00
|
|
|
/* Clean up and exit */
|
1997-09-07 01:04:48 -04:00
|
|
|
cleanup();
|
2007-03-07 08:35:03 -05:00
|
|
|
proc_exit(0);
|
2004-05-29 18:48:23 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
|
* misc functions
|
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set up signal handling for a bootstrap process
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
bootstrap_signals(void)
|
|
|
|
|
{
|
2015-01-13 10:44:09 -05:00
|
|
|
Assert(!IsUnderPostmaster);
|
2004-05-29 18:48:23 -04:00
|
|
|
|
2015-01-13 10:44:09 -05:00
|
|
|
/* Set up appropriately for interactive use */
|
|
|
|
|
pqsignal(SIGHUP, die);
|
|
|
|
|
pqsignal(SIGINT, die);
|
|
|
|
|
pqsignal(SIGTERM, die);
|
|
|
|
|
pqsignal(SIGQUIT, die);
|
2004-05-29 18:48:23 -04:00
|
|
|
}
|
|
|
|
|
|
2006-06-08 19:55:48 -04:00
|
|
|
/*
|
2014-05-06 12:12:18 -04:00
|
|
|
* Begin shutdown of an auxiliary process. This is approximately the equivalent
|
2007-03-07 08:35:03 -05:00
|
|
|
* of ShutdownPostgres() in postinit.c. We can't run transactions in an
|
|
|
|
|
* auxiliary process, so most of the work of AbortTransaction() is not needed,
|
2006-06-08 19:55:48 -04:00
|
|
|
* but we do need to make sure we've released any LWLocks we are holding.
|
|
|
|
|
* (This is only critical during an error exit.)
|
|
|
|
|
*/
|
|
|
|
|
static void
|
2007-03-07 08:35:03 -05:00
|
|
|
ShutdownAuxiliaryProcess(int code, Datum arg)
|
2006-06-08 19:55:48 -04:00
|
|
|
{
|
|
|
|
|
LWLockReleaseAll();
|
2016-11-22 14:26:40 -05:00
|
|
|
ConditionVariableCancelSleep();
|
2016-03-10 12:44:09 -05:00
|
|
|
pgstat_report_wait_end();
|
2006-06-08 19:55:48 -04:00
|
|
|
}
|
|
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
/* ----------------------------------------------------------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* MANUAL BACKEND INTERACTIVE INTERFACE COMMANDS
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* boot_openrel
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
boot_openrel(char *relname)
|
|
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
int i;
|
1997-09-07 01:04:48 -04:00
|
|
|
struct typmap **app;
|
1998-08-18 22:04:17 -04:00
|
|
|
Relation rel;
|
|
|
|
|
HeapScanDesc scan;
|
1997-09-07 22:41:22 -04:00
|
|
|
HeapTuple tup;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2005-04-14 16:03:27 -04:00
|
|
|
if (strlen(relname) >= NAMEDATALEN)
|
1998-02-25 23:46:47 -05:00
|
|
|
relname[NAMEDATALEN - 1] = '\0';
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
if (Typ == NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2006-08-15 18:36:17 -04:00
|
|
|
/* We can now load the pg_type data */
|
2005-04-14 16:03:27 -04:00
|
|
|
rel = heap_open(TypeRelationId, NoLock);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 09:47:01 -04:00
|
|
|
scan = heap_beginscan_catalog(rel, 0, NULL);
|
1998-08-18 22:04:17 -04:00
|
|
|
i = 0;
|
2002-05-20 19:51:44 -04:00
|
|
|
while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
1998-08-18 22:04:17 -04:00
|
|
|
++i;
|
|
|
|
|
heap_endscan(scan);
|
1997-09-07 01:04:48 -04:00
|
|
|
app = Typ = ALLOC(struct typmap *, i + 1);
|
|
|
|
|
while (i-- > 0)
|
|
|
|
|
*app++ = ALLOC(struct typmap, 1);
|
2004-01-07 13:56:30 -05:00
|
|
|
*app = NULL;
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 09:47:01 -04:00
|
|
|
scan = heap_beginscan_catalog(rel, 0, NULL);
|
1997-09-07 01:04:48 -04:00
|
|
|
app = Typ;
|
2002-05-20 19:51:44 -04:00
|
|
|
while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2002-07-20 01:16:59 -04:00
|
|
|
(*app)->am_oid = HeapTupleGetOid(tup);
|
2000-06-27 23:33:33 -04:00
|
|
|
memcpy((char *) &(*app)->am_typ,
|
|
|
|
|
(char *) GETSTRUCT(tup),
|
|
|
|
|
sizeof((*app)->am_typ));
|
|
|
|
|
app++;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
1998-08-18 22:04:17 -04:00
|
|
|
heap_endscan(scan);
|
1999-09-18 15:08:25 -04:00
|
|
|
heap_close(rel, NoLock);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
|
2002-04-27 17:24:34 -04:00
|
|
|
if (boot_reldesc != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
closerel(NULL);
|
|
|
|
|
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(DEBUG4, "open relation %s, attrsize %d",
|
2009-01-22 15:16:10 -05:00
|
|
|
relname, (int) ATTRIBUTE_FIXED_PART_SIZE);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2008-09-01 16:42:46 -04:00
|
|
|
boot_reldesc = heap_openrv(makeRangeVar(NULL, relname, -1), NoLock);
|
2016-04-08 14:52:13 -04:00
|
|
|
numattr = boot_reldesc->rd_rel->relnatts;
|
1997-09-07 01:04:48 -04:00
|
|
|
for (i = 0; i < numattr; i++)
|
|
|
|
|
{
|
|
|
|
|
if (attrtypes[i] == NULL)
|
|
|
|
|
attrtypes[i] = AllocateAttribute();
|
|
|
|
|
memmove((char *) attrtypes[i],
|
2017-08-20 14:19:07 -04:00
|
|
|
(char *) TupleDescAttr(boot_reldesc->rd_att, i),
|
2009-01-22 15:16:10 -05:00
|
|
|
ATTRIBUTE_FIXED_PART_SIZE);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
|
|
|
|
{
|
1998-08-31 23:29:17 -04:00
|
|
|
Form_pg_attribute at = attrtypes[i];
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "create attribute %d name %s len %d num %d type %u",
|
2001-05-11 21:48:49 -04:00
|
|
|
i, NameStr(at->attname), at->attlen, at->attnum,
|
Commit to match discussed elog() changes. Only update is that LOG is
now just below FATAL in server_min_messages. Added more text to
highlight ordering difference between it and client_min_messages.
---------------------------------------------------------------------------
REALLYFATAL => PANIC
STOP => PANIC
New INFO level the prints to client by default
New LOG level the prints to server log by default
Cause VACUUM information to print only to the client
NOTICE => INFO where purely information messages are sent
DEBUG => LOG for purely server status messages
DEBUG removed, kept as backward compatible
DEBUG5, DEBUG4, DEBUG3, DEBUG2, DEBUG1 added
DebugLvl removed in favor of new DEBUG[1-5] symbols
New server_min_messages GUC parameter with values:
DEBUG[5-1], INFO, NOTICE, ERROR, LOG, FATAL, PANIC
New client_min_messages GUC parameter with values:
DEBUG[5-1], LOG, INFO, NOTICE, ERROR, FATAL, PANIC
Server startup now logged with LOG instead of DEBUG
Remove debug_level GUC parameter
elog() numbers now start at 10
Add test to print error message if older elog() values are passed to elog()
Bootstrap mode now has a -d that requires an argument, like postmaster
2002-03-02 16:39:36 -05:00
|
|
|
at->atttypid);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
}
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* closerel
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
closerel(char *name)
|
|
|
|
|
{
|
1997-09-07 01:04:48 -04:00
|
|
|
if (name)
|
|
|
|
|
{
|
2002-04-27 17:24:34 -04:00
|
|
|
if (boot_reldesc)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2002-04-27 17:24:34 -04:00
|
|
|
if (strcmp(RelationGetRelationName(boot_reldesc), name) != 0)
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(ERROR, "close of %s when %s was expected",
|
2005-04-14 16:03:27 -04:00
|
|
|
name, RelationGetRelationName(boot_reldesc));
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
else
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(ERROR, "close of %s before any relation was opened",
|
1997-09-07 01:04:48 -04:00
|
|
|
name);
|
|
|
|
|
}
|
|
|
|
|
|
2002-04-27 17:24:34 -04:00
|
|
|
if (boot_reldesc == NULL)
|
2001-05-11 21:48:49 -04:00
|
|
|
elog(ERROR, "no open relation to close");
|
1997-09-07 01:04:48 -04:00
|
|
|
else
|
|
|
|
|
{
|
2005-04-14 16:03:27 -04:00
|
|
|
elog(DEBUG4, "close relation %s",
|
|
|
|
|
RelationGetRelationName(boot_reldesc));
|
2002-04-27 17:24:34 -04:00
|
|
|
heap_close(boot_reldesc, NoLock);
|
2004-01-07 13:56:30 -05:00
|
|
|
boot_reldesc = NULL;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
|
|
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
/* ----------------
|
|
|
|
|
* DEFINEATTR()
|
|
|
|
|
*
|
|
|
|
|
* define a <field,type> pair
|
|
|
|
|
* if there are n fields in a relation to be created, this routine
|
|
|
|
|
* will be called n times
|
|
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
2015-02-21 16:25:49 -05:00
|
|
|
DefineAttr(char *name, char *type, int attnum, int nullness)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
1998-08-24 15:04:04 -04:00
|
|
|
Oid typeoid;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2002-04-27 17:24:34 -04:00
|
|
|
if (boot_reldesc != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(WARNING, "no open relations allowed with CREATE command");
|
2005-04-14 16:03:27 -04:00
|
|
|
closerel(NULL);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
if (attrtypes[attnum] == NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
attrtypes[attnum] = AllocateAttribute();
|
2009-01-22 15:16:10 -05:00
|
|
|
MemSet(attrtypes[attnum], 0, ATTRIBUTE_FIXED_PART_SIZE);
|
2002-08-02 14:15:10 -04:00
|
|
|
|
|
|
|
|
namestrcpy(&attrtypes[attnum]->attname, name);
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "column %s %s", NameStr(attrtypes[attnum]->attname), type);
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 15:18:54 -04:00
|
|
|
attrtypes[attnum]->attnum = attnum + 1; /* fillatt */
|
2002-08-02 14:15:10 -04:00
|
|
|
|
|
|
|
|
typeoid = gettype(type);
|
|
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
if (Typ != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
|
|
|
|
attrtypes[attnum]->atttypid = Ap->am_oid;
|
2005-03-28 19:17:27 -05:00
|
|
|
attrtypes[attnum]->attlen = Ap->am_typ.typlen;
|
1997-09-07 01:04:48 -04:00
|
|
|
attrtypes[attnum]->attbyval = Ap->am_typ.typbyval;
|
2002-04-24 22:56:56 -04:00
|
|
|
attrtypes[attnum]->attstorage = Ap->am_typ.typstorage;
|
1998-08-24 15:04:04 -04:00
|
|
|
attrtypes[attnum]->attalign = Ap->am_typ.typalign;
|
2011-02-08 16:04:18 -05:00
|
|
|
attrtypes[attnum]->attcollation = Ap->am_typ.typcollation;
|
2004-01-06 18:55:19 -05:00
|
|
|
/* if an array type, assume 1-dimensional attribute */
|
|
|
|
|
if (Ap->am_typ.typelem != InvalidOid && Ap->am_typ.typlen < 0)
|
|
|
|
|
attrtypes[attnum]->attndims = 1;
|
|
|
|
|
else
|
|
|
|
|
attrtypes[attnum]->attndims = 0;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2004-04-01 16:28:47 -05:00
|
|
|
attrtypes[attnum]->atttypid = TypInfo[typeoid].oid;
|
2005-03-28 19:17:27 -05:00
|
|
|
attrtypes[attnum]->attlen = TypInfo[typeoid].len;
|
2004-04-01 16:28:47 -05:00
|
|
|
attrtypes[attnum]->attbyval = TypInfo[typeoid].byval;
|
|
|
|
|
attrtypes[attnum]->attstorage = TypInfo[typeoid].storage;
|
|
|
|
|
attrtypes[attnum]->attalign = TypInfo[typeoid].align;
|
2011-02-08 16:04:18 -05:00
|
|
|
attrtypes[attnum]->attcollation = TypInfo[typeoid].collation;
|
2004-01-06 18:55:19 -05:00
|
|
|
/* if an array type, assume 1-dimensional attribute */
|
2005-03-28 19:17:27 -05:00
|
|
|
if (TypInfo[typeoid].elem != InvalidOid &&
|
|
|
|
|
attrtypes[attnum]->attlen < 0)
|
2004-01-06 18:55:19 -05:00
|
|
|
attrtypes[attnum]->attndims = 1;
|
|
|
|
|
else
|
|
|
|
|
attrtypes[attnum]->attndims = 0;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
2004-01-06 18:55:19 -05:00
|
|
|
|
|
|
|
|
attrtypes[attnum]->attstattarget = -1;
|
1998-02-06 14:18:06 -05:00
|
|
|
attrtypes[attnum]->attcacheoff = -1;
|
1998-02-07 01:11:56 -05:00
|
|
|
attrtypes[attnum]->atttypmod = -1;
|
2002-09-22 15:42:52 -04:00
|
|
|
attrtypes[attnum]->attislocal = true;
|
2002-09-04 16:31:48 -04:00
|
|
|
|
2015-02-21 16:25:49 -05:00
|
|
|
if (nullness == BOOTCOL_NULL_FORCE_NOT_NULL)
|
|
|
|
|
{
|
|
|
|
|
attrtypes[attnum]->attnotnull = true;
|
|
|
|
|
}
|
|
|
|
|
else if (nullness == BOOTCOL_NULL_FORCE_NULL)
|
|
|
|
|
{
|
|
|
|
|
attrtypes[attnum]->attnotnull = false;
|
|
|
|
|
}
|
|
|
|
|
else
|
2002-08-02 14:15:10 -04:00
|
|
|
{
|
2015-02-21 16:25:49 -05:00
|
|
|
Assert(nullness == BOOTCOL_NULL_AUTO);
|
2002-08-02 14:15:10 -04:00
|
|
|
|
2015-02-21 16:25:49 -05:00
|
|
|
/*
|
|
|
|
|
* Mark as "not null" if type is fixed-width and prior columns are
|
|
|
|
|
* too. This corresponds to case where column can be accessed
|
|
|
|
|
* directly via C struct declaration.
|
|
|
|
|
*
|
|
|
|
|
* oidvector and int2vector are also treated as not-nullable, even
|
|
|
|
|
* though they are no longer fixed-width.
|
|
|
|
|
*/
|
|
|
|
|
#define MARKNOTNULL(att) \
|
|
|
|
|
((att)->attlen > 0 || \
|
|
|
|
|
(att)->atttypid == OIDVECTOROID || \
|
|
|
|
|
(att)->atttypid == INT2VECTOROID)
|
|
|
|
|
|
|
|
|
|
if (MARKNOTNULL(attrtypes[attnum]))
|
2002-08-02 14:15:10 -04:00
|
|
|
{
|
2015-02-21 16:25:49 -05:00
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
/* check earlier attributes */
|
|
|
|
|
for (i = 0; i < attnum; i++)
|
|
|
|
|
{
|
|
|
|
|
if (!attrtypes[i]->attnotnull)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (i == attnum)
|
|
|
|
|
attrtypes[attnum]->attnotnull = true;
|
2002-08-02 14:15:10 -04:00
|
|
|
}
|
|
|
|
|
}
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* InsertOneTuple
|
2001-08-10 14:57:42 -04:00
|
|
|
*
|
|
|
|
|
* If objectid is not zero, it is a specific OID to assign to the tuple.
|
|
|
|
|
* Otherwise, an OID will be assigned (if necessary) by heap_insert.
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
InsertOneTuple(Oid objectid)
|
|
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
HeapTuple tuple;
|
|
|
|
|
TupleDesc tupDesc;
|
|
|
|
|
int i;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "inserting row oid %u, %d columns", objectid, numattr);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2002-09-01 21:05:06 -04:00
|
|
|
tupDesc = CreateTupleDesc(numattr,
|
|
|
|
|
RelationGetForm(boot_reldesc)->relhasoids,
|
|
|
|
|
attrtypes);
|
2008-11-01 21:45:28 -04:00
|
|
|
tuple = heap_form_tuple(tupDesc, values, Nulls);
|
1997-09-07 01:04:48 -04:00
|
|
|
if (objectid != (Oid) 0)
|
2002-07-20 01:16:59 -04:00
|
|
|
HeapTupleSetOid(tuple, objectid);
|
|
|
|
|
pfree(tupDesc); /* just free's tupDesc, not the attrtypes */
|
2002-09-01 21:05:06 -04:00
|
|
|
|
2002-05-21 18:05:55 -04:00
|
|
|
simple_heap_insert(boot_reldesc, tuple);
|
1999-12-16 17:20:03 -05:00
|
|
|
heap_freetuple(tuple);
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "row inserted");
|
1997-09-07 01:04:48 -04:00
|
|
|
|
|
|
|
|
/*
|
2008-11-01 21:45:28 -04:00
|
|
|
* Reset null markers for next tuple
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
|
|
|
|
for (i = 0; i < numattr; i++)
|
2008-11-01 21:45:28 -04:00
|
|
|
Nulls[i] = false;
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* InsertOneValue
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
2001-08-10 14:57:42 -04:00
|
|
|
InsertOneValue(char *value, int i)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
2004-06-05 20:41:28 -04:00
|
|
|
Oid typoid;
|
2006-08-15 18:36:17 -04:00
|
|
|
int16 typlen;
|
|
|
|
|
bool typbyval;
|
|
|
|
|
char typalign;
|
|
|
|
|
char typdelim;
|
2004-06-05 20:41:28 -04:00
|
|
|
Oid typioparam;
|
|
|
|
|
Oid typinput;
|
|
|
|
|
Oid typoutput;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2011-09-06 16:05:02 -04:00
|
|
|
AssertArg(i >= 0 && i < MAXATTR);
|
2001-05-11 21:48:49 -04:00
|
|
|
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(DEBUG4, "inserting column %d value \"%s\"", i, value);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2017-08-20 14:19:07 -04:00
|
|
|
typoid = TupleDescAttr(boot_reldesc->rd_att, i)->atttypid;
|
2004-06-05 20:41:28 -04:00
|
|
|
|
2006-08-15 18:36:17 -04:00
|
|
|
boot_get_type_io_data(typoid,
|
|
|
|
|
&typlen, &typbyval, &typalign,
|
|
|
|
|
&typdelim, &typioparam,
|
|
|
|
|
&typinput, &typoutput);
|
|
|
|
|
|
2006-04-04 15:35:37 -04:00
|
|
|
values[i] = OidInputFunctionCall(typinput, value, typioparam, -1);
|
Prevent memory leaks from accumulating across printtup() calls.
Historically, printtup() has assumed that it could prevent memory leakage
by pfree'ing the string result of each output function and manually
managing detoasting of toasted values. This amounts to assuming that
datatype output functions never leak any memory internally; an assumption
we've already decided to be bogus elsewhere, for example in COPY OUT.
range_out in particular is known to leak multiple kilobytes per call, as
noted in bug #8573 from Godfried Vanluffelen. While we could go in and fix
that leak, it wouldn't be very notationally convenient, and in any case
there have been and undoubtedly will again be other leaks in other output
functions. So what seems like the best solution is to run the output
functions in a temporary memory context that can be reset after each row,
as we're doing in COPY OUT. Some quick experimentation suggests this is
actually a tad faster than the retail pfree's anyway.
This patch fixes all the variants of printtup, except for debugtup()
which is used in standalone mode. It doesn't seem worth worrying
about query-lifespan leaks in standalone mode, and fixing that case
would be a bit tedious since debugtup() doesn't currently have any
startup or shutdown functions.
While at it, remove manual detoast management from several other
output-function call sites that had copied it from printtup(). This
doesn't make a lot of difference right now, but in view of recent
discussions about supporting "non-flattened" Datums, we're going to
want that code gone eventually anyway.
Back-patch to 9.2 where range_out was introduced. We might eventually
decide to back-patch this further, but in the absence of known major
leaks in older output functions, I'll refrain for now.
2013-11-03 11:33:05 -05:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We use ereport not elog here so that parameters aren't evaluated unless
|
|
|
|
|
* the message is going to be printed, which generally it isn't
|
|
|
|
|
*/
|
|
|
|
|
ereport(DEBUG4,
|
|
|
|
|
(errmsg_internal("inserted -> %s",
|
|
|
|
|
OidOutputFunctionCall(typoutput, values[i]))));
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* InsertOneNull
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
InsertOneNull(int i)
|
|
|
|
|
{
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "inserting column %d NULL", i);
|
2011-09-06 16:05:02 -04:00
|
|
|
Assert(i >= 0 && i < MAXATTR);
|
2017-08-20 14:19:07 -04:00
|
|
|
if (TupleDescAttr(boot_reldesc->rd_att, i)->attnotnull)
|
2017-06-13 10:54:39 -04:00
|
|
|
elog(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 15:35:54 -04:00
|
|
|
"NULL value specified for not-null column \"%s\" of relation \"%s\"",
|
2017-08-20 14:19:07 -04:00
|
|
|
NameStr(TupleDescAttr(boot_reldesc->rd_att, i)->attname),
|
2017-06-13 10:54:39 -04:00
|
|
|
RelationGetRelationName(boot_reldesc));
|
2000-05-30 00:25:00 -04:00
|
|
|
values[i] = PointerGetDatum(NULL);
|
2008-11-01 21:45:28 -04:00
|
|
|
Nulls[i] = true;
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* cleanup
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
1997-08-19 17:40:56 -04:00
|
|
|
static void
|
2003-05-28 14:19:09 -04:00
|
|
|
cleanup(void)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
2002-08-02 18:36:05 -04:00
|
|
|
if (boot_reldesc != NULL)
|
|
|
|
|
closerel(NULL);
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* gettype
|
2002-04-24 22:56:56 -04:00
|
|
|
*
|
2004-04-01 16:28:47 -05:00
|
|
|
* NB: this is really ugly; it will return an integer index into TypInfo[],
|
2002-04-24 22:56:56 -04:00
|
|
|
* and not an OID at all, until the first reference to a type not known in
|
2004-04-01 16:28:47 -05:00
|
|
|
* TypInfo[]. At that point it will read and cache pg_type in the Typ array,
|
2002-04-24 22:56:56 -04:00
|
|
|
* and subsequently return a real OID (and set the global pointer Ap to
|
2014-05-06 12:12:18 -04:00
|
|
|
* point at the found row in Typ). So caller must check whether Typ is
|
2002-04-24 22:56:56 -04:00
|
|
|
* still NULL to determine what the return value is!
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
1998-08-24 15:04:04 -04:00
|
|
|
static Oid
|
1996-07-09 02:22:35 -04:00
|
|
|
gettype(char *type)
|
|
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
int i;
|
1998-08-18 22:04:17 -04:00
|
|
|
Relation rel;
|
|
|
|
|
HeapScanDesc scan;
|
1997-09-07 22:41:22 -04:00
|
|
|
HeapTuple tup;
|
1997-09-07 01:04:48 -04:00
|
|
|
struct typmap **app;
|
|
|
|
|
|
2004-01-07 13:56:30 -05:00
|
|
|
if (Typ != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2004-01-07 13:56:30 -05:00
|
|
|
for (app = Typ; *app != NULL; app++)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
1999-11-07 18:08:36 -05:00
|
|
|
if (strncmp(NameStr((*app)->am_typ.typname), type, NAMEDATALEN) == 0)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
|
|
|
|
Ap = *app;
|
1998-08-31 23:29:17 -04:00
|
|
|
return (*app)->am_oid;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2002-04-24 22:56:56 -04:00
|
|
|
for (i = 0; i < n_types; i++)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2004-04-01 16:28:47 -05:00
|
|
|
if (strncmp(type, TypInfo[i].name, NAMEDATALEN) == 0)
|
1998-08-31 23:29:17 -04:00
|
|
|
return i;
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
2003-05-27 13:49:47 -04:00
|
|
|
elog(DEBUG4, "external type: %s", type);
|
2005-04-14 16:03:27 -04:00
|
|
|
rel = heap_open(TypeRelationId, NoLock);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 09:47:01 -04:00
|
|
|
scan = heap_beginscan_catalog(rel, 0, NULL);
|
1997-09-07 01:04:48 -04:00
|
|
|
i = 0;
|
2002-05-20 19:51:44 -04:00
|
|
|
while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
++i;
|
1998-08-18 22:04:17 -04:00
|
|
|
heap_endscan(scan);
|
1997-09-07 01:04:48 -04:00
|
|
|
app = Typ = ALLOC(struct typmap *, i + 1);
|
|
|
|
|
while (i-- > 0)
|
|
|
|
|
*app++ = ALLOC(struct typmap, 1);
|
2004-01-07 13:56:30 -05:00
|
|
|
*app = NULL;
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 09:47:01 -04:00
|
|
|
scan = heap_beginscan_catalog(rel, 0, NULL);
|
1997-09-07 01:04:48 -04:00
|
|
|
app = Typ;
|
2002-05-20 19:51:44 -04:00
|
|
|
while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2002-07-20 01:16:59 -04:00
|
|
|
(*app)->am_oid = HeapTupleGetOid(tup);
|
1997-09-07 01:04:48 -04:00
|
|
|
memmove((char *) &(*app++)->am_typ,
|
|
|
|
|
(char *) GETSTRUCT(tup),
|
|
|
|
|
sizeof((*app)->am_typ));
|
|
|
|
|
}
|
1998-08-18 22:04:17 -04:00
|
|
|
heap_endscan(scan);
|
1999-09-18 15:08:25 -04:00
|
|
|
heap_close(rel, NoLock);
|
1998-08-31 23:29:17 -04:00
|
|
|
return gettype(type);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
2003-07-22 19:30:39 -04:00
|
|
|
elog(ERROR, "unrecognized type \"%s\"", type);
|
1997-09-07 01:04:48 -04:00
|
|
|
/* not reached, here to make compiler happy */
|
|
|
|
|
return 0;
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
2006-08-15 18:36:17 -04:00
|
|
|
/* ----------------
|
|
|
|
|
* boot_get_type_io_data
|
|
|
|
|
*
|
|
|
|
|
* Obtain type I/O information at bootstrap time. This intentionally has
|
|
|
|
|
* almost the same API as lsyscache.c's get_type_io_data, except that
|
|
|
|
|
* we only support obtaining the typinput and typoutput routines, not
|
|
|
|
|
* the binary I/O routines. It is exported so that array_in and array_out
|
|
|
|
|
* can be made to work during early bootstrap.
|
|
|
|
|
* ----------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
boot_get_type_io_data(Oid typid,
|
|
|
|
|
int16 *typlen,
|
|
|
|
|
bool *typbyval,
|
|
|
|
|
char *typalign,
|
|
|
|
|
char *typdelim,
|
|
|
|
|
Oid *typioparam,
|
|
|
|
|
Oid *typinput,
|
|
|
|
|
Oid *typoutput)
|
|
|
|
|
{
|
|
|
|
|
if (Typ != NULL)
|
|
|
|
|
{
|
|
|
|
|
/* We have the boot-time contents of pg_type, so use it */
|
|
|
|
|
struct typmap **app;
|
|
|
|
|
struct typmap *ap;
|
|
|
|
|
|
|
|
|
|
app = Typ;
|
|
|
|
|
while (*app && (*app)->am_oid != typid)
|
|
|
|
|
++app;
|
|
|
|
|
ap = *app;
|
|
|
|
|
if (ap == NULL)
|
|
|
|
|
elog(ERROR, "type OID %u not found in Typ list", typid);
|
|
|
|
|
|
|
|
|
|
*typlen = ap->am_typ.typlen;
|
|
|
|
|
*typbyval = ap->am_typ.typbyval;
|
|
|
|
|
*typalign = ap->am_typ.typalign;
|
|
|
|
|
*typdelim = ap->am_typ.typdelim;
|
|
|
|
|
|
|
|
|
|
/* XXX this logic must match getTypeIOParam() */
|
|
|
|
|
if (OidIsValid(ap->am_typ.typelem))
|
|
|
|
|
*typioparam = ap->am_typ.typelem;
|
|
|
|
|
else
|
|
|
|
|
*typioparam = typid;
|
|
|
|
|
|
|
|
|
|
*typinput = ap->am_typ.typinput;
|
|
|
|
|
*typoutput = ap->am_typ.typoutput;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/* We don't have pg_type yet, so use the hard-wired TypInfo array */
|
|
|
|
|
int typeindex;
|
|
|
|
|
|
|
|
|
|
for (typeindex = 0; typeindex < n_types; typeindex++)
|
|
|
|
|
{
|
|
|
|
|
if (TypInfo[typeindex].oid == typid)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (typeindex >= n_types)
|
|
|
|
|
elog(ERROR, "type OID %u not found in TypInfo", typid);
|
|
|
|
|
|
|
|
|
|
*typlen = TypInfo[typeindex].len;
|
|
|
|
|
*typbyval = TypInfo[typeindex].byval;
|
|
|
|
|
*typalign = TypInfo[typeindex].align;
|
|
|
|
|
/* We assume typdelim is ',' for all boot-time types */
|
|
|
|
|
*typdelim = ',';
|
|
|
|
|
|
|
|
|
|
/* XXX this logic must match getTypeIOParam() */
|
|
|
|
|
if (OidIsValid(TypInfo[typeindex].elem))
|
|
|
|
|
*typioparam = TypInfo[typeindex].elem;
|
|
|
|
|
else
|
|
|
|
|
*typioparam = typid;
|
|
|
|
|
|
|
|
|
|
*typinput = TypInfo[typeindex].inproc;
|
|
|
|
|
*typoutput = TypInfo[typeindex].outproc;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
1996-07-09 02:22:35 -04:00
|
|
|
/* ----------------
|
1997-09-07 01:04:48 -04:00
|
|
|
* AllocateAttribute
|
2009-01-22 15:16:10 -05:00
|
|
|
*
|
|
|
|
|
* Note: bootstrap never sets any per-column ACLs, so we only need
|
|
|
|
|
* ATTRIBUTE_FIXED_PART_SIZE space per attribute.
|
1996-07-09 02:22:35 -04:00
|
|
|
* ----------------
|
|
|
|
|
*/
|
2002-08-02 14:15:10 -04:00
|
|
|
static Form_pg_attribute
|
|
|
|
|
AllocateAttribute(void)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
2016-08-30 18:22:43 -04:00
|
|
|
return (Form_pg_attribute)
|
|
|
|
|
MemoryContextAllocZero(TopMemoryContext, ATTRIBUTE_FIXED_PART_SIZE);
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
2014-12-16 15:35:33 -05:00
|
|
|
/*
|
1997-09-07 01:04:48 -04:00
|
|
|
* MapArrayTypeName
|
|
|
|
|
*
|
2014-12-16 15:35:33 -05:00
|
|
|
* Given a type name, produce the corresponding array type name by prepending
|
|
|
|
|
* '_' and truncating as needed to fit in NAMEDATALEN-1 bytes. This is only
|
|
|
|
|
* used in bootstrap mode, so we can get away with assuming that the input is
|
|
|
|
|
* ASCII and we don't need multibyte-aware truncation.
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
2014-12-16 15:35:33 -05:00
|
|
|
* The given string normally ends with '[]' or '[digits]'; we discard that.
|
|
|
|
|
*
|
|
|
|
|
* The result is a palloc'd string.
|
1996-07-09 02:22:35 -04:00
|
|
|
*/
|
1998-02-25 23:46:47 -05:00
|
|
|
char *
|
2014-12-16 15:35:33 -05:00
|
|
|
MapArrayTypeName(const char *s)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
int i,
|
|
|
|
|
j;
|
2014-12-16 15:35:33 -05:00
|
|
|
char newStr[NAMEDATALEN];
|
1996-07-09 02:22:35 -04:00
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
newStr[0] = '_';
|
2014-12-16 15:35:33 -05:00
|
|
|
j = 1;
|
|
|
|
|
for (i = 0; i < NAMEDATALEN - 2 && s[i] != '['; i++, j++)
|
1997-09-07 01:04:48 -04:00
|
|
|
newStr[j] = s[i];
|
1996-07-09 02:22:35 -04:00
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
newStr[j] = '\0';
|
|
|
|
|
|
2014-12-16 15:35:33 -05:00
|
|
|
return pstrdup(newStr);
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
1997-09-07 01:04:48 -04:00
|
|
|
* index_register() -- record an index that has been set up for building
|
|
|
|
|
* later.
|
1996-07-09 02:22:35 -04:00
|
|
|
*
|
2006-05-10 19:18:39 -04:00
|
|
|
* At bootstrap time, we define a bunch of indexes on system catalogs.
|
|
|
|
|
* We postpone actually building the indexes until just before we're
|
2014-05-06 12:12:18 -04:00
|
|
|
* finished with initialization, however. This is because the indexes
|
2006-05-10 19:18:39 -04:00
|
|
|
* themselves have catalog entries, and those have to be included in the
|
2014-05-06 12:12:18 -04:00
|
|
|
* indexes on those catalogs. Doing it in two phases is the simplest
|
2006-05-10 19:18:39 -04:00
|
|
|
* way of making sure the indexes have the right contents at the end.
|
1996-07-09 02:22:35 -04:00
|
|
|
*/
|
|
|
|
|
void
|
2002-03-26 14:17:02 -05:00
|
|
|
index_register(Oid heap,
|
|
|
|
|
Oid ind,
|
2000-07-14 18:18:02 -04:00
|
|
|
IndexInfo *indexInfo)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
1997-09-07 22:41:22 -04:00
|
|
|
IndexList *newind;
|
|
|
|
|
MemoryContext oldcxt;
|
1997-09-07 01:04:48 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* XXX mao 10/31/92 -- don't gc index reldescs, associated info at
|
2014-05-06 12:12:18 -04:00
|
|
|
* bootstrap time. we'll declare the indexes now, but want to create them
|
2005-10-14 22:49:52 -04:00
|
|
|
* later.
|
1997-09-07 01:04:48 -04:00
|
|
|
*/
|
|
|
|
|
|
2000-06-27 23:33:33 -04:00
|
|
|
if (nogc == NULL)
|
2004-01-07 13:56:30 -05:00
|
|
|
nogc = AllocSetContextCreate(NULL,
|
2000-06-27 23:33:33 -04:00
|
|
|
"BootstrapNoGC",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
2016-08-27 17:50:38 -04:00
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2000-06-27 23:33:33 -04:00
|
|
|
oldcxt = MemoryContextSwitchTo(nogc);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
|
|
|
|
newind = (IndexList *) palloc(sizeof(IndexList));
|
2002-03-26 14:17:02 -05:00
|
|
|
newind->il_heap = heap;
|
|
|
|
|
newind->il_ind = ind;
|
2000-07-14 18:18:02 -04:00
|
|
|
newind->il_info = (IndexInfo *) palloc(sizeof(IndexInfo));
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2000-07-14 18:18:02 -04:00
|
|
|
memcpy(newind->il_info, indexInfo, sizeof(IndexInfo));
|
2003-05-28 12:04:02 -04:00
|
|
|
/* expressions will likely be null, but may as well copy it */
|
2017-03-09 15:18:59 -05:00
|
|
|
newind->il_info->ii_Expressions =
|
2003-05-28 12:04:02 -04:00
|
|
|
copyObject(indexInfo->ii_Expressions);
|
|
|
|
|
newind->il_info->ii_ExpressionsState = NIL;
|
2001-07-16 01:07:00 -04:00
|
|
|
/* predicate will likely be null, but may as well copy it */
|
2017-03-09 15:18:59 -05:00
|
|
|
newind->il_info->ii_Predicate =
|
2001-07-16 01:07:00 -04:00
|
|
|
copyObject(indexInfo->ii_Predicate);
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 18:45:36 -04:00
|
|
|
newind->il_info->ii_PredicateState = NULL;
|
2009-12-07 00:22:23 -05:00
|
|
|
/* no exclusion constraints at bootstrap time, so no need to copy */
|
|
|
|
|
Assert(indexInfo->ii_ExclusionOps == NULL);
|
|
|
|
|
Assert(indexInfo->ii_ExclusionProcs == NULL);
|
|
|
|
|
Assert(indexInfo->ii_ExclusionStrats == NULL);
|
2000-06-17 19:41:51 -04:00
|
|
|
|
1997-09-07 01:04:48 -04:00
|
|
|
newind->il_next = ILHead;
|
|
|
|
|
ILHead = newind;
|
|
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcxt);
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|
|
|
|
|
|
2006-05-10 19:18:39 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* build_indices -- fill in all the indexes registered earlier
|
|
|
|
|
*/
|
1996-07-09 02:22:35 -04:00
|
|
|
void
|
2004-01-07 13:56:30 -05:00
|
|
|
build_indices(void)
|
1996-07-09 02:22:35 -04:00
|
|
|
{
|
2004-01-07 13:56:30 -05:00
|
|
|
for (; ILHead != NULL; ILHead = ILHead->il_next)
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2000-07-14 18:18:02 -04:00
|
|
|
Relation heap;
|
|
|
|
|
Relation ind;
|
|
|
|
|
|
2006-07-31 16:09:10 -04:00
|
|
|
/* need not bother with locks during bootstrap */
|
2002-03-26 14:17:02 -05:00
|
|
|
heap = heap_open(ILHead->il_heap, NoLock);
|
2006-07-31 16:09:10 -04:00
|
|
|
ind = index_open(ILHead->il_ind, NoLock);
|
2000-04-12 13:17:23 -04:00
|
|
|
|
Support parallel btree index builds.
To make this work, tuplesort.c and logtape.c must also support
parallelism, so this patch adds that infrastructure and then applies
it to the particular case of parallel btree index builds. Testing
to date shows that this can often be 2-3x faster than a serial
index build.
The model for deciding how many workers to use is fairly primitive
at present, but it's better than not having the feature. We can
refine it as we get more experience.
Peter Geoghegan with some help from Rushabh Lathia. While Heikki
Linnakangas is not an author of this patch, he wrote other patches
without which this feature would not have been possible, and
therefore the release notes should possibly credit him as an author
of this feature. Reviewed by Claudio Freire, Heikki Linnakangas,
Thomas Munro, Tels, Amit Kapila, me.
Discussion: http://postgr.es/m/CAM3SWZQKM=Pzc=CAHzRixKjp2eO5Q0Jg1SoFQqeXFQ647JiwqQ@mail.gmail.com
Discussion: http://postgr.es/m/CAH2-Wz=AxWqDoVvGU7dq856S4r6sJAj6DBn7VMtigkB33N5eyg@mail.gmail.com
2018-02-02 13:25:55 -05:00
|
|
|
index_build(heap, ind, ILHead->il_info, false, false, false);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
2006-07-31 16:09:10 -04:00
|
|
|
index_close(ind, NoLock);
|
2006-05-10 19:18:39 -04:00
|
|
|
heap_close(heap, NoLock);
|
1997-09-07 01:04:48 -04:00
|
|
|
}
|
1996-07-09 02:22:35 -04:00
|
|
|
}
|