Add COPY (on_error set_null) option

If ON_ERROR SET_NULL is specified during COPY FROM, any data type
conversion errors will result in the affected column being set to a
null value.  A column's not-null constraints are still enforced, and
attempting to set a null value in such columns will raise a constraint
violation error.  This also applies to a column whose data type is a
domain with a NOT NULL constraint.

Author: Jian He <jian.universality@gmail.com>
Author: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Reviewed-by: Jim Jones <jim.jones@uni-muenster.de>
Reviewed-by: "David G. Johnston" <david.g.johnston@gmail.com>
Reviewed-by: Yugo NAGATA <nagata@sraoss.co.jp>
Reviewed-by: torikoshia <torikoshia@oss.nttdata.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://www.postgresql.org/message-id/flat/CAKFQuwawy1e6YR4S%3Dj%2By7pXqg_Dw1WBVrgvf%3DBP3d1_aSfe_%2BQ%40mail.gmail.com
This commit is contained in:
Peter Eisentraut 2026-03-03 07:23:38 +01:00
parent a1bd0c1615
commit 2a525cc97e
10 changed files with 253 additions and 41 deletions

View file

@ -6266,8 +6266,8 @@ FROM pg_stat_get_backend_idset() AS backendid;
</para>
<para>
Number of tuples skipped because they contain malformed data.
This counter only advances when a value other than
<literal>stop</literal> is specified to the <literal>ON_ERROR</literal>
This counter only advances when
<literal>ignore</literal> is specified to the <literal>ON_ERROR</literal>
option.
</para></entry>
</row>

View file

@ -413,22 +413,33 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
input value into its data type.
An <replaceable class="parameter">error_action</replaceable> value of
<literal>stop</literal> means fail the command, while
<literal>ignore</literal> means discard the input row and continue with the next one.
<literal>ignore</literal> means discard the input row and continue with the next one,
and <literal>set_null</literal> means replace the field containing the invalid
input value with a null value and continue to the next field.
The default is <literal>stop</literal>.
</para>
<para>
The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
The <literal>ignore</literal> and <literal>set_null</literal>
options are applicable only for <command>COPY FROM</command>
when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
</para>
<para>
A <literal>NOTICE</literal> message containing the ignored row count is
emitted at the end of the <command>COPY FROM</command> if at least one
row was discarded. When <literal>LOG_VERBOSITY</literal> option is set to
<literal>verbose</literal>, a <literal>NOTICE</literal> message
If <literal>ON_ERROR</literal> is set to <literal>ignore</literal> or
<literal>set_null</literal>, a <literal>NOTICE</literal> message is emitted at the end of the
<command>COPY FROM</command> command containing the count of rows that were ignored or
changed, if at least one row was affected.
</para>
<para>
When <literal>LOG_VERBOSITY</literal> option is set to <literal>verbose</literal>,
for <literal>ignore</literal> option, a <literal>NOTICE</literal> message
containing the line of the input file and the column name whose input
conversion has failed is emitted for each discarded row.
conversion has failed is emitted for each discarded row;
for <literal>set_null</literal> option, a <literal>NOTICE</literal>
message containing the line of the input file and the column name whose
value was replaced with <literal>NULL</literal> is emitted for each input
conversion failure.
When it is set to <literal>silent</literal>, no message is emitted
regarding ignored rows.
regarding rows whose input conversion failed.
</para>
</listitem>
</varlistentry>
@ -476,7 +487,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</para>
<para>
This is currently used in <command>COPY FROM</command> command when
<literal>ON_ERROR</literal> option is set to <literal>ignore</literal>.
<literal>ON_ERROR</literal> option is set to <literal>ignore</literal>
or <literal>set_null</literal>.
</para>
</listitem>
</varlistentry>

View file

@ -468,13 +468,12 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
errmsg("COPY %s cannot be used with %s", "ON_ERROR", "COPY TO"),
parser_errposition(pstate, def->location)));
/*
* Allow "stop", or "ignore" values.
*/
if (pg_strcasecmp(sval, "stop") == 0)
return COPY_ON_ERROR_STOP;
if (pg_strcasecmp(sval, "ignore") == 0)
return COPY_ON_ERROR_IGNORE;
if (pg_strcasecmp(sval, "set_null") == 0)
return COPY_ON_ERROR_SET_NULL;
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@ -984,7 +983,7 @@ ProcessCopyOptions(ParseState *pstate,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
if (opts_out->reject_limit && !opts_out->on_error)
if (opts_out->reject_limit && opts_out->on_error != COPY_ON_ERROR_IGNORE)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/*- translator: first and second %s are the names of COPY option, e.g.

View file

@ -50,6 +50,7 @@
#include "utils/portal.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/typcache.h"
/*
* No more than this many tuples per CopyMultiInsertBuffer
@ -1463,14 +1464,22 @@ CopyFrom(CopyFromState cstate)
/* Done, clean up */
error_context_stack = errcallback.previous;
if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
cstate->num_errors > 0 &&
if (cstate->num_errors > 0 &&
cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
ereport(NOTICE,
errmsg_plural("%" PRIu64 " row was skipped due to data type incompatibility",
"%" PRIu64 " rows were skipped due to data type incompatibility",
cstate->num_errors,
cstate->num_errors));
{
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
ereport(NOTICE,
errmsg_plural("%" PRIu64 " row was skipped due to data type incompatibility",
"%" PRIu64 " rows were skipped due to data type incompatibility",
cstate->num_errors,
cstate->num_errors));
else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
ereport(NOTICE,
errmsg_plural("in %" PRIu64 " row, columns were set to null due to data type incompatibility",
"in %" PRIu64 " rows, columns were set to null due to data type incompatibility",
cstate->num_errors,
cstate->num_errors));
}
if (bistate != NULL)
FreeBulkInsertState(bistate);
@ -1617,16 +1626,37 @@ BeginCopyFrom(ParseState *pstate,
cstate->escontext->type = T_ErrorSaveContext;
cstate->escontext->error_occurred = false;
/*
* Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
* options later
*/
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
cstate->escontext->details_wanted = false;
}
else
cstate->escontext = NULL;
if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
{
int attr_count = list_length(cstate->attnumlist);
/*
* When data type conversion fails and ON_ERROR is SET_NULL, we need to
* ensure that the input column allows null values. ExecConstraints()
* will cover most of the cases, but it does not verify domain
* constraints. Therefore, for constrained domains, the null value
* check must be performed during the initial string-to-datum
* conversion (see CopyFromTextLikeOneRow()).
*/
cstate->domain_with_constraint = palloc0_array(bool, attr_count);
foreach_int(attno, cstate->attnumlist)
{
int i = foreach_current_index(attno);
Form_pg_attribute att = TupleDescAttr(tupDesc, attno - 1);
cstate->domain_with_constraint[i] = DomainHasConstraints(att->atttypid);
}
}
/* Convert FORCE_NULL name list to per-column flags, check validity */
cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
if (cstate->opts.force_null_all)

View file

@ -959,6 +959,7 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
int fldct;
int fieldno;
char *string;
bool current_row_erroneous = false;
tupDesc = RelationGetDescr(cstate->rel);
attr_count = list_length(cstate->attnumlist);
@ -1036,7 +1037,7 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
}
/*
* If ON_ERROR is specified with IGNORE, skip rows with soft errors
* If ON_ERROR is specified, handle the different options
*/
else if (!InputFunctionCallSafe(&in_functions[m],
string,
@ -1047,7 +1048,55 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
{
Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
cstate->num_errors++;
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
cstate->num_errors++;
else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
{
/*
* Reset error state so the subsequent InputFunctionCallSafe
* call (for domain constraint check) can properly report
* whether it succeeded or failed.
*/
cstate->escontext->error_occurred = false;
Assert(cstate->domain_with_constraint != NULL);
/*
* For constrained domains, we need an additional
* InputFunctionCallSafe() to ensure that an error is thrown
* if the domain constraint rejects null values.
*/
if (!cstate->domain_with_constraint[m] ||
InputFunctionCallSafe(&in_functions[m],
NULL,
typioparams[m],
att->atttypmod,
(Node *) cstate->escontext,
&values[m]))
{
nulls[m] = true;
values[m] = (Datum) 0;
}
else
ereport(ERROR,
errcode(ERRCODE_NOT_NULL_VIOLATION),
errmsg("domain %s does not allow null values",
format_type_be(typioparams[m])),
errdetail("ON_ERROR SET_NULL cannot be applied because column \"%s\" (domain %s) does not accept null values.",
cstate->cur_attname,
format_type_be(typioparams[m])),
errdatatype(typioparams[m]));
/*
* We count only the number of rows (not fields) where
* ON_ERROR SET_NULL was applied.
*/
if (!current_row_erroneous)
{
current_row_erroneous = true;
cstate->num_errors++;
}
}
if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
{
@ -1064,24 +1113,37 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
char *attval;
attval = CopyLimitPrintoutLength(cstate->cur_attval);
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
cstate->cur_lineno,
cstate->cur_attname,
attval));
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
cstate->cur_lineno,
cstate->cur_attname,
attval));
else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
ereport(NOTICE,
errmsg("setting to null due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
cstate->cur_lineno,
cstate->cur_attname,
attval));
pfree(attval);
}
else
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
cstate->cur_lineno,
cstate->cur_attname));
{
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
ereport(NOTICE,
errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
cstate->cur_lineno,
cstate->cur_attname));
}
/* reset relname_only */
cstate->relname_only = false;
}
return true;
if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
return true;
else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
continue;
}
cstate->cur_attname = NULL;

View file

@ -3437,7 +3437,7 @@ match_previous_words(int pattern_id,
/* Complete COPY <sth> FROM filename WITH (ON_ERROR */
else if (TailMatches("ON_ERROR"))
COMPLETE_WITH("stop", "ignore");
COMPLETE_WITH("stop", "ignore", "set_null");
/* Complete COPY <sth> FROM filename WITH (LOG_VERBOSITY */
else if (TailMatches("LOG_VERBOSITY"))

View file

@ -35,6 +35,7 @@ typedef enum CopyOnErrorChoice
{
COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */
COPY_ON_ERROR_IGNORE, /* ignore errors */
COPY_ON_ERROR_SET_NULL, /* set error field to null */
} CopyOnErrorChoice;
/*

View file

@ -108,6 +108,13 @@ typedef struct CopyFromStateData
* att */
bool *defaults; /* if DEFAULT marker was found for
* corresponding att */
/*
* True if the corresponding attribute's type is a constrained domain.
* This is populated only when ON_ERROR is SET_NULL; otherwise it is NULL.
*/
bool *domain_with_constraint;
bool volatile_defexprs; /* is any of defexprs volatile? */
List *range_table; /* single element list of RangeTblEntry */
List *rteperminfos; /* single element list of RTEPermissionInfo */

View file

@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
^
COPY x from stdin (on_error set_null, on_error set_null);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (on_error set_null, on_error set_null);
^
COPY x from stdin (log_verbosity default, log_verbosity verbose);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@ -92,6 +96,10 @@ COPY x from stdin (format BINARY, null 'x');
ERROR: cannot specify NULL in BINARY mode
COPY x from stdin (format BINARY, on_error ignore);
ERROR: only ON_ERROR STOP is allowed in BINARY mode
COPY x from stdin (format BINARY, on_error set_null);
ERROR: only ON_ERROR STOP is allowed in BINARY mode
COPY x from stdin (on_error set_null, reject_limit 2);
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin (on_error unsupported);
ERROR: COPY ON_ERROR "unsupported" not recognized
LINE 1: COPY x from stdin (on_error unsupported);
@ -124,6 +132,10 @@ COPY x to stdout (format BINARY, on_error unsupported);
ERROR: COPY ON_ERROR cannot be used with COPY TO
LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
^
COPY x to stdout (on_error set_null);
ERROR: COPY ON_ERROR cannot be used with COPY TO
LINE 1: COPY x to stdout (on_error set_null);
^
COPY x from stdin (log_verbosity unsupported);
ERROR: COPY LOG_VERBOSITY "unsupported" not recognized
LINE 1: COPY x from stdin (log_verbosity unsupported);
@ -782,6 +794,49 @@ CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 8 for column "k": "a"
CONTEXT: COPY check_ign_err
NOTICE: 6 rows were skipped due to data type incompatibility
CREATE DOMAIN d_int_not_null AS integer NOT NULL CHECK (value > 0);
CREATE DOMAIN d_int_positive_maybe_null AS integer CHECK (value > 0);
CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c integer);
\pset null NULL
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
ERROR: domain d_int_not_null does not allow null values
DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values.
CONTEXT: COPY t_on_error_null, line 1, column a: null input
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
ERROR: domain d_int_not_null does not allow null values
DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values.
CONTEXT: COPY t_on_error_null, line 1, column a: "ss"
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
ERROR: domain d_int_not_null does not allow null values
DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values.
CONTEXT: COPY t_on_error_null, line 1, column a: "-1"
-- fail, less data.
COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null);
ERROR: missing data for column "c"
CONTEXT: COPY t_on_error_null, line 1: "1,1"
-- fail, extra data.
COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null);
ERROR: extra data after last expected column
CONTEXT: COPY t_on_error_null, line 1: "1,2,3,4"
COPY t_on_error_null FROM STDIN WITH (on_error set_null, log_verbosity verbose); -- ok
NOTICE: setting to null due to data type incompatibility at line 1 for column "b": "x1"
CONTEXT: COPY t_on_error_null
NOTICE: setting to null due to data type incompatibility at line 1 for column "c": "yx"
CONTEXT: COPY t_on_error_null
NOTICE: setting to null due to data type incompatibility at line 2 for column "b": "zx"
CONTEXT: COPY t_on_error_null
NOTICE: setting to null due to data type incompatibility at line 3 for column "c": "ea"
CONTEXT: COPY t_on_error_null
NOTICE: in 3 rows, columns were set to null due to data type incompatibility
SELECT * FROM t_on_error_null ORDER BY a;
a | b | c
----+------+------
10 | NULL | NULL
11 | NULL | 12
13 | 14 | NULL
(3 rows)
\pset null ''
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@ -841,6 +896,9 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
DROP TABLE t_on_error_null;
DROP DOMAIN d_int_not_null;
DROP DOMAIN d_int_positive_maybe_null;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;

View file

@ -67,12 +67,15 @@ COPY x from stdin (force_null (a), force_null (b));
COPY x from stdin (convert_selectively (a), convert_selectively (b));
COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
COPY x from stdin (on_error ignore, on_error ignore);
COPY x from stdin (on_error set_null, on_error set_null);
COPY x from stdin (log_verbosity default, log_verbosity verbose);
-- incorrect options
COPY x from stdin (format BINARY, delimiter ',');
COPY x from stdin (format BINARY, null 'x');
COPY x from stdin (format BINARY, on_error ignore);
COPY x from stdin (format BINARY, on_error set_null);
COPY x from stdin (on_error set_null, reject_limit 2);
COPY x from stdin (on_error unsupported);
COPY x from stdin (format TEXT, force_quote(a));
COPY x from stdin (format TEXT, force_quote *);
@ -87,6 +90,7 @@ COPY x from stdin (format TEXT, force_null *);
COPY x to stdout (format CSV, force_null(a));
COPY x to stdout (format CSV, force_null *);
COPY x to stdout (format BINARY, on_error unsupported);
COPY x to stdout (on_error set_null);
COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0);
@ -540,6 +544,42 @@ a {2} 2
8 {8} 8
\.
CREATE DOMAIN d_int_not_null AS integer NOT NULL CHECK (value > 0);
CREATE DOMAIN d_int_positive_maybe_null AS integer CHECK (value > 0);
CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c integer);
\pset null NULL
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
\N 11 13
\.
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
ss 11 14
\.
COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail
-1 11 13
\.
-- fail, less data.
COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null);
1,1
\.
-- fail, extra data.
COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null);
1,2,3,4
\.
COPY t_on_error_null FROM STDIN WITH (on_error set_null, log_verbosity verbose); -- ok
10 x1 yx
11 zx 12
13 14 ea
\.
SELECT * FROM t_on_error_null ORDER BY a;
\pset null ''
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@ -609,6 +649,9 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
DROP TABLE t_on_error_null;
DROP DOMAIN d_int_not_null;
DROP DOMAIN d_int_positive_maybe_null;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;