diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b77d189a500..dcf6e6a2f48 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6266,8 +6266,8 @@ FROM pg_stat_get_backend_idset() AS backendid; Number of tuples skipped because they contain malformed data. - This counter only advances when a value other than - stop is specified to the ON_ERROR + This counter only advances when + ignore is specified to the ON_ERROR option. diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 53b0ea8f573..0ad890ef95f 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -413,22 +413,33 @@ COPY { table_name [ ( error_action value of stop means fail the command, while - ignore means discard the input row and continue with the next one. + ignore means discard the input row and continue with the next one, + and set_null means replace the field containing the invalid + input value with a null value and continue to the next field. The default is stop. - The ignore option is applicable only for COPY FROM + The ignore and set_null + options are applicable only for COPY FROM when the FORMAT is text or csv. - A NOTICE message containing the ignored row count is - emitted at the end of the COPY FROM if at least one - row was discarded. When LOG_VERBOSITY option is set to - verbose, a NOTICE message + If ON_ERROR is set to ignore or + set_null, a NOTICE message is emitted at the end of the + COPY FROM command containing the count of rows that were ignored or + changed, if at least one row was affected. + + + When LOG_VERBOSITY option is set to verbose, + for ignore option, a NOTICE message containing the line of the input file and the column name whose input - conversion has failed is emitted for each discarded row. 
+ conversion has failed is emitted for each discarded row; + for set_null option, a NOTICE + message containing the line of the input file and the column name where + the value was replaced with NULL is emitted for each input conversion + failure. When it is set to silent, no message is emitted - regarding ignored rows. + regarding rows with input conversion failures. @@ -476,7 +487,8 @@ COPY { table_name [ ( This is currently used in COPY FROM command when - ON_ERROR option is set to ignore. + ON_ERROR option is set to ignore + or set_null. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 155a79a70c5..63b86802ba2 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -468,13 +468,12 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) errmsg("COPY %s cannot be used with %s", "ON_ERROR", "COPY TO"), parser_errposition(pstate, def->location))); - /* - * Allow "stop", or "ignore" values. - */ if (pg_strcasecmp(sval, "stop") == 0) return COPY_ON_ERROR_STOP; if (pg_strcasecmp(sval, "ignore") == 0) return COPY_ON_ERROR_IGNORE; + if (pg_strcasecmp(sval, "set_null") == 0) + return COPY_ON_ERROR_SET_NULL; ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -984,7 +983,7 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("only ON_ERROR STOP is allowed in BINARY mode"))); - if (opts_out->reject_limit && !opts_out->on_error) + if (opts_out->reject_limit && opts_out->on_error != COPY_ON_ERROR_IGNORE) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), /*- translator: first and second %s are the names of COPY option, e.g. 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 2b7556b287c..2f42f55e229 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -50,6 +50,7 @@ #include "utils/portal.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/typcache.h" /* * No more than this many tuples per CopyMultiInsertBuffer @@ -1463,14 +1464,22 @@ CopyFrom(CopyFromState cstate) /* Done, clean up */ error_context_stack = errcallback.previous; - if (cstate->opts.on_error != COPY_ON_ERROR_STOP && - cstate->num_errors > 0 && + if (cstate->num_errors > 0 && cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT) - ereport(NOTICE, - errmsg_plural("%" PRIu64 " row was skipped due to data type incompatibility", - "%" PRIu64 " rows were skipped due to data type incompatibility", - cstate->num_errors, - cstate->num_errors)); + { + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + ereport(NOTICE, + errmsg_plural("%" PRIu64 " row was skipped due to data type incompatibility", + "%" PRIu64 " rows were skipped due to data type incompatibility", + cstate->num_errors, + cstate->num_errors)); + else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) + ereport(NOTICE, + errmsg_plural("in %" PRIu64 " row, columns were set to null due to data type incompatibility", + "in %" PRIu64 " rows, columns were set to null due to data type incompatibility", + cstate->num_errors, + cstate->num_errors)); + } if (bistate != NULL) FreeBulkInsertState(bistate); @@ -1617,16 +1626,37 @@ BeginCopyFrom(ParseState *pstate, cstate->escontext->type = T_ErrorSaveContext; cstate->escontext->error_occurred = false; - /* - * Currently we only support COPY_ON_ERROR_IGNORE. 
We'll add other - * options later - */ - if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE || + cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) cstate->escontext->details_wanted = false; } else cstate->escontext = NULL; + if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) + { + int attr_count = list_length(cstate->attnumlist); + + /* + * When data type conversion fails and ON_ERROR is SET_NULL, we need + * to ensure that the input column allows null values. ExecConstraints() + * will cover most of the cases, but it does not verify domain + * constraints. Therefore, for constrained domains, the null value + * check must be performed during the initial string-to-datum + * conversion (see CopyFromTextLikeOneRow()). + */ + cstate->domain_with_constraint = palloc0_array(bool, attr_count); + + foreach_int(attno, cstate->attnumlist) + { + int i = foreach_current_index(attno); + + Form_pg_attribute att = TupleDescAttr(tupDesc, attno - 1); + + cstate->domain_with_constraint[i] = DomainHasConstraints(att->atttypid); + } + } + /* Convert FORCE_NULL name list to per-column flags, check validity */ cstate->opts.force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); if (cstate->opts.force_null_all) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 6b00d49c50f..fbd13353efc 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -959,6 +959,7 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, int fldct; int fieldno; char *string; + bool current_row_erroneous = false; tupDesc = RelationGetDescr(cstate->rel); attr_count = list_length(cstate->attnumlist); @@ -1036,7 +1037,7 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, } /* - * If ON_ERROR is specified with IGNORE, skip rows with soft errors + * If ON_ERROR is specified, handle the different options */ else if 
(!InputFunctionCallSafe(&in_functions[m], string, @@ -1047,7 +1048,55 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, { Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); - cstate->num_errors++; + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + cstate->num_errors++; + else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) + { + /* + * Reset error state so the subsequent InputFunctionCallSafe + * call (for domain constraint check) can properly report + * whether it succeeded or failed. + */ + cstate->escontext->error_occurred = false; + + Assert(cstate->domain_with_constraint != NULL); + + /* + * For constrained domains, we need an additional + * InputFunctionCallSafe() to ensure that an error is thrown + * if the domain constraint rejects null values. + */ + if (!cstate->domain_with_constraint[m] || + InputFunctionCallSafe(&in_functions[m], + NULL, + typioparams[m], + att->atttypmod, + (Node *) cstate->escontext, + &values[m])) + { + nulls[m] = true; + values[m] = (Datum) 0; + } + else + ereport(ERROR, + errcode(ERRCODE_NOT_NULL_VIOLATION), + errmsg("domain %s does not allow null values", + format_type_be(typioparams[m])), + errdetail("ON_ERROR SET_NULL cannot be applied because column \"%s\" (domain %s) does not accept null values.", + cstate->cur_attname, + format_type_be(typioparams[m])), + errdatatype(typioparams[m])); + + /* + * We count only the number of rows (not fields) where + * ON_ERROR SET_NULL was applied. 
+ */ + if (!current_row_erroneous) + { + current_row_erroneous = true; + cstate->num_errors++; + } + } if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) { @@ -1064,24 +1113,37 @@ CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, char *attval; attval = CopyLimitPrintoutLength(cstate->cur_attval); - ereport(NOTICE, - errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"", - cstate->cur_lineno, - cstate->cur_attname, - attval)); + + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + ereport(NOTICE, + errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"", + cstate->cur_lineno, + cstate->cur_attname, + attval)); + else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) + ereport(NOTICE, + errmsg("setting to null due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"", + cstate->cur_lineno, + cstate->cur_attname, + attval)); pfree(attval); } else - ereport(NOTICE, - errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input", - cstate->cur_lineno, - cstate->cur_attname)); - + { + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + ereport(NOTICE, + errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input", + cstate->cur_lineno, + cstate->cur_attname)); + } /* reset relname_only */ cstate->relname_only = false; } - return true; + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + return true; + else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL) + continue; } cstate->cur_attname = NULL; diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 987cce820b9..905c076763c 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3437,7 +3437,7 @@ match_previous_words(int pattern_id, /* Complete COPY FROM filename WITH (ON_ERROR */ else if (TailMatches("ON_ERROR")) - 
COMPLETE_WITH("stop", "ignore"); + COMPLETE_WITH("stop", "ignore", "set_null"); /* Complete COPY FROM filename WITH (LOG_VERBOSITY */ else if (TailMatches("LOG_VERBOSITY")) diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 43c2580539f..877202af67b 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -35,6 +35,7 @@ typedef enum CopyOnErrorChoice { COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */ COPY_ON_ERROR_IGNORE, /* ignore errors */ + COPY_ON_ERROR_SET_NULL, /* set error field to null */ } CopyOnErrorChoice; /* diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 822ef33cf69..f892c343157 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -108,6 +108,13 @@ typedef struct CopyFromStateData * att */ bool *defaults; /* if DEFAULT marker was found for * corresponding att */ + + /* + * True if the corresponding attribute's type is a constrained domain. This + * will be populated only when ON_ERROR is SET_NULL, otherwise NULL. + */ + bool *domain_with_constraint; + bool volatile_defexprs; /* is any of defexprs volatile? 
*/ List *range_table; /* single element list of RangeTblEntry */ List *rteperminfos; /* single element list of RTEPermissionInfo */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 3145b314e48..01101c71051 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (on_error ignore, on_error ignore); ^ +COPY x from stdin (on_error set_null, on_error set_null); +ERROR: conflicting or redundant options +LINE 1: COPY x from stdin (on_error set_null, on_error set_null); + ^ COPY x from stdin (log_verbosity default, log_verbosity verbose); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb... @@ -92,6 +96,10 @@ COPY x from stdin (format BINARY, null 'x'); ERROR: cannot specify NULL in BINARY mode COPY x from stdin (format BINARY, on_error ignore); ERROR: only ON_ERROR STOP is allowed in BINARY mode +COPY x from stdin (format BINARY, on_error set_null); +ERROR: only ON_ERROR STOP is allowed in BINARY mode +COPY x from stdin (on_error set_null, reject_limit 2); +ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE COPY x from stdin (on_error unsupported); ERROR: COPY ON_ERROR "unsupported" not recognized LINE 1: COPY x from stdin (on_error unsupported); @@ -124,6 +132,10 @@ COPY x to stdout (format BINARY, on_error unsupported); ERROR: COPY ON_ERROR cannot be used with COPY TO LINE 1: COPY x to stdout (format BINARY, on_error unsupported); ^ +COPY x to stdout (on_error set_null); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: COPY x to stdout (on_error set_null); + ^ COPY x from stdin (log_verbosity unsupported); ERROR: COPY LOG_VERBOSITY "unsupported" not recognized LINE 1: COPY x from stdin (log_verbosity unsupported); @@ -782,6 +794,49 @@ CONTEXT: COPY check_ign_err NOTICE: 
skipping row due to data type incompatibility at line 8 for column "k": "a" CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +CREATE DOMAIN d_int_not_null AS integer NOT NULL CHECK (value > 0); +CREATE DOMAIN d_int_positive_maybe_null AS integer CHECK (value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c integer); +\pset null NULL +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +ERROR: domain d_int_not_null does not allow null values +DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values. +CONTEXT: COPY t_on_error_null, line 1, column a: null input +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +ERROR: domain d_int_not_null does not allow null values +DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values. +CONTEXT: COPY t_on_error_null, line 1, column a: "ss" +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +ERROR: domain d_int_not_null does not allow null values +DETAIL: ON_ERROR SET_NULL cannot be applied because column "a" (domain d_int_not_null) does not accept null values. +CONTEXT: COPY t_on_error_null, line 1, column a: "-1" +-- fail, less data. +COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null); +ERROR: missing data for column "c" +CONTEXT: COPY t_on_error_null, line 1: "1,1" +-- fail, extra data. 
+COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null); +ERROR: extra data after last expected column +CONTEXT: COPY t_on_error_null, line 1: "1,2,3,4" +COPY t_on_error_null FROM STDIN WITH (on_error set_null, log_verbosity verbose); -- ok +NOTICE: setting to null due to data type incompatibility at line 1 for column "b": "x1" +CONTEXT: COPY t_on_error_null +NOTICE: setting to null due to data type incompatibility at line 1 for column "c": "yx" +CONTEXT: COPY t_on_error_null +NOTICE: setting to null due to data type incompatibility at line 2 for column "b": "zx" +CONTEXT: COPY t_on_error_null +NOTICE: setting to null due to data type incompatibility at line 3 for column "c": "ea" +CONTEXT: COPY t_on_error_null +NOTICE: in 3 rows, columns were set to null due to data type incompatibility +SELECT * FROM t_on_error_null ORDER BY a; + a | b | c +----+------+------ + 10 | NULL | NULL + 11 | NULL | 12 + 13 | 14 | NULL +(3 rows) + +\pset null '' -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -841,6 +896,9 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE t_on_error_null; +DROP DOMAIN d_int_not_null; +DROP DOMAIN d_int_positive_maybe_null; DROP TABLE check_ign_err2; DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 66435167500..889dcf1383f 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -67,12 +67,15 @@ COPY x from stdin (force_null (a), force_null (b)); COPY x from stdin (convert_selectively (a), convert_selectively (b)); COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); COPY x from stdin (on_error ignore, on_error ignore); +COPY x from stdin (on_error 
set_null, on_error set_null); COPY x from stdin (log_verbosity default, log_verbosity verbose); -- incorrect options COPY x from stdin (format BINARY, delimiter ','); COPY x from stdin (format BINARY, null 'x'); COPY x from stdin (format BINARY, on_error ignore); +COPY x from stdin (format BINARY, on_error set_null); +COPY x from stdin (on_error set_null, reject_limit 2); COPY x from stdin (on_error unsupported); COPY x from stdin (format TEXT, force_quote(a)); COPY x from stdin (format TEXT, force_quote *); @@ -87,6 +90,7 @@ COPY x from stdin (format TEXT, force_null *); COPY x to stdout (format CSV, force_null(a)); COPY x to stdout (format CSV, force_null *); COPY x to stdout (format BINARY, on_error unsupported); +COPY x to stdout (on_error set_null); COPY x from stdin (log_verbosity unsupported); COPY x from stdin with (reject_limit 1); COPY x from stdin with (on_error ignore, reject_limit 0); @@ -540,6 +544,42 @@ a {2} 2 8 {8} 8 \. +CREATE DOMAIN d_int_not_null AS integer NOT NULL CHECK (value > 0); +CREATE DOMAIN d_int_positive_maybe_null AS integer CHECK (value > 0); +CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c integer); + +\pset null NULL +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +\N 11 13 +\. + +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +ss 11 14 +\. + +COPY t_on_error_null FROM STDIN WITH (on_error set_null); -- fail +-1 11 13 +\. + +-- fail, less data. +COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null); +1,1 +\. +-- fail, extra data. +COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_null); +1,2,3,4 +\. + +COPY t_on_error_null FROM STDIN WITH (on_error set_null, log_verbosity verbose); -- ok +10 x1 yx +11 zx 12 +13 14 ea +\. 
+ +SELECT * FROM t_on_error_null ORDER BY a; + +\pset null '' + -- tests for on_error option with log_verbosity and null constraint via domain CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); @@ -609,6 +649,9 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE t_on_error_null; +DROP DOMAIN d_int_not_null; +DROP DOMAIN d_int_positive_maybe_null; DROP TABLE check_ign_err2; DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err;