Replace pg_mblen() with bounds-checked versions.

A corrupted string could cause code that iterates with pg_mblen() to
overrun its buffer.  Fix this by converting all callers to one of the
following:

1. Callers with a null-terminated string now use pg_mblen_cstr(), which
raises an "illegal byte sequence" error if it finds a terminator in the
middle of a multibyte sequence.

2. Callers with a length or end pointer now use either
pg_mblen_with_len() or pg_mblen_range(), for the same effect, depending
on which of the two seems more convenient at each site.

3. A small number of cases pre-validate a string, and can use
pg_mblen_unbounded().

The traditional pg_mblen() function and COPYCHAR macro still exist for
backward compatibility, but are no longer used by core code and are
hereby deprecated.  The same applies to the t_isXXX() functions.

Security: CVE-2026-2006
Backpatch-through: 14
Co-authored-by: Thomas Munro <thomas.munro@gmail.com>
Co-authored-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reported-by: Paul Gerste (as part of zeroday.cloud)
Reported-by: Moritz Sanft (as part of zeroday.cloud)
This commit is contained in:
Thomas Munro 2026-01-07 22:14:31 +13:00
parent 7a522039f7
commit 319e8a6441
41 changed files with 532 additions and 359 deletions

View file

@ -116,36 +116,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
/*
* returns the common prefix length of a node key
*
* If the underlying type is character data, the prefix length may point in
* the middle of a multibyte character.
*/
static int32
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
{
GBT_VARKEY_R r = gbt_var_key_readable(node);
int32 i = 0;
int32 l = 0;
int32 l_left_to_match = 0;
int32 l_total = 0;
int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
int32 ml = Min(t1len, t2len);
char *p1 = VARDATA(r.lower);
char *p2 = VARDATA(r.upper);
const char *end1 = p1 + t1len;
const char *end2 = p2 + t2len;
if (ml == 0)
return 0;
while (i < ml)
{
if (tinfo->eml > 1 && l == 0)
if (tinfo->eml > 1 && l_left_to_match == 0)
{
if ((l = pg_mblen(p1)) != pg_mblen(p2))
l_total = pg_mblen_range(p1, end1);
if (l_total != pg_mblen_range(p2, end2))
{
return i;
}
l_left_to_match = l_total;
}
if (*p1 != *p2)
{
if (tinfo->eml > 1)
{
return (i - l + 1);
int32 l_matched_subset = l_total - l_left_to_match;
/* end common prefix at final byte of last matching char */
return i - l_matched_subset;
}
else
{
@ -155,7 +166,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
p1++;
p2++;
l--;
l_left_to_match--;
i++;
}
return ml; /* lower == upper */

View file

@ -48,15 +48,15 @@ find_word(char *in, char **end)
char *start;
*end = NULL;
while (*in && t_isspace(in))
in += pg_mblen(in);
while (*in && t_isspace_cstr(in))
in += pg_mblen_cstr(in);
if (!*in || *in == '#')
return NULL;
start = in;
while (*in && !t_isspace(in))
in += pg_mblen(in);
while (*in && !t_isspace_cstr(in))
in += pg_mblen_cstr(in);
*end = in;

View file

@ -64,7 +64,7 @@ prssyntaxerror(HSParser *state)
errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in hstore, near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
pg_mblen_cstr(state->ptr), state->ptr,
(int) (state->ptr - state->begin))));
/* In soft error situation, return false as convenience for caller */
return false;

View file

@ -27,14 +27,14 @@ getlexeme(char *start, char *end, int *len)
char *ptr;
while (start < end && t_iseq(start, '_'))
start += pg_mblen(start);
start += pg_mblen_range(start, end);
ptr = start;
if (ptr >= end)
return NULL;
while (ptr < end && !t_iseq(ptr, '_'))
ptr += pg_mblen(ptr);
ptr += pg_mblen_range(ptr, end);
*len = ptr - start;
return start;

View file

@ -127,7 +127,7 @@ typedef struct
#define LQUERY_HASNOT 0x01
/* valid label chars are alphanumerics, underscores and hyphens */
#define ISLABEL(x) ( t_isalnum(x) || t_iseq(x, '_') || t_iseq(x, '-') )
#define ISLABEL(x) ( t_isalnum_cstr(x) || t_iseq(x, '_') || t_iseq(x, '-') )
/* full text query */

View file

@ -55,7 +55,7 @@ parse_ltree(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
charlen = pg_mblen_cstr(ptr);
if (t_iseq(ptr, '.'))
num++;
ptr += charlen;
@ -70,7 +70,7 @@ parse_ltree(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
charlen = pg_mblen_cstr(ptr);
switch (state)
{
@ -292,7 +292,7 @@ parse_lquery(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
charlen = pg_mblen_cstr(ptr);
if (t_iseq(ptr, '.'))
num++;
@ -312,7 +312,7 @@ parse_lquery(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
charlen = pg_mblen(ptr);
charlen = pg_mblen_cstr(ptr);
switch (state)
{
@ -412,7 +412,7 @@ parse_lquery(const char *buf, struct Node *escontext)
case LQPRS_WAITFNUM:
if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
else if (t_isdigit(ptr))
else if (t_isdigit_cstr(ptr))
{
int low = atoi(ptr);
@ -430,7 +430,7 @@ parse_lquery(const char *buf, struct Node *escontext)
UNCHAR;
break;
case LQPRS_WAITSNUM:
if (t_isdigit(ptr))
if (t_isdigit_cstr(ptr))
{
int high = atoi(ptr);
@ -461,7 +461,7 @@ parse_lquery(const char *buf, struct Node *escontext)
case LQPRS_WAITCLOSE:
if (t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
else if (!t_isdigit(ptr))
else if (!t_isdigit_cstr(ptr))
UNCHAR;
break;
case LQPRS_WAITND:
@ -472,7 +472,7 @@ parse_lquery(const char *buf, struct Node *escontext)
}
else if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
else if (!t_isdigit(ptr))
else if (!t_isdigit_cstr(ptr))
UNCHAR;
break;
case LQPRS_WAITEND:

View file

@ -64,7 +64,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
for (;;)
{
charlen = pg_mblen(state->buf);
charlen = pg_mblen_cstr(state->buf);
switch (state->state)
{
@ -88,7 +88,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
*lenval = charlen;
*flag = 0;
}
else if (!t_isspace(state->buf))
else if (!t_isspace_cstr(state->buf))
ereturn(state->escontext, ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand syntax error")));

View file

@ -101,7 +101,7 @@ text_to_bits(char *str, int len)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid character \"%.*s\" in t_bits string",
pg_mblen(str + off), str + off)));
pg_mblen_cstr(str + off), str + off)));
if (off % 8 == 7)
bits[off / 8] = byte;

View file

@ -52,10 +52,10 @@ typedef char trgm[3];
} while(0)
#ifdef KEEPONLYALNUM
#define ISWORDCHR(c) (t_isalnum(c))
#define ISWORDCHR(c, len) (t_isalnum_with_len(c, len))
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
#else
#define ISWORDCHR(c) (!t_isspace(c))
#define ISWORDCHR(c, len) (!t_isspace_with_len(c, len))
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
#endif
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )

View file

@ -174,18 +174,29 @@ static char *
find_word(char *str, int lenstr, char **endword, int *charlen)
{
char *beginword = str;
const char *endstr = str + lenstr;
while (beginword - str < lenstr && !ISWORDCHR(beginword))
beginword += pg_mblen(beginword);
while (beginword < endstr)
{
int clen = pg_mblen_range(beginword, endstr);
if (beginword - str >= lenstr)
if (ISWORDCHR(beginword, clen))
break;
beginword += clen;
}
if (beginword >= endstr)
return NULL;
*endword = beginword;
*charlen = 0;
while (*endword - str < lenstr && ISWORDCHR(*endword))
while (*endword < endstr)
{
*endword += pg_mblen(*endword);
int clen = pg_mblen_range(*endword, endstr);
if (!ISWORDCHR(*endword, clen))
break;
*endword += clen;
(*charlen)++;
}
@ -233,9 +244,9 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
if (bytelen > charlen)
{
/* Find multibyte character boundaries and apply compact_trigram */
int lenfirst = pg_mblen(str),
lenmiddle = pg_mblen(str + lenfirst),
lenlast = pg_mblen(str + lenfirst + lenmiddle);
int lenfirst = pg_mblen_unbounded(str),
lenmiddle = pg_mblen_unbounded(str + lenfirst),
lenlast = pg_mblen_unbounded(str + lenfirst + lenmiddle);
while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
{
@ -246,7 +257,7 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
lenfirst = lenmiddle;
lenmiddle = lenlast;
lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle);
}
}
else
@ -726,6 +737,7 @@ get_wildcard_part(const char *str, int lenstr,
{
const char *beginword = str;
const char *endword;
const char *endstr = str + lenstr;
char *s = buf;
bool in_leading_wildcard_meta = false;
bool in_trailing_wildcard_meta = false;
@ -738,11 +750,13 @@ get_wildcard_part(const char *str, int lenstr,
* from this loop to the next one, since we may exit at a word character
* that is in_escape.
*/
while (beginword - str < lenstr)
while (beginword < endstr)
{
clen = pg_mblen_range(beginword, endstr);
if (in_escape)
{
if (ISWORDCHR(beginword))
if (ISWORDCHR(beginword, clen))
break;
in_escape = false;
in_leading_wildcard_meta = false;
@ -753,12 +767,12 @@ get_wildcard_part(const char *str, int lenstr,
in_escape = true;
else if (ISWILDCARDCHAR(beginword))
in_leading_wildcard_meta = true;
else if (ISWORDCHR(beginword))
else if (ISWORDCHR(beginword, clen))
break;
else
in_leading_wildcard_meta = false;
}
beginword += pg_mblen(beginword);
beginword += clen;
}
/*
@ -791,12 +805,12 @@ get_wildcard_part(const char *str, int lenstr,
* string boundary. Strip escapes during copy.
*/
endword = beginword;
while (endword - str < lenstr)
while (endword < endstr)
{
clen = pg_mblen(endword);
clen = pg_mblen_range(endword, endstr);
if (in_escape)
{
if (ISWORDCHR(endword))
if (ISWORDCHR(endword, clen))
{
memcpy(s, endword, clen);
(*charlen)++;
@ -824,7 +838,7 @@ get_wildcard_part(const char *str, int lenstr,
in_trailing_wildcard_meta = true;
break;
}
else if (ISWORDCHR(endword))
else if (ISWORDCHR(endword, clen))
{
memcpy(s, endword, clen);
(*charlen)++;

View file

@ -481,7 +481,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
static void RE_compile(regex_t *regex, text *text_re,
int cflags, Oid collation);
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
static int convertPgWchar(pg_wchar c, trgm_mb_char *result);
static void transformGraph(TrgmNFA *trgmNFA);
static void processState(TrgmNFA *trgmNFA, TrgmState *state);
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
@ -806,10 +806,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
for (j = 0; j < charsCount; j++)
{
trgm_mb_char c;
int clen = convertPgWchar(chars[j], &c);
if (!convertPgWchar(chars[j], &c))
if (!clen)
continue; /* ok to ignore it altogether */
if (ISWORDCHR(c.bytes))
if (ISWORDCHR(c.bytes, clen))
colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
else
colorInfo->containsNonWord = true;
@ -821,13 +822,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
/*
* Convert pg_wchar to multibyte format.
* Returns false if the character should be ignored completely.
* Returns 0 if the character should be ignored completely, else returns its
* byte length.
*/
static bool
static int
convertPgWchar(pg_wchar c, trgm_mb_char *result)
{
/* "s" has enough space for a multibyte character and a trailing NUL */
char s[MAX_MULTIBYTE_CHAR_LEN + 1];
int clen;
/*
* We can ignore the NUL character, since it can never appear in a PG text
@ -835,11 +838,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
* reconstructing trigrams.
*/
if (c == 0)
return false;
return 0;
/* Do the conversion, making sure the result is NUL-terminated */
memset(s, 0, sizeof(s));
pg_wchar2mb_with_len(&c, s, 1);
clen = pg_wchar2mb_with_len(&c, s, 1);
/*
* In IGNORECASE mode, we can ignore uppercase characters. We assume that
@ -861,7 +864,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
if (strcmp(lowerCased, s) != 0)
{
pfree(lowerCased);
return false;
return 0;
}
pfree(lowerCased);
}
@ -869,7 +872,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
return true;
return clen;
}

View file

@ -155,9 +155,9 @@ initTrie(const char *filename)
state = 0;
for (ptr = line; *ptr; ptr += ptrlen)
{
ptrlen = pg_mblen(ptr);
ptrlen = pg_mblen_cstr(ptr);
/* ignore whitespace, but end src or trg */
if (t_isspace(ptr))
if (t_isspace_cstr(ptr))
{
if (state == 1)
state = 2;
@ -381,6 +381,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
char *srcchar = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *srcstart = srcchar;
const char *srcend = srcstart + len;
TSLexeme *res;
StringInfoData buf;
@ -408,7 +409,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
}
else
{
matchlen = pg_mblen(srcchar);
matchlen = pg_mblen_range(srcchar, srcend);
if (buf.data != NULL)
appendBinaryStringInfo(&buf, srcchar, matchlen);
}

View file

@ -1160,7 +1160,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal,
if (cursorpos > 0)
newcp++;
}
chlen = pg_mblen(prosrc);
chlen = pg_mblen_cstr(prosrc);
if (strncmp(prosrc, literal, chlen) != 0)
goto fail;
prosrc += chlen;

View file

@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags)
char *lastchar;
/* Skip leading spaces */
while (*in && t_isspace(in))
in += pg_mblen(in);
while (*in && t_isspace_cstr(in))
in += pg_mblen_cstr(in);
/* Return NULL on empty lines */
if (*in == '\0')
@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags)
lastchar = start = in;
/* Find end of word */
while (*in && !t_isspace(in))
while (*in && !t_isspace_cstr(in))
{
lastchar = in;
in += pg_mblen(in);
in += pg_mblen_cstr(in);
}
if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)

View file

@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d)
ptr = line;
/* is it a comment? */
while (*ptr && t_isspace(ptr))
ptr += pg_mblen(ptr);
while (*ptr && t_isspace_cstr(ptr))
ptr += pg_mblen_cstr(ptr);
if (t_iseq(ptr, '#') || *ptr == '\0' ||
t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
errmsg("unexpected delimiter")));
state = TR_WAITSUBS;
}
else if (!t_isspace(ptr))
else if (!t_isspace_cstr(ptr))
{
beginwrd = ptr;
state = TR_INLEX;
@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITSUBS;
}
else if (t_isspace(ptr))
else if (t_isspace_cstr(ptr))
{
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITLEX;
@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d)
{
useasis = true;
state = TR_INSUBS;
beginwrd = ptr + pg_mblen(ptr);
beginwrd = ptr + pg_mblen_cstr(ptr);
}
else if (t_iseq(ptr, '\\'))
{
useasis = false;
state = TR_INSUBS;
beginwrd = ptr + pg_mblen(ptr);
beginwrd = ptr + pg_mblen_cstr(ptr);
}
else if (!t_isspace(ptr))
else if (!t_isspace_cstr(ptr))
{
useasis = false;
beginwrd = ptr;
@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
}
else if (state == TR_INSUBS)
{
if (t_isspace(ptr))
if (t_isspace_cstr(ptr))
{
if (ptr == beginwrd)
ereport(ERROR,
@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
else
elog(ERROR, "unrecognized thesaurus state: %d", state);
ptr += pg_mblen(ptr);
ptr += pg_mblen_cstr(ptr);
}
if (state == TR_INSUBS)

View file

@ -37,7 +37,7 @@ RS_isRegis(const char *str)
{
if (state == RS_IN_WAIT)
{
if (t_isalpha(c))
if (t_isalpha_cstr(c))
/* okay */ ;
else if (t_iseq(c, '['))
state = RS_IN_ONEOF;
@ -48,14 +48,14 @@ RS_isRegis(const char *str)
{
if (t_iseq(c, '^'))
state = RS_IN_NONEOF;
else if (t_isalpha(c))
else if (t_isalpha_cstr(c))
state = RS_IN_ONEOF_IN;
else
return false;
}
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
{
if (t_isalpha(c))
if (t_isalpha_cstr(c))
/* okay */ ;
else if (t_iseq(c, ']'))
state = RS_IN_WAIT;
@ -64,7 +64,7 @@ RS_isRegis(const char *str)
}
else
elog(ERROR, "internal error in RS_isRegis: state %d", state);
c += pg_mblen(c);
c += pg_mblen_cstr(c);
}
return (state == RS_IN_WAIT);
@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str)
{
if (state == RS_IN_WAIT)
{
if (t_isalpha(c))
if (t_isalpha_cstr(c))
{
if (ptr)
ptr = newRegisNode(ptr, len);
else
ptr = r->node = newRegisNode(NULL, len);
COPYCHAR(ptr->data, c);
ptr->type = RSF_ONEOF;
ptr->len = pg_mblen(c);
ptr->len = ts_copychar_cstr(ptr->data, c);
}
else if (t_iseq(c, '['))
{
@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str)
ptr->type = RSF_NONEOF;
state = RS_IN_NONEOF;
}
else if (t_isalpha(c))
else if (t_isalpha_cstr(c))
{
COPYCHAR(ptr->data, c);
ptr->len = pg_mblen(c);
ptr->len = ts_copychar_cstr(ptr->data, c);
state = RS_IN_ONEOF_IN;
}
else /* shouldn't get here */
@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str)
}
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
{
if (t_isalpha(c))
{
COPYCHAR(ptr->data + ptr->len, c);
ptr->len += pg_mblen(c);
}
if (t_isalpha_cstr(c))
ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c);
else if (t_iseq(c, ']'))
state = RS_IN_WAIT;
else /* shouldn't get here */
@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str)
}
else
elog(ERROR, "internal error in RS_compile: state %d", state);
c += pg_mblen(c);
c += pg_mblen_cstr(c);
}
if (state != RS_IN_WAIT) /* shouldn't get here */
@ -187,10 +182,10 @@ mb_strchr(char *str, char *c)
char *ptr = str;
bool res = false;
clen = pg_mblen(c);
clen = pg_mblen_cstr(c);
while (*ptr && !res)
{
plen = pg_mblen(ptr);
plen = pg_mblen_cstr(ptr);
if (plen == clen)
{
i = plen;
@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str)
while (*c)
{
len++;
c += pg_mblen(c);
c += pg_mblen_cstr(c);
}
if (len < r->nchar)
@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str)
{
len -= r->nchar;
while (len-- > 0)
c += pg_mblen(c);
c += pg_mblen_cstr(c);
}
@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str)
elog(ERROR, "unrecognized regis node type: %d", ptr->type);
}
ptr = ptr->next;
c += pg_mblen(c);
c += pg_mblen_cstr(c);
}
return true;

View file

@ -232,7 +232,7 @@ findchar(char *str, int c)
{
if (t_iseq(str, c))
return str;
str += pg_mblen(str);
str += pg_mblen_cstr(str);
}
return NULL;
@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2)
{
if (t_iseq(str, c1) || t_iseq(str, c2))
return str;
str += pg_mblen(str);
str += pg_mblen_cstr(str);
}
return NULL;
@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
char *next,
*sbuf = *sflagset;
int maxstep;
int clen;
bool stop = false;
bool met_comma = false;
@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
{
case FM_LONG:
case FM_CHAR:
COPYCHAR(sflag, *sflagset);
sflag += pg_mblen(*sflagset);
clen = ts_copychar_cstr(sflag, *sflagset);
sflag += clen;
/* Go to start of the next flag */
*sflagset += pg_mblen(*sflagset);
*sflagset += clen;
/* Check if we get all characters of flag */
maxstep--;
@ -391,7 +392,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
*sflagset = next;
while (**sflagset)
{
if (t_isdigit(*sflagset))
if (t_isdigit_cstr(*sflagset))
{
if (!met_comma)
ereport(ERROR,
@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
*sflagset)));
met_comma = true;
}
else if (!t_isspace(*sflagset))
else if (!t_isspace_cstr(*sflagset))
{
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
*sflagset)));
}
*sflagset += pg_mblen(*sflagset);
*sflagset += pg_mblen_cstr(*sflagset);
}
stop = true;
break;
@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
while (*s)
{
/* we allow only single encoded flags for faster works */
if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s))
s++;
else
{
@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
s = line;
while (*s)
{
if (t_isspace(s))
if (t_isspace_cstr(s))
{
*s = '\0';
break;
}
s += pg_mblen(s);
s += pg_mblen_cstr(s);
}
pstr = lowerstr_ctx(Conf, line);
@ -796,17 +797,17 @@ get_nextfield(char **str, char *next)
while (**str)
{
int clen = pg_mblen_cstr(*str);
if (state == PAE_WAIT_MASK)
{
if (t_iseq(*str, '#'))
return false;
else if (!t_isspace(*str))
else if (!t_isspace_cstr(*str))
{
int clen = pg_mblen(*str);
if (clen < avail)
{
COPYCHAR(next, *str);
ts_copychar_with_len(next, *str, clen);
next += clen;
avail -= clen;
}
@ -815,24 +816,22 @@ get_nextfield(char **str, char *next)
}
else /* state == PAE_INMASK */
{
if (t_isspace(*str))
if (t_isspace_cstr(*str))
{
*next = '\0';
return true;
}
else
{
int clen = pg_mblen(*str);
if (clen < avail)
{
COPYCHAR(next, *str);
ts_copychar_with_len(next, *str, clen);
next += clen;
avail -= clen;
}
}
}
*str += pg_mblen(*str);
*str += clen;
}
*next = '\0';
@ -922,14 +921,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
while (*str)
{
int clen = pg_mblen_cstr(str);
if (state == PAE_WAIT_MASK)
{
if (t_iseq(str, '#'))
return false;
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
pmask += ts_copychar_with_len(pmask, str, clen);
state = PAE_INMASK;
}
}
@ -940,10 +940,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*pmask = '\0';
state = PAE_WAIT_FIND;
}
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
pmask += ts_copychar_with_len(pmask, str, clen);
}
}
else if (state == PAE_WAIT_FIND)
@ -952,13 +951,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
{
state = PAE_INFIND;
}
else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
{
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
prepl += ts_copychar_with_len(prepl, str, clen);
state = PAE_INREPL;
}
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@ -970,12 +968,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*pfind = '\0';
state = PAE_WAIT_REPL;
}
else if (t_isalpha(str))
else if (t_isalpha_cstr(str))
{
COPYCHAR(pfind, str);
pfind += pg_mblen(str);
pfind += ts_copychar_with_len(pfind, str, clen);
}
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@ -986,13 +983,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
{
break; /* void repl */
}
else if (t_isalpha(str))
else if (t_isalpha_cstr(str))
{
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
prepl += ts_copychar_with_len(prepl, str, clen);
state = PAE_INREPL;
}
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@ -1004,12 +1000,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*prepl = '\0';
break;
}
else if (t_isalpha(str))
else if (t_isalpha_cstr(str))
{
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
prepl += ts_copychar_with_len(prepl, str, clen);
}
else if (!t_isspace(str))
else if (!t_isspace_cstr(str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@ -1017,7 +1012,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
else
elog(ERROR, "unrecognized state in parse_affentry: %d", state);
str += pg_mblen(str);
str += clen;
}
*pmask = *pfind = *prepl = '\0';
@ -1070,10 +1065,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
CompoundAffixFlag *newValue;
char sbuf[BUFSIZ];
char *sflag;
int clen;
while (*s && t_isspace(s))
s += pg_mblen(s);
while (*s && t_isspace_cstr(s))
s += pg_mblen_cstr(s);
if (!*s)
ereport(ERROR,
@ -1082,10 +1076,10 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
/* Get flag without \n */
sflag = sbuf;
while (*s && !t_isspace(s) && *s != '\n')
while (*s && !t_isspace_cstr(s) && *s != '\n')
{
clen = pg_mblen(s);
COPYCHAR(sflag, s);
int clen = ts_copychar_cstr(sflag, s);
sflag += clen;
s += clen;
}
@ -1228,7 +1222,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
while ((recoded = tsearch_readline(&trst)) != NULL)
{
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
{
pfree(recoded);
continue;
@ -1265,8 +1259,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
char *s = recoded + strlen("FLAG");
while (*s && t_isspace(s))
s += pg_mblen(s);
while (*s && t_isspace_cstr(s))
s += pg_mblen_cstr(s);
if (*s)
{
@ -1301,7 +1295,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
int fields_read;
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
goto nextline;
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
@ -1464,12 +1458,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s = findchar2(recoded, 'l', 'L');
if (s)
{
while (*s && !t_isspace(s))
s += pg_mblen(s);
while (*s && t_isspace(s))
s += pg_mblen(s);
while (*s && !t_isspace_cstr(s))
s += pg_mblen_cstr(s);
while (*s && t_isspace_cstr(s))
s += pg_mblen_cstr(s);
if (*s && pg_mblen(s) == 1)
if (*s && pg_mblen_cstr(s) == 1)
{
addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
Conf->usecompound = true;
@ -1497,8 +1491,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s = recoded + 4; /* we need non-lowercased string */
flagflags = 0;
while (*s && t_isspace(s))
s += pg_mblen(s);
while (*s && t_isspace_cstr(s))
s += pg_mblen_cstr(s);
if (*s == '*')
{
@ -1519,14 +1513,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
* be followed by EOL, whitespace, or ':'. Otherwise this is a
* new-format flag command.
*/
if (*s && pg_mblen(s) == 1)
if (*s && pg_mblen_cstr(s) == 1)
{
COPYCHAR(flag, s);
flag[0] = *s++;
flag[1] = '\0';
s++;
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
t_isspace(s))
t_isspace_cstr(s))
{
oldformat = true;
goto nextline;
@ -1750,7 +1743,7 @@ NISortDictionary(IspellDict *Conf)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
Conf->Spell[i]->p.flag)));
if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",

View file

@ -31,81 +31,44 @@ static void tsearch_readline_callback(void *arg);
*/
#define WC_BUF_LEN 3
int
t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isdigit(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswdigit((wint_t) character[0]);
}
int
t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isspace(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswspace((wint_t) character[0]);
}
int
t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalpha(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswalpha((wint_t) character[0]);
}
int
t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isalnum(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswalnum((wint_t) character[0]);
}
int
t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isprint(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswprint((wint_t) character[0]);
/*
 * GENERATE_T_ISCLASS_DEF(character_class) expands to four functions that
 * test whether the character at ptr belongs to the ctype class named by
 * character_class:
 *
 *   t_is<class>_with_len(ptr, mblen) - core implementation; obtains the
 *       character's byte length from pg_mblen_with_len(ptr, mblen)
 *   t_is<class>_cstr(ptr)            - passes pg_mblen_cstr(ptr) as mblen
 *   t_is<class>_unbounded(ptr)       - passes pg_mblen_unbounded(ptr) as mblen
 *   t_is<class>(ptr)                 - historical name; forwards to _unbounded
 *
 * Each returns an int: if the character is one byte long, or
 * database_ctype_is_c is set, the result is is<class>() applied to the first
 * byte; otherwise the character is converted with char2wchar() and the
 * result is isw<class>() applied to the first resulting wchar_t.
 *
 * NOTE(review): presumably the pg_mblen_* helpers report an error for a
 * truncated character rather than returning a short length -- confirm
 * against their definitions.
 */
#define GENERATE_T_ISCLASS_DEF(character_class) \
/* mblen shall be that of the first character */ \
int \
t_is##character_class##_with_len(const char *ptr, int mblen) \
{ \
int clen = pg_mblen_with_len(ptr, mblen); \
wchar_t character[WC_BUF_LEN]; \
pg_locale_t mylocale = 0; /* TODO */ \
if (clen == 1 || database_ctype_is_c) \
return is##character_class(TOUCHAR(ptr)); \
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \
return isw##character_class((wint_t) character[0]); \
} \
\
/* ptr shall point to a NUL-terminated string */ \
int \
t_is##character_class##_cstr(const char *ptr) \
{ \
return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
} \
/* ptr shall point to a string with pre-validated encoding */ \
int \
t_is##character_class##_unbounded(const char *ptr) \
{ \
return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
} \
/* historical name for _unbounded */ \
int \
t_is##character_class(const char *ptr) \
{ \
return t_is##character_class##_unbounded(ptr); \
}
GENERATE_T_ISCLASS_DEF(alnum)
GENERATE_T_ISCLASS_DEF(alpha)
GENERATE_T_ISCLASS_DEF(digit)
GENERATE_T_ISCLASS_DEF(print)
GENERATE_T_ISCLASS_DEF(space)
/*
* Set up to read a file using tsearch_readline(). This facility is

View file

@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
char *pbuf = line;
/* Trim trailing space */
while (*pbuf && !t_isspace(pbuf))
pbuf += pg_mblen(pbuf);
while (*pbuf && !t_isspace_cstr(pbuf))
pbuf += pg_mblen_cstr(pbuf);
*pbuf = '\0';
/* Skip empty lines */

View file

@ -1728,7 +1728,8 @@ TParserGet(TParser *prs)
prs->state->charlen = 0;
else
prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
pg_mblen(prs->str + prs->state->posbyte);
pg_mblen_range(prs->str + prs->state->posbyte,
prs->str + prs->lenstr);
Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);

View file

@ -215,7 +215,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
ereturn(escontext, 0,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal digit: \"%.*s\"",
pg_mblen(s), s)));
pg_mblen_range(s, srcend), s)));
s++;
if (s >= srcend)
ereturn(escontext, 0,
@ -225,7 +225,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
ereturn(escontext, 0,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal digit: \"%.*s\"",
pg_mblen(s), s)));
pg_mblen_range(s, srcend), s)));
s++;
*p++ = (v1 << 4) | v2;
}
@ -354,7 +354,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
pg_mblen(s - 1), s - 1)));
pg_mblen_range(s - 1, srcend), s - 1)));
}
/* add it to buffer */
buf = (buf << 6) + b;

View file

@ -1396,7 +1396,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid datetime format separator: \"%s\"",
pnstrdup(str, pg_mblen(str)))));
pnstrdup(str, pg_mblen_cstr(str)))));
if (*str == ' ')
n->type = NODE_TYPE_SPACE;
@ -1426,7 +1426,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
/* backslash quotes the next character, if any */
if (*str == '\\' && *(str + 1))
str++;
chlen = pg_mblen(str);
chlen = pg_mblen_cstr(str);
n->type = NODE_TYPE_CHAR;
memcpy(n->character, str, chlen);
n->character[chlen] = '\0';
@ -1444,7 +1444,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
*/
if (*str == '\\' && *(str + 1) == '"')
str++;
chlen = pg_mblen(str);
chlen = pg_mblen_cstr(str);
if ((flags & DCH_FLAG) && is_separator_char(str))
n->type = NODE_TYPE_SEPARATOR;
@ -2274,8 +2274,8 @@ asc_toupper_z(const char *buff)
do { \
if (S_THth(_suf)) \
{ \
if (*(ptr)) (ptr) += pg_mblen(ptr); \
if (*(ptr)) (ptr) += pg_mblen(ptr); \
if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
} \
} while (0)
@ -3481,7 +3481,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
* insist that the consumed character match the format's
* character.
*/
s += pg_mblen(s);
s += pg_mblen_cstr(s);
}
continue;
}
@ -3503,11 +3503,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
if (extra_skip > 0)
extra_skip--;
else
s += pg_mblen(s);
s += pg_mblen_cstr(s);
}
else
{
int chlen = pg_mblen(s);
int chlen = pg_mblen_cstr(s);
/*
* Standard mode requires strict match of format characters.
@ -5809,13 +5809,15 @@ NUM_numpart_to_char(NUMProc *Np, int id)
static void
NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
{
const char *end = Np->inout + input_len;
while (n-- > 0)
{
if (OVERLOAD_TEST)
break; /* end of input */
if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
break; /* it's a data character */
Np->inout_p += pg_mblen(Np->inout_p);
Np->inout_p += pg_mblen_range(Np->inout_p, end);
}
}
@ -6268,7 +6270,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
}
else
{
Np->inout_p += pg_mblen(Np->inout_p);
Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len);
}
continue;
}

View file

@ -693,7 +693,7 @@ report_json_context(JsonLexContext *lex)
{
/* Advance to next multibyte character */
if (IS_HIGHBIT_SET(*context_start))
context_start += pg_mblen(context_start);
context_start += pg_mblen_range(context_start, context_end);
else
context_start++;
}

View file

@ -597,7 +597,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
pg_mblen(flags->val + i), flags->val + i)));
pg_mblen_range(flags->val + i, flags->val + flags->len),
flags->val + i)));
break;
}
}

View file

@ -83,6 +83,8 @@ varstr_levenshtein(const char *source, int slen,
int *s_char_len = NULL;
int j;
const char *y;
const char *send = source + slen;
const char *tend = target + tlen;
/*
* For varstr_levenshtein_less_equal, we have real variables called
@ -183,10 +185,10 @@ varstr_levenshtein(const char *source, int slen,
#endif
/*
* In order to avoid calling pg_mblen() repeatedly on each character in s,
* we cache all the lengths before starting the main loop -- but if all
* the characters in both strings are single byte, then we skip this and
* use a fast-path in the main loop. If only one string contains
* In order to avoid calling pg_mblen_range() repeatedly on each character
* in s, we cache all the lengths before starting the main loop -- but if
* all the characters in both strings are single byte, then we skip this
* and use a fast-path in the main loop. If only one string contains
* multi-byte characters, we still build the array, so that the fast-path
* needn't deal with the case where the array hasn't been initialized.
*/
@ -198,7 +200,7 @@ varstr_levenshtein(const char *source, int slen,
s_char_len = (int *) palloc((m + 1) * sizeof(int));
for (i = 0; i < m; ++i)
{
s_char_len[i] = pg_mblen(cp);
s_char_len[i] = pg_mblen_range(cp, send);
cp += s_char_len[i];
}
s_char_len[i] = 0;
@ -224,7 +226,7 @@ varstr_levenshtein(const char *source, int slen,
{
int *temp;
const char *x = source;
int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
int y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1;
int i;
#ifdef LEVENSHTEIN_LESS_EQUAL

View file

@ -55,20 +55,20 @@ static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
*--------------------
*/
static inline int
wchareq(const char *p1, const char *p2)
wchareq(const char *p1, int p1len, const char *p2, int p2len)
{
int p1_len;
int p1clen;
/* Optimization: quickly compare the first byte. */
if (*p1 != *p2)
return 0;
p1_len = pg_mblen(p1);
if (pg_mblen(p2) != p1_len)
p1clen = pg_mblen_with_len(p1, p1len);
if (pg_mblen_with_len(p2, p2len) != p1clen)
return 0;
/* They are the same length */
while (p1_len--)
while (p1clen--)
{
if (*p1++ != *p2++)
return 0;
@ -105,11 +105,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
#define NextByte(p, plen) ((p)++, (plen)--)
/* Set up to compile like_match.c for multibyte characters */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
#define NextChar(p, plen) \
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
do { int __l = pg_mblen(src); \
do { int __l = pg_mblen_with_len((src), (srclen)); \
(srclen) -= __l; \
while (__l-- > 0) \
*(dst)++ = *(src)++; \
@ -121,7 +121,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
#include "like_match.c"
/* Set up to compile like_match.c for single-byte characters */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

View file

@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc)
errhint("Escape string must be empty or one character.")));
e = VARDATA_ANY(esc);
elen = VARSIZE_ANY_EXHDR(esc);
/*
* If specified escape is '\', just copy the pattern as-is.
@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc)
afterescape = false;
while (plen > 0)
{
if (CHAREQ(p, e) && !afterescape)
if (CHAREQ(p, plen, e, elen) && !afterescape)
{
*r++ = '\\';
NextChar(p, plen);

View file

@ -153,8 +153,8 @@ lpad(PG_FUNCTION_ARGS)
char *ptr1,
*ptr2,
*ptr2start,
*ptr2end,
*ptr_ret;
const char *ptr2end;
int m,
s1len,
s2len;
@ -199,7 +199,7 @@ lpad(PG_FUNCTION_ARGS)
while (m--)
{
int mlen = pg_mblen(ptr2);
int mlen = pg_mblen_range(ptr2, ptr2end);
memcpy(ptr_ret, ptr2, mlen);
ptr_ret += mlen;
@ -212,7 +212,7 @@ lpad(PG_FUNCTION_ARGS)
while (s1len--)
{
int mlen = pg_mblen(ptr1);
int mlen = pg_mblen_unbounded(ptr1);
memcpy(ptr_ret, ptr1, mlen);
ptr_ret += mlen;
@ -251,8 +251,8 @@ rpad(PG_FUNCTION_ARGS)
char *ptr1,
*ptr2,
*ptr2start,
*ptr2end,
*ptr_ret;
const char *ptr2end;
int m,
s1len,
s2len;
@ -292,11 +292,12 @@ rpad(PG_FUNCTION_ARGS)
m = len - s1len;
ptr1 = VARDATA_ANY(string1);
ptr_ret = VARDATA(ret);
while (s1len--)
{
int mlen = pg_mblen(ptr1);
int mlen = pg_mblen_unbounded(ptr1);
memcpy(ptr_ret, ptr1, mlen);
ptr_ret += mlen;
@ -308,7 +309,7 @@ rpad(PG_FUNCTION_ARGS)
while (m--)
{
int mlen = pg_mblen(ptr2);
int mlen = pg_mblen_range(ptr2, ptr2end);
memcpy(ptr_ret, ptr2, mlen);
ptr_ret += mlen;
@ -393,6 +394,7 @@ dotrim(const char *string, int stringlen,
*/
const char **stringchars;
const char **setchars;
const char *setend;
int *stringmblen;
int *setmblen;
int stringnchars;
@ -400,6 +402,7 @@ dotrim(const char *string, int stringlen,
int resultndx;
int resultnchars;
const char *p;
const char *pend;
int len;
int mblen;
const char *str_pos;
@ -410,10 +413,11 @@ dotrim(const char *string, int stringlen,
stringnchars = 0;
p = string;
len = stringlen;
pend = p + len;
while (len > 0)
{
stringchars[stringnchars] = p;
stringmblen[stringnchars] = mblen = pg_mblen(p);
stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend);
stringnchars++;
p += mblen;
len -= mblen;
@ -424,10 +428,11 @@ dotrim(const char *string, int stringlen,
setnchars = 0;
p = set;
len = setlen;
setend = set + setlen;
while (len > 0)
{
setchars[setnchars] = p;
setmblen[setnchars] = mblen = pg_mblen(p);
setmblen[setnchars] = mblen = pg_mblen_range(p, setend);
setnchars++;
p += mblen;
len -= mblen;
@ -805,6 +810,8 @@ translate(PG_FUNCTION_ARGS)
*to_end;
char *source,
*target;
const char *source_end;
const char *from_end;
int m,
fromlen,
tolen,
@ -819,9 +826,11 @@ translate(PG_FUNCTION_ARGS)
if (m <= 0)
PG_RETURN_TEXT_P(string);
source = VARDATA_ANY(string);
source_end = source + m;
fromlen = VARSIZE_ANY_EXHDR(from);
from_ptr = VARDATA_ANY(from);
from_end = from_ptr + fromlen;
tolen = VARSIZE_ANY_EXHDR(to);
to_ptr = VARDATA_ANY(to);
to_end = to_ptr + tolen;
@ -845,12 +854,12 @@ translate(PG_FUNCTION_ARGS)
while (m > 0)
{
source_len = pg_mblen(source);
source_len = pg_mblen_range(source, source_end);
from_index = 0;
for (i = 0; i < fromlen; i += len)
{
len = pg_mblen(&from_ptr[i]);
len = pg_mblen_range(&from_ptr[i], from_end);
if (len == source_len &&
memcmp(source, &from_ptr[i], len) == 0)
break;
@ -866,11 +875,11 @@ translate(PG_FUNCTION_ARGS)
{
if (p >= to_end)
break;
p += pg_mblen(p);
p += pg_mblen_range(p, to_end);
}
if (p < to_end)
{
len = pg_mblen(p);
len = pg_mblen_range(p, to_end);
memcpy(target, p, len);
target += len;
retlen += len;

View file

@ -443,7 +443,7 @@ parse_re_flags(pg_re_flags *flags, text *opts)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%.*s\"",
pg_mblen(opt_p + i), opt_p + i)));
pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));
break;
}
}
@ -673,12 +673,13 @@ textregexreplace(PG_FUNCTION_ARGS)
if (VARSIZE_ANY_EXHDR(opt) > 0)
{
char *opt_p = VARDATA_ANY(opt);
const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt);
if (*opt_p >= '0' && *opt_p <= '9')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%.*s\"",
pg_mblen(opt_p), opt_p),
pg_mblen_range(opt_p, end_p), opt_p),
errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
}
@ -772,6 +773,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
*r;
int plen,
elen;
const char *pend;
bool afterescape = false;
int nquotes = 0;
int bracket_depth = 0; /* square bracket nesting level */
@ -779,6 +781,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
pend = p + plen;
if (esc_text == NULL)
{
/* No ESCAPE clause provided; default to backslash as escape */
@ -878,7 +881,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
if (elen > 1)
{
int mblen = pg_mblen(p);
int mblen = pg_mblen_range(p, pend);
if (mblen > 1)
{

View file

@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
return buf;
buf++;
while (*buf && pg_mblen(buf) == 1)
while (*buf && pg_mblen_cstr(buf) == 1)
{
switch (*buf)
{
@ -197,7 +197,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
continue;
}
if (!t_isdigit(ptr))
if (!t_isdigit_cstr(ptr))
return false;
errno = 0;
@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate)
return false;
/* it shouldn't be a part of any word */
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr))
return false;
for (;;)
{
ptr += pg_mblen(ptr);
ptr += pg_mblen_cstr(ptr);
if (*ptr == '\0') /* got end of string without operand */
return false;
@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate)
* So we still treat OR literal as operation with possibly incorrect
* operand and will not search it as lexeme
*/
if (!t_isspace(ptr))
if (!t_isspace_cstr(ptr))
break;
}
@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
/* generic syntax error message is fine */
return PT_ERR;
}
else if (!t_isspace(state->buf))
else if (!t_isspace_cstr(state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
@ -383,14 +383,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
{
return (state->count) ? PT_ERR : PT_END;
}
else if (!t_isspace(state->buf))
else if (!t_isspace_cstr(state->buf))
{
return PT_ERR;
}
break;
}
state->buf += pg_mblen(state->buf);
state->buf += pg_mblen_cstr(state->buf);
}
}
@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
state->state = WAITOPERAND;
continue;
}
else if (!t_isspace(state->buf))
else if (!t_isspace_cstr(state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
state->buf++;
continue;
}
else if (!t_isspace(state->buf))
else if (!t_isspace_cstr(state->buf))
{
/* insert implicit AND between operands */
state->state = WAITOPERAND;
@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
break;
}
state->buf += pg_mblen(state->buf);
state->buf += pg_mblen_cstr(state->buf);
}
}
@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
*(in->cur) = '\\';
in->cur++;
}
COPYCHAR(in->cur, op);
clen = pg_mblen(op);
clen = ts_copychar_cstr(in->cur, op);
op += clen;
in->cur += clen;
}

View file

@ -319,9 +319,9 @@ tsvectorout(PG_FUNCTION_ARGS)
lenbuf = 0,
pp;
WordEntry *ptr = ARRPTR(out);
char *curbegin,
*curin,
char *curin,
*curout;
const char *curend;
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
for (i = 0; i < out->size; i++)
@ -334,13 +334,14 @@ tsvectorout(PG_FUNCTION_ARGS)
curout = outbuf = (char *) palloc(lenbuf);
for (i = 0; i < out->size; i++)
{
curbegin = curin = STRPTR(out) + ptr->pos;
curin = STRPTR(out) + ptr->pos;
curend = curin + ptr->len;
if (i != 0)
*curout++ = ' ';
*curout++ = '\'';
while (curin - curbegin < ptr->len)
while (curin < curend)
{
int len = pg_mblen(curin);
int len = pg_mblen_range(curin, curend);
if (t_iseq(curin, '\''))
*curout++ = '\'';

View file

@ -2604,11 +2604,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
if (ws)
{
char *buf;
const char *end;
buf = VARDATA_ANY(ws);
while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
end = buf + VARSIZE_ANY_EXHDR(ws);
while (buf < end)
{
if (pg_mblen(buf) == 1)
int len = pg_mblen_range(buf, end);
if (len == 1)
{
switch (*buf)
{
@ -2632,7 +2636,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
stat->weight |= 0;
}
}
buf += pg_mblen(buf);
buf += len;
}
}

View file

@ -206,10 +206,9 @@ gettoken_tsvector(TSVectorParseState state,
else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR;
else if (!t_isspace(state->prsbuf))
else if (!t_isspace_cstr(state->prsbuf))
{
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
curpos += ts_copychar_cstr(curpos, state->prsbuf);
statecode = WAITENDWORD;
}
}
@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
curpos += ts_copychar_cstr(curpos, state->prsbuf);
Assert(oldstate != 0);
statecode = oldstate;
}
@ -236,7 +234,7 @@ gettoken_tsvector(TSVectorParseState state,
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
{
@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
curpos += ts_copychar_cstr(curpos, state->prsbuf);
}
}
else if (statecode == WAITENDCMPLX)
@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
curpos += ts_copychar_cstr(curpos, state->prsbuf);
}
}
else if (statecode == WAITCHARCMPLX)
@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state,
if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
curpos += ts_copychar_cstr(curpos, state->prsbuf);
statecode = WAITENDCMPLX;
}
else
@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state,
PRSSYNTAXERROR;
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
/* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
RETURN_TOKEN;
}
else
@ -317,7 +312,7 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == INPOSINFO)
{
if (t_isdigit(state->prsbuf))
if (t_isdigit_cstr(state->prsbuf))
{
if (posalen == 0)
{
@ -372,10 +367,10 @@ gettoken_tsvector(TSVectorParseState state,
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 0);
}
else if (t_isspace(state->prsbuf) ||
else if (t_isspace_cstr(state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
else if (!t_isdigit(state->prsbuf))
else if (!t_isdigit_cstr(state->prsbuf))
PRSSYNTAXERROR;
}
else /* internal error */
@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state,
statecode);
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
state->prsbuf += pg_mblen_cstr(state->prsbuf);
}
}

View file

@ -232,7 +232,7 @@ bit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid binary digit",
pg_mblen(sp), sp)));
pg_mblen_cstr(sp), sp)));
x >>= 1;
if (x == 0)
@ -257,7 +257,7 @@ bit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid hexadecimal digit",
pg_mblen(sp), sp)));
pg_mblen_cstr(sp), sp)));
if (bc)
{
@ -533,7 +533,7 @@ varbit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid binary digit",
pg_mblen(sp), sp)));
pg_mblen_cstr(sp), sp)));
x >>= 1;
if (x == 0)
@ -558,7 +558,7 @@ varbit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid hexadecimal digit",
pg_mblen(sp), sp)));
pg_mblen_cstr(sp), sp)));
if (bc)
{

View file

@ -799,8 +799,11 @@ text_catenate(text *t1, text *t2)
* charlen_to_bytelen()
* Compute the number of bytes occupied by n characters starting at *p
*
* It is caller's responsibility that there actually are n characters;
* the string need not be null-terminated.
* The caller shall ensure there are n complete characters. Callers achieve
* this by deriving "n" from regmatch_t findings from searching a wchar array.
* pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
* matches will end no later than the last complete character. (The string
* need not be null-terminated.)
*/
static int
charlen_to_bytelen(const char *p, int n)
@ -815,7 +818,7 @@ charlen_to_bytelen(const char *p, int n)
const char *s;
for (s = p; n > 0; n--)
s += pg_mblen(s);
s += pg_mblen_unbounded(s); /* caller verified encoding */
return s - p;
}
@ -949,6 +952,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
int32 slice_start;
int32 slice_size;
int32 slice_strlen;
int32 slice_len;
text *slice;
int32 E1;
int32 i;
@ -1018,7 +1022,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
slice = (text *) DatumGetPointer(str);
/* see if we got back an empty string */
if (VARSIZE_ANY_EXHDR(slice) == 0)
slice_len = VARSIZE_ANY_EXHDR(slice);
if (slice_len == 0)
{
if (slice != (text *) DatumGetPointer(str))
pfree(slice);
@ -1027,7 +1032,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
/* Now we can get the actual length of the slice in MB characters */
slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
VARSIZE_ANY_EXHDR(slice));
slice_len);
/*
* Check that the start position wasn't > slice_strlen. If so, SQL99
@ -1054,7 +1059,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
*/
p = VARDATA_ANY(slice);
for (i = 0; i < S1 - 1; i++)
p += pg_mblen(p);
p += pg_mblen_unbounded(p);
/* hang onto a pointer to our start position */
s = p;
@ -1064,7 +1069,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
* length.
*/
for (i = S1; i < E1; i++)
p += pg_mblen(p);
p += pg_mblen_unbounded(p);
ret = (text *) palloc(VARHDRSZ + (p - s));
SET_VARSIZE(ret, VARHDRSZ + (p - s));
@ -1362,6 +1367,8 @@ retry:
*/
if (state->is_multibyte_char_in_char)
{
const char *haystack_end = state->str1 + state->len1;
/* Walk one character at a time, until we reach the match. */
/* the search should never move backwards. */
@ -1370,7 +1377,7 @@ retry:
while (state->refpoint < matchptr)
{
/* step to next character. */
state->refpoint += pg_mblen(state->refpoint);
state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
state->refpos++;
/*
@ -4685,6 +4692,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
}
else
{
const char *end_ptr;
/*
* When fldsep is NULL, each character in the input string becomes a
* separate element in the result set. The separator is effectively
@ -4693,10 +4702,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
start_ptr = VARDATA_ANY(inputstring);
end_ptr = start_ptr + inputstring_len;
while (inputstring_len > 0)
{
int chunk_len = pg_mblen(start_ptr);
int chunk_len = pg_mblen_range(start_ptr, end_ptr);
CHECK_FOR_INTERRUPTS();
@ -5600,7 +5610,7 @@ text_reverse(PG_FUNCTION_ARGS)
{
int sz;
sz = pg_mblen(p);
sz = pg_mblen_range(p, endp);
dst -= sz;
memcpy(dst, p, sz);
p += sz;
@ -5761,7 +5771,7 @@ text_format(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%.*s\"",
pg_mblen(cp), cp),
pg_mblen_range(cp, end_ptr), cp),
errhint("For a single \"%%\" use \"%%%%\".")));
/* If indirect width was specified, get its value */
@ -5882,7 +5892,7 @@ text_format(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%.*s\"",
pg_mblen(cp), cp),
pg_mblen_range(cp, end_ptr), cp),
errhint("For a single \"%%\" use \"%%%%\".")));
break;
}

View file

@ -2338,8 +2338,7 @@ sqlchar_to_unicode(const char *s)
char *utf8string;
pg_wchar ret[2]; /* need space for trailing zero */
/* note we're not assuming s is null-terminated */
utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
pg_encoding_mblen(PG_UTF8, utf8string));
@ -2392,7 +2391,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
initStringInfo(&buf);
for (p = ident; *p; p += pg_mblen(p))
for (p = ident; *p; p += pg_mblen_cstr(p))
{
if (*p == ':' && (p == ident || fully_escaped))
appendStringInfoString(&buf, "_x003A_");
@ -2417,7 +2416,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
: !is_valid_xml_namechar(u))
appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
else
appendBinaryStringInfo(&buf, p, pg_mblen(p));
appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
}
}
@ -2440,7 +2439,7 @@ map_xml_name_to_sql_identifier(const char *name)
initStringInfo(&buf);
for (p = name; *p; p += pg_mblen(p))
for (p = name; *p; p += pg_mblen_cstr(p))
{
if (*p == '_' && *(p + 1) == 'x'
&& isxdigit((unsigned char) *(p + 2))
@ -2458,7 +2457,7 @@ map_xml_name_to_sql_identifier(const char *name)
p += 6;
}
else
appendBinaryStringInfo(&buf, p, pg_mblen(p));
appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
}
return buf.data;

View file

@ -38,6 +38,7 @@
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "varatt.h"
@ -97,6 +98,13 @@ static char *perform_default_encoding_conversion(const char *src,
int len, bool is_client_to_server);
static int cliplen(const char *str, int len, int limit);
pg_attribute_noreturn()
static void report_invalid_encoding_int(int encoding, const char *mbstr,
int mblen, int len);
pg_attribute_noreturn()
static void report_invalid_encoding_db(const char *mbstr, int mblen, int len);
/*
* Prepare for a future call to SetClientEncoding. Success should mean
@ -1019,11 +1027,126 @@ pg_encoding_wchar2mb_with_len(int encoding,
return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
}
/* returns the byte length of a multibyte character */
/*
 * pg_mblen_cstr
 *		Byte length of the multibyte character at the start of a
 *		null-terminated string.
 *
 * An illegal byte sequence error is raised when a null terminator shows up
 * inside the character, that is, before the number of bytes reported for it
 * has been consumed.
 *
 * Callers are expected to test *mbstr for the terminator themselves before
 * calling.  Some nevertheless call this on the terminator and rely on
 * getting 1 back, so that result is preserved.
 *
 * Use this only on strings that really are null-terminated: the terminator
 * is what makes bounds detection possible.
 */
int
pg_mblen_cstr(const char *mbstr)
{
	const unsigned char *ustr = (const unsigned char *) mbstr;
	int			nbytes = pg_wchar_table[DatabaseEncoding->encoding].mblen(ustr);
	int			pos;

	/*
	 * Scan the trailing bytes of the character for a terminator.  When mbstr
	 * already points at the terminator the .mblen functions return 1, so the
	 * loop body never runs; that case is tolerated for now because some
	 * callers depend on it.
	 */
	for (pos = 1; pos < nbytes; pos++)
	{
		if (unlikely(mbstr[pos] == '\0'))
			report_invalid_encoding_db(mbstr, nbytes, pos);
	}

	/* Nothing more to verify when we were handed the terminator itself. */
	if (mbstr[0] == '\0')
		return nbytes;

	/*
	 * The string ought to be NUL-terminated, but verifying the whole of it
	 * on every call would make typical callers O(N^2), tripling Valgrind
	 * check-world time.  So, unless VALGRIND_EXPENSIVE is defined, look only
	 * at the single byte following the character just found: it must be
	 * either the terminator or the first byte of the next character.  A
	 * caller that walks the entire string will thereby have a missing
	 * terminator diagnosed on its final call.
	 */
#ifdef VALGRIND_EXPENSIVE
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr));
#else
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + nbytes, 1);
#endif

	return nbytes;
}
/*
 * Returns the byte length of the multibyte character sequence starting at
 * mbstr, where the caller's data occupies the range [mbstr, end).  The range
 * must be at least one byte in size.
 *
 * Raises an illegal byte sequence error if the length reported for the
 * character would carry it past 'end'.
 */
int
pg_mblen_range(const char *mbstr, const char *end)
{
	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);

	Assert(end > mbstr);

	/*
	 * Never ask Valgrind about bytes outside [mbstr, end).  'length' has not
	 * been validated yet, so for a truncated character it may reach beyond
	 * the caller's buffer; clamp the checked size to the range so that the
	 * bounds-checked function does not itself look past the bound.
	 */
#ifdef VALGRIND_EXPENSIVE
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr);
#else
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr,
								  (length < end - mbstr) ? length : end - mbstr);
#endif

	/*
	 * Compare sizes instead of forming mbstr + length: when the character
	 * overruns the range that sum is not a valid pointer to compute.
	 */
	if (unlikely(length > end - mbstr))
		report_invalid_encoding_db(mbstr, length, end - mbstr);

	return length;
}
/*
 * Returns the byte length of the multibyte character sequence starting at
 * mbstr, where the caller's data extends for 'limit' bytes.  'limit' must be
 * at least one.
 *
 * Raises an illegal byte sequence error if the length reported for the
 * character exceeds 'limit'.
 */
int
pg_mblen_with_len(const char *mbstr, int limit)
{
	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);

	Assert(limit >= 1);

	/*
	 * Never ask Valgrind about bytes beyond 'limit'.  'length' has not been
	 * validated yet, so for a truncated character it may exceed what the
	 * caller owns; clamp the checked size so that the bounds-checked
	 * function does not itself look past the bound.
	 */
#ifdef VALGRIND_EXPENSIVE
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit);
#else
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, (length < limit) ? length : limit);
#endif

	if (unlikely(length > limit))
		report_invalid_encoding_db(mbstr, length, limit);

	return length;
}
/*
 * pg_mblen_unbounded
 *		Length of a multibyte character sequence, with no bounds validation
 *		whatsoever.
 *
 * PLEASE NOTE: this is safe only when the caller has already verified the
 * input string.  On an invalid string the reported length can run past the
 * end of the buffer.  Having previously called one of the pg_mbstrlen*
 * functions on the string is sufficient verification.
 */
int
pg_mblen_unbounded(const char *mbstr)
{
	const unsigned char *ustr = (const unsigned char *) mbstr;
	int			nbytes;

	nbytes = pg_wchar_table[DatabaseEncoding->encoding].mblen(ustr);

	/* Under Valgrind, confirm that the bytes of the character are defined. */
	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, nbytes);

	return nbytes;
}
/*
* Historical name for pg_mblen_unbounded(). Should not be used and will be
* removed in a later version.
*/
int
pg_mblen(const char *mbstr)
{
return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
return pg_mblen_unbounded(mbstr);
}
/* returns the display length of a multibyte character */
@ -1045,14 +1168,14 @@ pg_mbstrlen(const char *mbstr)
while (*mbstr)
{
mbstr += pg_mblen(mbstr);
mbstr += pg_mblen_cstr(mbstr);
len++;
}
return len;
}
/* returns the length (counted in wchars) of a multibyte string
* (not necessarily NULL terminated)
* (stops at the first of "limit" or a NUL)
*/
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
@ -1065,7 +1188,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
while (limit > 0 && *mbstr)
{
int l = pg_mblen(mbstr);
int l = pg_mblen_with_len(mbstr, limit);
limit -= l;
mbstr += l;
@ -1135,7 +1258,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
while (len > 0 && *mbstr)
{
l = pg_mblen(mbstr);
l = pg_mblen_with_len(mbstr, len);
nch++;
if (nch > limit)
break;
@ -1699,12 +1822,19 @@ void
report_invalid_encoding(int encoding, const char *mbstr, int len)
{
int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
report_invalid_encoding_int(encoding, mbstr, l, len);
}
static void
report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len)
{
char buf[8 * 5 + 1];
char *p = buf;
int j,
jlimit;
jlimit = Min(l, len);
jlimit = Min(mblen, len);
jlimit = Min(jlimit, 8); /* prevent buffer overrun */
for (j = 0; j < jlimit; j++)
@ -1721,6 +1851,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
buf)));
}
/*
 * report_invalid_encoding_db
 *		Shorthand for report_invalid_encoding_int() that supplies the
 *		encoding returned by GetDatabaseEncoding().
 */
static void
report_invalid_encoding_db(const char *mbstr, int mblen, int len)
{
	int			db_encoding = GetDatabaseEncoding();

	report_invalid_encoding_int(db_encoding, mbstr, mblen, len);
}
/*
* report_untranslatable_char: complain about untranslatable character
*

View file

@ -697,7 +697,14 @@ extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *str);
extern int pg_mblen_cstr(const char *mbstr);
extern int pg_mblen_range(const char *mbstr, const char *end);
extern int pg_mblen_with_len(const char *mbstr, int limit);
extern int pg_mblen_unbounded(const char *mbstr);
/* deprecated */
extern int pg_mblen(const char *mbstr);
extern int pg_dsplen(const char *mbstr);
extern int pg_mbstrlen(const char *mbstr);
extern int pg_mbstrlen_with_len(const char *mbstr, int limit);

View file

@ -37,13 +37,37 @@ typedef struct
/* The second argument of t_iseq() must be a plain ASCII character */
#define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c))
#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
/*
 * Copy one multibyte character whose byte length the caller already knows.
 * Exactly 'length' bytes are copied from src to dest, and 'length' is handed
 * back so the caller can advance its pointers.
 */
static inline int
ts_copychar_with_len(void *dest, const void *src, int length)
{
	(void) memcpy(dest, src, (size_t) length);

	return length;
}
extern int t_isdigit(const char *ptr);
extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
extern int t_isalnum(const char *ptr);
extern int t_isprint(const char *ptr);
/*
 * Copy the multibyte character at the start of a null-terminated string.
 * Its byte length is obtained from pg_mblen_cstr() and is also the return
 * value.
 */
static inline int
ts_copychar_cstr(void *dest, const void *src)
{
	int			chlen = pg_mblen_cstr((const char *) src);

	return ts_copychar_with_len(dest, src, chlen);
}
/* Historical macro for the above. */
#define COPYCHAR ts_copychar_cstr
/*
 * Declare the t_is<class>() family of functions for one character class.
 *
 * For a given class name (e.g. "digit") this expands to extern declarations
 * of t_is<class>_with_len(), t_is<class>_cstr() and t_is<class>_unbounded(),
 * plus the original unsuffixed t_is<class>(), which is kept but deprecated.
 *
 * NOTE(review): the suffixes presumably mirror the bounds handling of the
 * pg_mblen_with_len / pg_mblen_cstr / pg_mblen_unbounded functions; confirm
 * against the definitions.
 */
#define GENERATE_T_ISCLASS_DECL(character_class) \
extern int t_is##character_class##_with_len(const char *ptr, int len); \
extern int t_is##character_class##_cstr(const char *ptr); \
extern int t_is##character_class##_unbounded(const char *ptr); \
\
/* deprecated */ \
extern int t_is##character_class(const char *ptr);
GENERATE_T_ISCLASS_DECL(alnum);
GENERATE_T_ISCLASS_DECL(alpha);
GENERATE_T_ISCLASS_DECL(digit);
GENERATE_T_ISCLASS_DECL(print);
GENERATE_T_ISCLASS_DECL(space);
extern char *lowerstr(const char *str);
extern char *lowerstr_with_len(const char *str, int len);

View file

@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state,
extern void close_tsvector_parser(TSVectorParseState state);
/* phrase operator begins with '<' */
#define ISOPERATOR(x) \
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
#define ISOPERATOR(x) (*(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<')
/* parse_tsquery */

View file

@ -414,7 +414,8 @@ parse_test_flags(test_re_flags *flags, text *opts)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression test option: \"%.*s\"",
pg_mblen(opt_p + i), opt_p + i)));
pg_mblen_range(opt_p + i, opt_p + opt_len),
opt_p + i)));
break;
}
}