mirror of
https://github.com/postgres/postgres.git
synced 2026-02-15 16:48:17 -05:00
Replace pg_mblen() with bounds-checked versions.
A corrupted string could cause code that iterates with pg_mblen() to overrun its buffer. Fix by converting all callers to one of the following:

1. Callers with a null-terminated string now use pg_mblen_cstr(), which raises an "illegal byte sequence" error if it finds a terminator in the middle of the sequence.

2. Callers with a length or end pointer now use either pg_mblen_with_len() or pg_mblen_range(), for the same effect, depending on which of the two seems more convenient at each site.

3. A small number of cases pre-validate a string, and can use pg_mblen_unbounded().

The traditional pg_mblen() function and COPYCHAR macro still exist for backward compatibility, but are no longer used by core code and are hereby deprecated. The same applies to the t_isXXX() functions.

Security: CVE-2026-2006
Backpatch-through: 14
Co-authored-by: Thomas Munro <thomas.munro@gmail.com>
Co-authored-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reported-by: Paul Gerste (as part of zeroday.cloud)
Reported-by: Moritz Sanft (as part of zeroday.cloud)
This commit is contained in:
parent
7a522039f7
commit
319e8a6441
41 changed files with 532 additions and 359 deletions
|
|
@ -116,36 +116,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
|
|||
|
||||
/*
|
||||
* returns the common prefix length of a node key
|
||||
*
|
||||
* If the underlying type is character data, the prefix length may point in
|
||||
* the middle of a multibyte character.
|
||||
*/
|
||||
static int32
|
||||
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
|
||||
{
|
||||
GBT_VARKEY_R r = gbt_var_key_readable(node);
|
||||
int32 i = 0;
|
||||
int32 l = 0;
|
||||
int32 l_left_to_match = 0;
|
||||
int32 l_total = 0;
|
||||
int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
|
||||
int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
|
||||
int32 ml = Min(t1len, t2len);
|
||||
char *p1 = VARDATA(r.lower);
|
||||
char *p2 = VARDATA(r.upper);
|
||||
const char *end1 = p1 + t1len;
|
||||
const char *end2 = p2 + t2len;
|
||||
|
||||
if (ml == 0)
|
||||
return 0;
|
||||
|
||||
while (i < ml)
|
||||
{
|
||||
if (tinfo->eml > 1 && l == 0)
|
||||
if (tinfo->eml > 1 && l_left_to_match == 0)
|
||||
{
|
||||
if ((l = pg_mblen(p1)) != pg_mblen(p2))
|
||||
l_total = pg_mblen_range(p1, end1);
|
||||
if (l_total != pg_mblen_range(p2, end2))
|
||||
{
|
||||
return i;
|
||||
}
|
||||
l_left_to_match = l_total;
|
||||
}
|
||||
if (*p1 != *p2)
|
||||
{
|
||||
if (tinfo->eml > 1)
|
||||
{
|
||||
return (i - l + 1);
|
||||
int32 l_matched_subset = l_total - l_left_to_match;
|
||||
|
||||
/* end common prefix at final byte of last matching char */
|
||||
return i - l_matched_subset;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -155,7 +166,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
|
|||
|
||||
p1++;
|
||||
p2++;
|
||||
l--;
|
||||
l_left_to_match--;
|
||||
i++;
|
||||
}
|
||||
return ml; /* lower == upper */
|
||||
|
|
|
|||
|
|
@ -48,15 +48,15 @@ find_word(char *in, char **end)
|
|||
char *start;
|
||||
|
||||
*end = NULL;
|
||||
while (*in && t_isspace(in))
|
||||
in += pg_mblen(in);
|
||||
while (*in && t_isspace_cstr(in))
|
||||
in += pg_mblen_cstr(in);
|
||||
|
||||
if (!*in || *in == '#')
|
||||
return NULL;
|
||||
start = in;
|
||||
|
||||
while (*in && !t_isspace(in))
|
||||
in += pg_mblen(in);
|
||||
while (*in && !t_isspace_cstr(in))
|
||||
in += pg_mblen_cstr(in);
|
||||
|
||||
*end = in;
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ prssyntaxerror(HSParser *state)
|
|||
errsave(state->escontext,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in hstore, near \"%.*s\" at position %d",
|
||||
pg_mblen(state->ptr), state->ptr,
|
||||
pg_mblen_cstr(state->ptr), state->ptr,
|
||||
(int) (state->ptr - state->begin))));
|
||||
/* In soft error situation, return false as convenience for caller */
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -27,14 +27,14 @@ getlexeme(char *start, char *end, int *len)
|
|||
char *ptr;
|
||||
|
||||
while (start < end && t_iseq(start, '_'))
|
||||
start += pg_mblen(start);
|
||||
start += pg_mblen_range(start, end);
|
||||
|
||||
ptr = start;
|
||||
if (ptr >= end)
|
||||
return NULL;
|
||||
|
||||
while (ptr < end && !t_iseq(ptr, '_'))
|
||||
ptr += pg_mblen(ptr);
|
||||
ptr += pg_mblen_range(ptr, end);
|
||||
|
||||
*len = ptr - start;
|
||||
return start;
|
||||
|
|
|
|||
|
|
@ -127,7 +127,7 @@ typedef struct
|
|||
#define LQUERY_HASNOT 0x01
|
||||
|
||||
/* valid label chars are alphanumerics, underscores and hyphens */
|
||||
#define ISLABEL(x) ( t_isalnum(x) || t_iseq(x, '_') || t_iseq(x, '-') )
|
||||
#define ISLABEL(x) ( t_isalnum_cstr(x) || t_iseq(x, '_') || t_iseq(x, '-') )
|
||||
|
||||
/* full text query */
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ parse_ltree(const char *buf, struct Node *escontext)
|
|||
ptr = buf;
|
||||
while (*ptr)
|
||||
{
|
||||
charlen = pg_mblen(ptr);
|
||||
charlen = pg_mblen_cstr(ptr);
|
||||
if (t_iseq(ptr, '.'))
|
||||
num++;
|
||||
ptr += charlen;
|
||||
|
|
@ -70,7 +70,7 @@ parse_ltree(const char *buf, struct Node *escontext)
|
|||
ptr = buf;
|
||||
while (*ptr)
|
||||
{
|
||||
charlen = pg_mblen(ptr);
|
||||
charlen = pg_mblen_cstr(ptr);
|
||||
|
||||
switch (state)
|
||||
{
|
||||
|
|
@ -292,7 +292,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
ptr = buf;
|
||||
while (*ptr)
|
||||
{
|
||||
charlen = pg_mblen(ptr);
|
||||
charlen = pg_mblen_cstr(ptr);
|
||||
|
||||
if (t_iseq(ptr, '.'))
|
||||
num++;
|
||||
|
|
@ -312,7 +312,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
ptr = buf;
|
||||
while (*ptr)
|
||||
{
|
||||
charlen = pg_mblen(ptr);
|
||||
charlen = pg_mblen_cstr(ptr);
|
||||
|
||||
switch (state)
|
||||
{
|
||||
|
|
@ -412,7 +412,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
case LQPRS_WAITFNUM:
|
||||
if (t_iseq(ptr, ','))
|
||||
state = LQPRS_WAITSNUM;
|
||||
else if (t_isdigit(ptr))
|
||||
else if (t_isdigit_cstr(ptr))
|
||||
{
|
||||
int low = atoi(ptr);
|
||||
|
||||
|
|
@ -430,7 +430,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
UNCHAR;
|
||||
break;
|
||||
case LQPRS_WAITSNUM:
|
||||
if (t_isdigit(ptr))
|
||||
if (t_isdigit_cstr(ptr))
|
||||
{
|
||||
int high = atoi(ptr);
|
||||
|
||||
|
|
@ -461,7 +461,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
case LQPRS_WAITCLOSE:
|
||||
if (t_iseq(ptr, '}'))
|
||||
state = LQPRS_WAITEND;
|
||||
else if (!t_isdigit(ptr))
|
||||
else if (!t_isdigit_cstr(ptr))
|
||||
UNCHAR;
|
||||
break;
|
||||
case LQPRS_WAITND:
|
||||
|
|
@ -472,7 +472,7 @@ parse_lquery(const char *buf, struct Node *escontext)
|
|||
}
|
||||
else if (t_iseq(ptr, ','))
|
||||
state = LQPRS_WAITSNUM;
|
||||
else if (!t_isdigit(ptr))
|
||||
else if (!t_isdigit_cstr(ptr))
|
||||
UNCHAR;
|
||||
break;
|
||||
case LQPRS_WAITEND:
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
|
|||
|
||||
for (;;)
|
||||
{
|
||||
charlen = pg_mblen(state->buf);
|
||||
charlen = pg_mblen_cstr(state->buf);
|
||||
|
||||
switch (state->state)
|
||||
{
|
||||
|
|
@ -88,7 +88,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
|
|||
*lenval = charlen;
|
||||
*flag = 0;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
else if (!t_isspace_cstr(state->buf))
|
||||
ereturn(state->escontext, ERR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("operand syntax error")));
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ text_to_bits(char *str, int len)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("invalid character \"%.*s\" in t_bits string",
|
||||
pg_mblen(str + off), str + off)));
|
||||
pg_mblen_cstr(str + off), str + off)));
|
||||
|
||||
if (off % 8 == 7)
|
||||
bits[off / 8] = byte;
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@ typedef char trgm[3];
|
|||
} while(0)
|
||||
|
||||
#ifdef KEEPONLYALNUM
|
||||
#define ISWORDCHR(c) (t_isalnum(c))
|
||||
#define ISWORDCHR(c, len) (t_isalnum_with_len(c, len))
|
||||
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
|
||||
#else
|
||||
#define ISWORDCHR(c) (!t_isspace(c))
|
||||
#define ISWORDCHR(c, len) (!t_isspace_with_len(c, len))
|
||||
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
|
||||
#endif
|
||||
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
|
||||
|
|
|
|||
|
|
@ -174,18 +174,29 @@ static char *
|
|||
find_word(char *str, int lenstr, char **endword, int *charlen)
|
||||
{
|
||||
char *beginword = str;
|
||||
const char *endstr = str + lenstr;
|
||||
|
||||
while (beginword - str < lenstr && !ISWORDCHR(beginword))
|
||||
beginword += pg_mblen(beginword);
|
||||
while (beginword < endstr)
|
||||
{
|
||||
int clen = pg_mblen_range(beginword, endstr);
|
||||
|
||||
if (beginword - str >= lenstr)
|
||||
if (ISWORDCHR(beginword, clen))
|
||||
break;
|
||||
beginword += clen;
|
||||
}
|
||||
|
||||
if (beginword >= endstr)
|
||||
return NULL;
|
||||
|
||||
*endword = beginword;
|
||||
*charlen = 0;
|
||||
while (*endword - str < lenstr && ISWORDCHR(*endword))
|
||||
while (*endword < endstr)
|
||||
{
|
||||
*endword += pg_mblen(*endword);
|
||||
int clen = pg_mblen_range(*endword, endstr);
|
||||
|
||||
if (!ISWORDCHR(*endword, clen))
|
||||
break;
|
||||
*endword += clen;
|
||||
(*charlen)++;
|
||||
}
|
||||
|
||||
|
|
@ -233,9 +244,9 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
|
|||
if (bytelen > charlen)
|
||||
{
|
||||
/* Find multibyte character boundaries and apply compact_trigram */
|
||||
int lenfirst = pg_mblen(str),
|
||||
lenmiddle = pg_mblen(str + lenfirst),
|
||||
lenlast = pg_mblen(str + lenfirst + lenmiddle);
|
||||
int lenfirst = pg_mblen_unbounded(str),
|
||||
lenmiddle = pg_mblen_unbounded(str + lenfirst),
|
||||
lenlast = pg_mblen_unbounded(str + lenfirst + lenmiddle);
|
||||
|
||||
while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
|
||||
{
|
||||
|
|
@ -246,7 +257,7 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
|
|||
|
||||
lenfirst = lenmiddle;
|
||||
lenmiddle = lenlast;
|
||||
lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
|
||||
lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -726,6 +737,7 @@ get_wildcard_part(const char *str, int lenstr,
|
|||
{
|
||||
const char *beginword = str;
|
||||
const char *endword;
|
||||
const char *endstr = str + lenstr;
|
||||
char *s = buf;
|
||||
bool in_leading_wildcard_meta = false;
|
||||
bool in_trailing_wildcard_meta = false;
|
||||
|
|
@ -738,11 +750,13 @@ get_wildcard_part(const char *str, int lenstr,
|
|||
* from this loop to the next one, since we may exit at a word character
|
||||
* that is in_escape.
|
||||
*/
|
||||
while (beginword - str < lenstr)
|
||||
while (beginword < endstr)
|
||||
{
|
||||
clen = pg_mblen_range(beginword, endstr);
|
||||
|
||||
if (in_escape)
|
||||
{
|
||||
if (ISWORDCHR(beginword))
|
||||
if (ISWORDCHR(beginword, clen))
|
||||
break;
|
||||
in_escape = false;
|
||||
in_leading_wildcard_meta = false;
|
||||
|
|
@ -753,12 +767,12 @@ get_wildcard_part(const char *str, int lenstr,
|
|||
in_escape = true;
|
||||
else if (ISWILDCARDCHAR(beginword))
|
||||
in_leading_wildcard_meta = true;
|
||||
else if (ISWORDCHR(beginword))
|
||||
else if (ISWORDCHR(beginword, clen))
|
||||
break;
|
||||
else
|
||||
in_leading_wildcard_meta = false;
|
||||
}
|
||||
beginword += pg_mblen(beginword);
|
||||
beginword += clen;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -791,12 +805,12 @@ get_wildcard_part(const char *str, int lenstr,
|
|||
* string boundary. Strip escapes during copy.
|
||||
*/
|
||||
endword = beginword;
|
||||
while (endword - str < lenstr)
|
||||
while (endword < endstr)
|
||||
{
|
||||
clen = pg_mblen(endword);
|
||||
clen = pg_mblen_range(endword, endstr);
|
||||
if (in_escape)
|
||||
{
|
||||
if (ISWORDCHR(endword))
|
||||
if (ISWORDCHR(endword, clen))
|
||||
{
|
||||
memcpy(s, endword, clen);
|
||||
(*charlen)++;
|
||||
|
|
@ -824,7 +838,7 @@ get_wildcard_part(const char *str, int lenstr,
|
|||
in_trailing_wildcard_meta = true;
|
||||
break;
|
||||
}
|
||||
else if (ISWORDCHR(endword))
|
||||
else if (ISWORDCHR(endword, clen))
|
||||
{
|
||||
memcpy(s, endword, clen);
|
||||
(*charlen)++;
|
||||
|
|
|
|||
|
|
@ -481,7 +481,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
|
|||
static void RE_compile(regex_t *regex, text *text_re,
|
||||
int cflags, Oid collation);
|
||||
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
|
||||
static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
|
||||
static int convertPgWchar(pg_wchar c, trgm_mb_char *result);
|
||||
static void transformGraph(TrgmNFA *trgmNFA);
|
||||
static void processState(TrgmNFA *trgmNFA, TrgmState *state);
|
||||
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
|
||||
|
|
@ -806,10 +806,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
|
|||
for (j = 0; j < charsCount; j++)
|
||||
{
|
||||
trgm_mb_char c;
|
||||
int clen = convertPgWchar(chars[j], &c);
|
||||
|
||||
if (!convertPgWchar(chars[j], &c))
|
||||
if (!clen)
|
||||
continue; /* ok to ignore it altogether */
|
||||
if (ISWORDCHR(c.bytes))
|
||||
if (ISWORDCHR(c.bytes, clen))
|
||||
colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
|
||||
else
|
||||
colorInfo->containsNonWord = true;
|
||||
|
|
@ -821,13 +822,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
|
|||
|
||||
/*
|
||||
* Convert pg_wchar to multibyte format.
|
||||
* Returns false if the character should be ignored completely.
|
||||
* Returns 0 if the character should be ignored completely, else returns its
|
||||
* byte length.
|
||||
*/
|
||||
static bool
|
||||
static int
|
||||
convertPgWchar(pg_wchar c, trgm_mb_char *result)
|
||||
{
|
||||
/* "s" has enough space for a multibyte character and a trailing NUL */
|
||||
char s[MAX_MULTIBYTE_CHAR_LEN + 1];
|
||||
int clen;
|
||||
|
||||
/*
|
||||
* We can ignore the NUL character, since it can never appear in a PG text
|
||||
|
|
@ -835,11 +838,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
|
|||
* reconstructing trigrams.
|
||||
*/
|
||||
if (c == 0)
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
/* Do the conversion, making sure the result is NUL-terminated */
|
||||
memset(s, 0, sizeof(s));
|
||||
pg_wchar2mb_with_len(&c, s, 1);
|
||||
clen = pg_wchar2mb_with_len(&c, s, 1);
|
||||
|
||||
/*
|
||||
* In IGNORECASE mode, we can ignore uppercase characters. We assume that
|
||||
|
|
@ -861,7 +864,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
|
|||
if (strcmp(lowerCased, s) != 0)
|
||||
{
|
||||
pfree(lowerCased);
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
pfree(lowerCased);
|
||||
}
|
||||
|
|
@ -869,7 +872,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
|
|||
|
||||
/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
|
||||
memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
|
||||
return true;
|
||||
return clen;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -155,9 +155,9 @@ initTrie(const char *filename)
|
|||
state = 0;
|
||||
for (ptr = line; *ptr; ptr += ptrlen)
|
||||
{
|
||||
ptrlen = pg_mblen(ptr);
|
||||
ptrlen = pg_mblen_cstr(ptr);
|
||||
/* ignore whitespace, but end src or trg */
|
||||
if (t_isspace(ptr))
|
||||
if (t_isspace_cstr(ptr))
|
||||
{
|
||||
if (state == 1)
|
||||
state = 2;
|
||||
|
|
@ -381,6 +381,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
|
|||
char *srcchar = (char *) PG_GETARG_POINTER(1);
|
||||
int32 len = PG_GETARG_INT32(2);
|
||||
char *srcstart = srcchar;
|
||||
const char *srcend = srcstart + len;
|
||||
TSLexeme *res;
|
||||
StringInfoData buf;
|
||||
|
||||
|
|
@ -408,7 +409,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
|
|||
}
|
||||
else
|
||||
{
|
||||
matchlen = pg_mblen(srcchar);
|
||||
matchlen = pg_mblen_range(srcchar, srcend);
|
||||
if (buf.data != NULL)
|
||||
appendBinaryStringInfo(&buf, srcchar, matchlen);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1160,7 +1160,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal,
|
|||
if (cursorpos > 0)
|
||||
newcp++;
|
||||
}
|
||||
chlen = pg_mblen(prosrc);
|
||||
chlen = pg_mblen_cstr(prosrc);
|
||||
if (strncmp(prosrc, literal, chlen) != 0)
|
||||
goto fail;
|
||||
prosrc += chlen;
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags)
|
|||
char *lastchar;
|
||||
|
||||
/* Skip leading spaces */
|
||||
while (*in && t_isspace(in))
|
||||
in += pg_mblen(in);
|
||||
while (*in && t_isspace_cstr(in))
|
||||
in += pg_mblen_cstr(in);
|
||||
|
||||
/* Return NULL on empty lines */
|
||||
if (*in == '\0')
|
||||
|
|
@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags)
|
|||
lastchar = start = in;
|
||||
|
||||
/* Find end of word */
|
||||
while (*in && !t_isspace(in))
|
||||
while (*in && !t_isspace_cstr(in))
|
||||
{
|
||||
lastchar = in;
|
||||
in += pg_mblen(in);
|
||||
in += pg_mblen_cstr(in);
|
||||
}
|
||||
|
||||
if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
|
||||
|
|
|
|||
|
|
@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
ptr = line;
|
||||
|
||||
/* is it a comment? */
|
||||
while (*ptr && t_isspace(ptr))
|
||||
ptr += pg_mblen(ptr);
|
||||
while (*ptr && t_isspace_cstr(ptr))
|
||||
ptr += pg_mblen_cstr(ptr);
|
||||
|
||||
if (t_iseq(ptr, '#') || *ptr == '\0' ||
|
||||
t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
|
||||
|
|
@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
errmsg("unexpected delimiter")));
|
||||
state = TR_WAITSUBS;
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
else if (!t_isspace_cstr(ptr))
|
||||
{
|
||||
beginwrd = ptr;
|
||||
state = TR_INLEX;
|
||||
|
|
@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
|
||||
state = TR_WAITSUBS;
|
||||
}
|
||||
else if (t_isspace(ptr))
|
||||
else if (t_isspace_cstr(ptr))
|
||||
{
|
||||
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
|
||||
state = TR_WAITLEX;
|
||||
|
|
@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
{
|
||||
useasis = true;
|
||||
state = TR_INSUBS;
|
||||
beginwrd = ptr + pg_mblen(ptr);
|
||||
beginwrd = ptr + pg_mblen_cstr(ptr);
|
||||
}
|
||||
else if (t_iseq(ptr, '\\'))
|
||||
{
|
||||
useasis = false;
|
||||
state = TR_INSUBS;
|
||||
beginwrd = ptr + pg_mblen(ptr);
|
||||
beginwrd = ptr + pg_mblen_cstr(ptr);
|
||||
}
|
||||
else if (!t_isspace(ptr))
|
||||
else if (!t_isspace_cstr(ptr))
|
||||
{
|
||||
useasis = false;
|
||||
beginwrd = ptr;
|
||||
|
|
@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
}
|
||||
else if (state == TR_INSUBS)
|
||||
{
|
||||
if (t_isspace(ptr))
|
||||
if (t_isspace_cstr(ptr))
|
||||
{
|
||||
if (ptr == beginwrd)
|
||||
ereport(ERROR,
|
||||
|
|
@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
|
|||
else
|
||||
elog(ERROR, "unrecognized thesaurus state: %d", state);
|
||||
|
||||
ptr += pg_mblen(ptr);
|
||||
ptr += pg_mblen_cstr(ptr);
|
||||
}
|
||||
|
||||
if (state == TR_INSUBS)
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ RS_isRegis(const char *str)
|
|||
{
|
||||
if (state == RS_IN_WAIT)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
if (t_isalpha_cstr(c))
|
||||
/* okay */ ;
|
||||
else if (t_iseq(c, '['))
|
||||
state = RS_IN_ONEOF;
|
||||
|
|
@ -48,14 +48,14 @@ RS_isRegis(const char *str)
|
|||
{
|
||||
if (t_iseq(c, '^'))
|
||||
state = RS_IN_NONEOF;
|
||||
else if (t_isalpha(c))
|
||||
else if (t_isalpha_cstr(c))
|
||||
state = RS_IN_ONEOF_IN;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
if (t_isalpha_cstr(c))
|
||||
/* okay */ ;
|
||||
else if (t_iseq(c, ']'))
|
||||
state = RS_IN_WAIT;
|
||||
|
|
@ -64,7 +64,7 @@ RS_isRegis(const char *str)
|
|||
}
|
||||
else
|
||||
elog(ERROR, "internal error in RS_isRegis: state %d", state);
|
||||
c += pg_mblen(c);
|
||||
c += pg_mblen_cstr(c);
|
||||
}
|
||||
|
||||
return (state == RS_IN_WAIT);
|
||||
|
|
@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str)
|
|||
{
|
||||
if (state == RS_IN_WAIT)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
if (t_isalpha_cstr(c))
|
||||
{
|
||||
if (ptr)
|
||||
ptr = newRegisNode(ptr, len);
|
||||
else
|
||||
ptr = r->node = newRegisNode(NULL, len);
|
||||
COPYCHAR(ptr->data, c);
|
||||
ptr->type = RSF_ONEOF;
|
||||
ptr->len = pg_mblen(c);
|
||||
ptr->len = ts_copychar_cstr(ptr->data, c);
|
||||
}
|
||||
else if (t_iseq(c, '['))
|
||||
{
|
||||
|
|
@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str)
|
|||
ptr->type = RSF_NONEOF;
|
||||
state = RS_IN_NONEOF;
|
||||
}
|
||||
else if (t_isalpha(c))
|
||||
else if (t_isalpha_cstr(c))
|
||||
{
|
||||
COPYCHAR(ptr->data, c);
|
||||
ptr->len = pg_mblen(c);
|
||||
ptr->len = ts_copychar_cstr(ptr->data, c);
|
||||
state = RS_IN_ONEOF_IN;
|
||||
}
|
||||
else /* shouldn't get here */
|
||||
|
|
@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str)
|
|||
}
|
||||
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
{
|
||||
COPYCHAR(ptr->data + ptr->len, c);
|
||||
ptr->len += pg_mblen(c);
|
||||
}
|
||||
if (t_isalpha_cstr(c))
|
||||
ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c);
|
||||
else if (t_iseq(c, ']'))
|
||||
state = RS_IN_WAIT;
|
||||
else /* shouldn't get here */
|
||||
|
|
@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str)
|
|||
}
|
||||
else
|
||||
elog(ERROR, "internal error in RS_compile: state %d", state);
|
||||
c += pg_mblen(c);
|
||||
c += pg_mblen_cstr(c);
|
||||
}
|
||||
|
||||
if (state != RS_IN_WAIT) /* shouldn't get here */
|
||||
|
|
@ -187,10 +182,10 @@ mb_strchr(char *str, char *c)
|
|||
char *ptr = str;
|
||||
bool res = false;
|
||||
|
||||
clen = pg_mblen(c);
|
||||
clen = pg_mblen_cstr(c);
|
||||
while (*ptr && !res)
|
||||
{
|
||||
plen = pg_mblen(ptr);
|
||||
plen = pg_mblen_cstr(ptr);
|
||||
if (plen == clen)
|
||||
{
|
||||
i = plen;
|
||||
|
|
@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str)
|
|||
while (*c)
|
||||
{
|
||||
len++;
|
||||
c += pg_mblen(c);
|
||||
c += pg_mblen_cstr(c);
|
||||
}
|
||||
|
||||
if (len < r->nchar)
|
||||
|
|
@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str)
|
|||
{
|
||||
len -= r->nchar;
|
||||
while (len-- > 0)
|
||||
c += pg_mblen(c);
|
||||
c += pg_mblen_cstr(c);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str)
|
|||
elog(ERROR, "unrecognized regis node type: %d", ptr->type);
|
||||
}
|
||||
ptr = ptr->next;
|
||||
c += pg_mblen(c);
|
||||
c += pg_mblen_cstr(c);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ findchar(char *str, int c)
|
|||
{
|
||||
if (t_iseq(str, c))
|
||||
return str;
|
||||
str += pg_mblen(str);
|
||||
str += pg_mblen_cstr(str);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
|
@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2)
|
|||
{
|
||||
if (t_iseq(str, c1) || t_iseq(str, c2))
|
||||
return str;
|
||||
str += pg_mblen(str);
|
||||
str += pg_mblen_cstr(str);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
|
@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
|||
char *next,
|
||||
*sbuf = *sflagset;
|
||||
int maxstep;
|
||||
int clen;
|
||||
bool stop = false;
|
||||
bool met_comma = false;
|
||||
|
||||
|
|
@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
|||
{
|
||||
case FM_LONG:
|
||||
case FM_CHAR:
|
||||
COPYCHAR(sflag, *sflagset);
|
||||
sflag += pg_mblen(*sflagset);
|
||||
clen = ts_copychar_cstr(sflag, *sflagset);
|
||||
sflag += clen;
|
||||
|
||||
/* Go to start of the next flag */
|
||||
*sflagset += pg_mblen(*sflagset);
|
||||
*sflagset += clen;
|
||||
|
||||
/* Check if we get all characters of flag */
|
||||
maxstep--;
|
||||
|
|
@ -391,7 +392,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
|||
*sflagset = next;
|
||||
while (**sflagset)
|
||||
{
|
||||
if (t_isdigit(*sflagset))
|
||||
if (t_isdigit_cstr(*sflagset))
|
||||
{
|
||||
if (!met_comma)
|
||||
ereport(ERROR,
|
||||
|
|
@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
|||
*sflagset)));
|
||||
met_comma = true;
|
||||
}
|
||||
else if (!t_isspace(*sflagset))
|
||||
else if (!t_isspace_cstr(*sflagset))
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
|
|
@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
|
|||
*sflagset)));
|
||||
}
|
||||
|
||||
*sflagset += pg_mblen(*sflagset);
|
||||
*sflagset += pg_mblen_cstr(*sflagset);
|
||||
}
|
||||
stop = true;
|
||||
break;
|
||||
|
|
@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
|
|||
while (*s)
|
||||
{
|
||||
/* we allow only single encoded flags for faster works */
|
||||
if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
|
||||
if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s))
|
||||
s++;
|
||||
else
|
||||
{
|
||||
|
|
@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
|
|||
s = line;
|
||||
while (*s)
|
||||
{
|
||||
if (t_isspace(s))
|
||||
if (t_isspace_cstr(s))
|
||||
{
|
||||
*s = '\0';
|
||||
break;
|
||||
}
|
||||
s += pg_mblen(s);
|
||||
s += pg_mblen_cstr(s);
|
||||
}
|
||||
pstr = lowerstr_ctx(Conf, line);
|
||||
|
||||
|
|
@ -796,17 +797,17 @@ get_nextfield(char **str, char *next)
|
|||
|
||||
while (**str)
|
||||
{
|
||||
int clen = pg_mblen_cstr(*str);
|
||||
|
||||
if (state == PAE_WAIT_MASK)
|
||||
{
|
||||
if (t_iseq(*str, '#'))
|
||||
return false;
|
||||
else if (!t_isspace(*str))
|
||||
else if (!t_isspace_cstr(*str))
|
||||
{
|
||||
int clen = pg_mblen(*str);
|
||||
|
||||
if (clen < avail)
|
||||
{
|
||||
COPYCHAR(next, *str);
|
||||
ts_copychar_with_len(next, *str, clen);
|
||||
next += clen;
|
||||
avail -= clen;
|
||||
}
|
||||
|
|
@ -815,24 +816,22 @@ get_nextfield(char **str, char *next)
|
|||
}
|
||||
else /* state == PAE_INMASK */
|
||||
{
|
||||
if (t_isspace(*str))
|
||||
if (t_isspace_cstr(*str))
|
||||
{
|
||||
*next = '\0';
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
int clen = pg_mblen(*str);
|
||||
|
||||
if (clen < avail)
|
||||
{
|
||||
COPYCHAR(next, *str);
|
||||
ts_copychar_with_len(next, *str, clen);
|
||||
next += clen;
|
||||
avail -= clen;
|
||||
}
|
||||
}
|
||||
}
|
||||
*str += pg_mblen(*str);
|
||||
*str += clen;
|
||||
}
|
||||
|
||||
*next = '\0';
|
||||
|
|
@ -922,14 +921,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
|
||||
while (*str)
|
||||
{
|
||||
int clen = pg_mblen_cstr(str);
|
||||
|
||||
if (state == PAE_WAIT_MASK)
|
||||
{
|
||||
if (t_iseq(str, '#'))
|
||||
return false;
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
{
|
||||
COPYCHAR(pmask, str);
|
||||
pmask += pg_mblen(str);
|
||||
pmask += ts_copychar_with_len(pmask, str, clen);
|
||||
state = PAE_INMASK;
|
||||
}
|
||||
}
|
||||
|
|
@ -940,10 +940,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
*pmask = '\0';
|
||||
state = PAE_WAIT_FIND;
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
{
|
||||
COPYCHAR(pmask, str);
|
||||
pmask += pg_mblen(str);
|
||||
pmask += ts_copychar_with_len(pmask, str, clen);
|
||||
}
|
||||
}
|
||||
else if (state == PAE_WAIT_FIND)
|
||||
|
|
@ -952,13 +951,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
{
|
||||
state = PAE_INFIND;
|
||||
}
|
||||
else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
|
||||
else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
prepl += ts_copychar_with_len(prepl, str, clen);
|
||||
state = PAE_INREPL;
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("syntax error")));
|
||||
|
|
@ -970,12 +968,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
*pfind = '\0';
|
||||
state = PAE_WAIT_REPL;
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
else if (t_isalpha_cstr(str))
|
||||
{
|
||||
COPYCHAR(pfind, str);
|
||||
pfind += pg_mblen(str);
|
||||
pfind += ts_copychar_with_len(pfind, str, clen);
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("syntax error")));
|
||||
|
|
@ -986,13 +983,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
{
|
||||
break; /* void repl */
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
else if (t_isalpha_cstr(str))
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
prepl += ts_copychar_with_len(prepl, str, clen);
|
||||
state = PAE_INREPL;
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("syntax error")));
|
||||
|
|
@ -1004,12 +1000,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
*prepl = '\0';
|
||||
break;
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
else if (t_isalpha_cstr(str))
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
prepl += ts_copychar_with_len(prepl, str, clen);
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
else if (!t_isspace_cstr(str))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("syntax error")));
|
||||
|
|
@ -1017,7 +1012,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
|
|||
else
|
||||
elog(ERROR, "unrecognized state in parse_affentry: %d", state);
|
||||
|
||||
str += pg_mblen(str);
|
||||
str += clen;
|
||||
}
|
||||
|
||||
*pmask = *pfind = *prepl = '\0';
|
||||
|
|
@ -1070,10 +1065,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
|
|||
CompoundAffixFlag *newValue;
|
||||
char sbuf[BUFSIZ];
|
||||
char *sflag;
|
||||
int clen;
|
||||
|
||||
while (*s && t_isspace(s))
|
||||
s += pg_mblen(s);
|
||||
while (*s && t_isspace_cstr(s))
|
||||
s += pg_mblen_cstr(s);
|
||||
|
||||
if (!*s)
|
||||
ereport(ERROR,
|
||||
|
|
@ -1082,10 +1076,10 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
|
|||
|
||||
/* Get flag without \n */
|
||||
sflag = sbuf;
|
||||
while (*s && !t_isspace(s) && *s != '\n')
|
||||
while (*s && !t_isspace_cstr(s) && *s != '\n')
|
||||
{
|
||||
clen = pg_mblen(s);
|
||||
COPYCHAR(sflag, s);
|
||||
int clen = ts_copychar_cstr(sflag, s);
|
||||
|
||||
sflag += clen;
|
||||
s += clen;
|
||||
}
|
||||
|
|
@ -1228,7 +1222,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
|
|||
|
||||
while ((recoded = tsearch_readline(&trst)) != NULL)
|
||||
{
|
||||
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
|
||||
if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
|
||||
{
|
||||
pfree(recoded);
|
||||
continue;
|
||||
|
|
@ -1265,8 +1259,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
|
|||
{
|
||||
char *s = recoded + strlen("FLAG");
|
||||
|
||||
while (*s && t_isspace(s))
|
||||
s += pg_mblen(s);
|
||||
while (*s && t_isspace_cstr(s))
|
||||
s += pg_mblen_cstr(s);
|
||||
|
||||
if (*s)
|
||||
{
|
||||
|
|
@ -1301,7 +1295,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
|
|||
{
|
||||
int fields_read;
|
||||
|
||||
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
|
||||
if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
|
||||
goto nextline;
|
||||
|
||||
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
|
||||
|
|
@ -1464,12 +1458,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
|
|||
s = findchar2(recoded, 'l', 'L');
|
||||
if (s)
|
||||
{
|
||||
while (*s && !t_isspace(s))
|
||||
s += pg_mblen(s);
|
||||
while (*s && t_isspace(s))
|
||||
s += pg_mblen(s);
|
||||
while (*s && !t_isspace_cstr(s))
|
||||
s += pg_mblen_cstr(s);
|
||||
while (*s && t_isspace_cstr(s))
|
||||
s += pg_mblen_cstr(s);
|
||||
|
||||
if (*s && pg_mblen(s) == 1)
|
||||
if (*s && pg_mblen_cstr(s) == 1)
|
||||
{
|
||||
addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
|
||||
Conf->usecompound = true;
|
||||
|
|
@ -1497,8 +1491,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
|
|||
s = recoded + 4; /* we need non-lowercased string */
|
||||
flagflags = 0;
|
||||
|
||||
while (*s && t_isspace(s))
|
||||
s += pg_mblen(s);
|
||||
while (*s && t_isspace_cstr(s))
|
||||
s += pg_mblen_cstr(s);
|
||||
|
||||
if (*s == '*')
|
||||
{
|
||||
|
|
@ -1519,14 +1513,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
|
|||
* be followed by EOL, whitespace, or ':'. Otherwise this is a
|
||||
* new-format flag command.
|
||||
*/
|
||||
if (*s && pg_mblen(s) == 1)
|
||||
if (*s && pg_mblen_cstr(s) == 1)
|
||||
{
|
||||
COPYCHAR(flag, s);
|
||||
flag[0] = *s++;
|
||||
flag[1] = '\0';
|
||||
|
||||
s++;
|
||||
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
|
||||
t_isspace(s))
|
||||
t_isspace_cstr(s))
|
||||
{
|
||||
oldformat = true;
|
||||
goto nextline;
|
||||
|
|
@ -1750,7 +1743,7 @@ NISortDictionary(IspellDict *Conf)
|
|||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("invalid affix alias \"%s\"",
|
||||
Conf->Spell[i]->p.flag)));
|
||||
if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
|
||||
if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("invalid affix alias \"%s\"",
|
||||
|
|
|
|||
|
|
@ -31,81 +31,44 @@ static void tsearch_readline_callback(void *arg);
|
|||
*/
|
||||
#define WC_BUF_LEN 3
|
||||
|
||||
int
|
||||
t_isdigit(const char *ptr)
|
||||
{
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[WC_BUF_LEN];
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || database_ctype_is_c)
|
||||
return isdigit(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
|
||||
|
||||
return iswdigit((wint_t) character[0]);
|
||||
}
|
||||
|
||||
int
|
||||
t_isspace(const char *ptr)
|
||||
{
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[WC_BUF_LEN];
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || database_ctype_is_c)
|
||||
return isspace(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
|
||||
|
||||
return iswspace((wint_t) character[0]);
|
||||
}
|
||||
|
||||
int
|
||||
t_isalpha(const char *ptr)
|
||||
{
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[WC_BUF_LEN];
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || database_ctype_is_c)
|
||||
return isalpha(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
|
||||
|
||||
return iswalpha((wint_t) character[0]);
|
||||
}
|
||||
|
||||
int
|
||||
t_isalnum(const char *ptr)
|
||||
{
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[WC_BUF_LEN];
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || database_ctype_is_c)
|
||||
return isalnum(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
|
||||
|
||||
return iswalnum((wint_t) character[0]);
|
||||
}
|
||||
|
||||
int
|
||||
t_isprint(const char *ptr)
|
||||
{
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[WC_BUF_LEN];
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || database_ctype_is_c)
|
||||
return isprint(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
|
||||
|
||||
return iswprint((wint_t) character[0]);
|
||||
#define GENERATE_T_ISCLASS_DEF(character_class) \
|
||||
/* mblen shall be that of the first character */ \
|
||||
int \
|
||||
t_is##character_class##_with_len(const char *ptr, int mblen) \
|
||||
{ \
|
||||
int clen = pg_mblen_with_len(ptr, mblen); \
|
||||
wchar_t character[WC_BUF_LEN]; \
|
||||
pg_locale_t mylocale = 0; /* TODO */ \
|
||||
if (clen == 1 || database_ctype_is_c) \
|
||||
return is##character_class(TOUCHAR(ptr)); \
|
||||
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \
|
||||
return isw##character_class((wint_t) character[0]); \
|
||||
} \
|
||||
\
|
||||
/* ptr shall point to a NUL-terminated string */ \
|
||||
int \
|
||||
t_is##character_class##_cstr(const char *ptr) \
|
||||
{ \
|
||||
return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
|
||||
} \
|
||||
/* ptr shall point to a string with pre-validated encoding */ \
|
||||
int \
|
||||
t_is##character_class##_unbounded(const char *ptr) \
|
||||
{ \
|
||||
return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
|
||||
} \
|
||||
/* historical name for _unbounded */ \
|
||||
int \
|
||||
t_is##character_class(const char *ptr) \
|
||||
{ \
|
||||
return t_is##character_class##_unbounded(ptr); \
|
||||
}
|
||||
|
||||
GENERATE_T_ISCLASS_DEF(alnum)
|
||||
GENERATE_T_ISCLASS_DEF(alpha)
|
||||
GENERATE_T_ISCLASS_DEF(digit)
|
||||
GENERATE_T_ISCLASS_DEF(print)
|
||||
GENERATE_T_ISCLASS_DEF(space)
|
||||
|
||||
/*
|
||||
* Set up to read a file using tsearch_readline(). This facility is
|
||||
|
|
|
|||
|
|
@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
|
|||
char *pbuf = line;
|
||||
|
||||
/* Trim trailing space */
|
||||
while (*pbuf && !t_isspace(pbuf))
|
||||
pbuf += pg_mblen(pbuf);
|
||||
while (*pbuf && !t_isspace_cstr(pbuf))
|
||||
pbuf += pg_mblen_cstr(pbuf);
|
||||
*pbuf = '\0';
|
||||
|
||||
/* Skip empty lines */
|
||||
|
|
|
|||
|
|
@ -1728,7 +1728,8 @@ TParserGet(TParser *prs)
|
|||
prs->state->charlen = 0;
|
||||
else
|
||||
prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
|
||||
pg_mblen(prs->str + prs->state->posbyte);
|
||||
pg_mblen_range(prs->str + prs->state->posbyte,
|
||||
prs->str + prs->lenstr);
|
||||
|
||||
Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
|
||||
Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
|
|||
ereturn(escontext, 0,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid hexadecimal digit: \"%.*s\"",
|
||||
pg_mblen(s), s)));
|
||||
pg_mblen_range(s, srcend), s)));
|
||||
s++;
|
||||
if (s >= srcend)
|
||||
ereturn(escontext, 0,
|
||||
|
|
@ -225,7 +225,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
|
|||
ereturn(escontext, 0,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid hexadecimal digit: \"%.*s\"",
|
||||
pg_mblen(s), s)));
|
||||
pg_mblen_range(s, srcend), s)));
|
||||
s++;
|
||||
*p++ = (v1 << 4) | v2;
|
||||
}
|
||||
|
|
@ -354,7 +354,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
|
||||
pg_mblen(s - 1), s - 1)));
|
||||
pg_mblen_range(s - 1, srcend), s - 1)));
|
||||
}
|
||||
/* add it to buffer */
|
||||
buf = (buf << 6) + b;
|
||||
|
|
|
|||
|
|
@ -1396,7 +1396,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
|
||||
errmsg("invalid datetime format separator: \"%s\"",
|
||||
pnstrdup(str, pg_mblen(str)))));
|
||||
pnstrdup(str, pg_mblen_cstr(str)))));
|
||||
|
||||
if (*str == ' ')
|
||||
n->type = NODE_TYPE_SPACE;
|
||||
|
|
@ -1426,7 +1426,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
|
|||
/* backslash quotes the next character, if any */
|
||||
if (*str == '\\' && *(str + 1))
|
||||
str++;
|
||||
chlen = pg_mblen(str);
|
||||
chlen = pg_mblen_cstr(str);
|
||||
n->type = NODE_TYPE_CHAR;
|
||||
memcpy(n->character, str, chlen);
|
||||
n->character[chlen] = '\0';
|
||||
|
|
@ -1444,7 +1444,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
|
|||
*/
|
||||
if (*str == '\\' && *(str + 1) == '"')
|
||||
str++;
|
||||
chlen = pg_mblen(str);
|
||||
chlen = pg_mblen_cstr(str);
|
||||
|
||||
if ((flags & DCH_FLAG) && is_separator_char(str))
|
||||
n->type = NODE_TYPE_SEPARATOR;
|
||||
|
|
@ -2274,8 +2274,8 @@ asc_toupper_z(const char *buff)
|
|||
do { \
|
||||
if (S_THth(_suf)) \
|
||||
{ \
|
||||
if (*(ptr)) (ptr) += pg_mblen(ptr); \
|
||||
if (*(ptr)) (ptr) += pg_mblen(ptr); \
|
||||
if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
|
||||
if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
|
@ -3481,7 +3481,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
|
|||
* insist that the consumed character match the format's
|
||||
* character.
|
||||
*/
|
||||
s += pg_mblen(s);
|
||||
s += pg_mblen_cstr(s);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
|
@ -3503,11 +3503,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
|
|||
if (extra_skip > 0)
|
||||
extra_skip--;
|
||||
else
|
||||
s += pg_mblen(s);
|
||||
s += pg_mblen_cstr(s);
|
||||
}
|
||||
else
|
||||
{
|
||||
int chlen = pg_mblen(s);
|
||||
int chlen = pg_mblen_cstr(s);
|
||||
|
||||
/*
|
||||
* Standard mode requires strict match of format characters.
|
||||
|
|
@ -5809,13 +5809,15 @@ NUM_numpart_to_char(NUMProc *Np, int id)
|
|||
static void
|
||||
NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
|
||||
{
|
||||
const char *end = Np->inout + input_len;
|
||||
|
||||
while (n-- > 0)
|
||||
{
|
||||
if (OVERLOAD_TEST)
|
||||
break; /* end of input */
|
||||
if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
|
||||
break; /* it's a data character */
|
||||
Np->inout_p += pg_mblen(Np->inout_p);
|
||||
Np->inout_p += pg_mblen_range(Np->inout_p, end);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -6268,7 +6270,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
|
|||
}
|
||||
else
|
||||
{
|
||||
Np->inout_p += pg_mblen(Np->inout_p);
|
||||
Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -693,7 +693,7 @@ report_json_context(JsonLexContext *lex)
|
|||
{
|
||||
/* Advance to next multibyte character */
|
||||
if (IS_HIGHBIT_SET(*context_start))
|
||||
context_start += pg_mblen(context_start);
|
||||
context_start += pg_mblen_range(context_start, context_end);
|
||||
else
|
||||
context_start++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -597,7 +597,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
|
|||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("invalid input syntax for type %s", "jsonpath"),
|
||||
errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
|
||||
pg_mblen(flags->val + i), flags->val + i)));
|
||||
pg_mblen_range(flags->val + i, flags->val + flags->len),
|
||||
flags->val + i)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,8 @@ varstr_levenshtein(const char *source, int slen,
|
|||
int *s_char_len = NULL;
|
||||
int j;
|
||||
const char *y;
|
||||
const char *send = source + slen;
|
||||
const char *tend = target + tlen;
|
||||
|
||||
/*
|
||||
* For varstr_levenshtein_less_equal, we have real variables called
|
||||
|
|
@ -183,10 +185,10 @@ varstr_levenshtein(const char *source, int slen,
|
|||
#endif
|
||||
|
||||
/*
|
||||
* In order to avoid calling pg_mblen() repeatedly on each character in s,
|
||||
* we cache all the lengths before starting the main loop -- but if all
|
||||
* the characters in both strings are single byte, then we skip this and
|
||||
* use a fast-path in the main loop. If only one string contains
|
||||
* In order to avoid calling pg_mblen_range() repeatedly on each character
|
||||
* in s, we cache all the lengths before starting the main loop -- but if
|
||||
* all the characters in both strings are single byte, then we skip this
|
||||
* and use a fast-path in the main loop. If only one string contains
|
||||
* multi-byte characters, we still build the array, so that the fast-path
|
||||
* needn't deal with the case where the array hasn't been initialized.
|
||||
*/
|
||||
|
|
@ -198,7 +200,7 @@ varstr_levenshtein(const char *source, int slen,
|
|||
s_char_len = (int *) palloc((m + 1) * sizeof(int));
|
||||
for (i = 0; i < m; ++i)
|
||||
{
|
||||
s_char_len[i] = pg_mblen(cp);
|
||||
s_char_len[i] = pg_mblen_range(cp, send);
|
||||
cp += s_char_len[i];
|
||||
}
|
||||
s_char_len[i] = 0;
|
||||
|
|
@ -224,7 +226,7 @@ varstr_levenshtein(const char *source, int slen,
|
|||
{
|
||||
int *temp;
|
||||
const char *x = source;
|
||||
int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
|
||||
int y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1;
|
||||
int i;
|
||||
|
||||
#ifdef LEVENSHTEIN_LESS_EQUAL
|
||||
|
|
|
|||
|
|
@ -55,20 +55,20 @@ static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
|
|||
*--------------------
|
||||
*/
|
||||
static inline int
|
||||
wchareq(const char *p1, const char *p2)
|
||||
wchareq(const char *p1, int p1len, const char *p2, int p2len)
|
||||
{
|
||||
int p1_len;
|
||||
int p1clen;
|
||||
|
||||
/* Optimization: quickly compare the first byte. */
|
||||
if (*p1 != *p2)
|
||||
return 0;
|
||||
|
||||
p1_len = pg_mblen(p1);
|
||||
if (pg_mblen(p2) != p1_len)
|
||||
p1clen = pg_mblen_with_len(p1, p1len);
|
||||
if (pg_mblen_with_len(p2, p2len) != p1clen)
|
||||
return 0;
|
||||
|
||||
/* They are the same length */
|
||||
while (p1_len--)
|
||||
while (p1clen--)
|
||||
{
|
||||
if (*p1++ != *p2++)
|
||||
return 0;
|
||||
|
|
@ -105,11 +105,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
|
|||
#define NextByte(p, plen) ((p)++, (plen)--)
|
||||
|
||||
/* Set up to compile like_match.c for multibyte characters */
|
||||
#define CHAREQ(p1, p2) wchareq((p1), (p2))
|
||||
#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
|
||||
#define NextChar(p, plen) \
|
||||
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
|
||||
do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
|
||||
#define CopyAdvChar(dst, src, srclen) \
|
||||
do { int __l = pg_mblen(src); \
|
||||
do { int __l = pg_mblen_with_len((src), (srclen)); \
|
||||
(srclen) -= __l; \
|
||||
while (__l-- > 0) \
|
||||
*(dst)++ = *(src)++; \
|
||||
|
|
@ -121,7 +121,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
|
|||
#include "like_match.c"
|
||||
|
||||
/* Set up to compile like_match.c for single-byte characters */
|
||||
#define CHAREQ(p1, p2) (*(p1) == *(p2))
|
||||
#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
|
||||
#define NextChar(p, plen) NextByte((p), (plen))
|
||||
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
|
||||
|
||||
|
|
|
|||
|
|
@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc)
|
|||
errhint("Escape string must be empty or one character.")));
|
||||
|
||||
e = VARDATA_ANY(esc);
|
||||
elen = VARSIZE_ANY_EXHDR(esc);
|
||||
|
||||
/*
|
||||
* If specified escape is '\', just copy the pattern as-is.
|
||||
|
|
@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc)
|
|||
afterescape = false;
|
||||
while (plen > 0)
|
||||
{
|
||||
if (CHAREQ(p, e) && !afterescape)
|
||||
if (CHAREQ(p, plen, e, elen) && !afterescape)
|
||||
{
|
||||
*r++ = '\\';
|
||||
NextChar(p, plen);
|
||||
|
|
|
|||
|
|
@ -153,8 +153,8 @@ lpad(PG_FUNCTION_ARGS)
|
|||
char *ptr1,
|
||||
*ptr2,
|
||||
*ptr2start,
|
||||
*ptr2end,
|
||||
*ptr_ret;
|
||||
const char *ptr2end;
|
||||
int m,
|
||||
s1len,
|
||||
s2len;
|
||||
|
|
@ -199,7 +199,7 @@ lpad(PG_FUNCTION_ARGS)
|
|||
|
||||
while (m--)
|
||||
{
|
||||
int mlen = pg_mblen(ptr2);
|
||||
int mlen = pg_mblen_range(ptr2, ptr2end);
|
||||
|
||||
memcpy(ptr_ret, ptr2, mlen);
|
||||
ptr_ret += mlen;
|
||||
|
|
@ -212,7 +212,7 @@ lpad(PG_FUNCTION_ARGS)
|
|||
|
||||
while (s1len--)
|
||||
{
|
||||
int mlen = pg_mblen(ptr1);
|
||||
int mlen = pg_mblen_unbounded(ptr1);
|
||||
|
||||
memcpy(ptr_ret, ptr1, mlen);
|
||||
ptr_ret += mlen;
|
||||
|
|
@ -251,8 +251,8 @@ rpad(PG_FUNCTION_ARGS)
|
|||
char *ptr1,
|
||||
*ptr2,
|
||||
*ptr2start,
|
||||
*ptr2end,
|
||||
*ptr_ret;
|
||||
const char *ptr2end;
|
||||
int m,
|
||||
s1len,
|
||||
s2len;
|
||||
|
|
@ -292,11 +292,12 @@ rpad(PG_FUNCTION_ARGS)
|
|||
m = len - s1len;
|
||||
|
||||
ptr1 = VARDATA_ANY(string1);
|
||||
|
||||
ptr_ret = VARDATA(ret);
|
||||
|
||||
while (s1len--)
|
||||
{
|
||||
int mlen = pg_mblen(ptr1);
|
||||
int mlen = pg_mblen_unbounded(ptr1);
|
||||
|
||||
memcpy(ptr_ret, ptr1, mlen);
|
||||
ptr_ret += mlen;
|
||||
|
|
@ -308,7 +309,7 @@ rpad(PG_FUNCTION_ARGS)
|
|||
|
||||
while (m--)
|
||||
{
|
||||
int mlen = pg_mblen(ptr2);
|
||||
int mlen = pg_mblen_range(ptr2, ptr2end);
|
||||
|
||||
memcpy(ptr_ret, ptr2, mlen);
|
||||
ptr_ret += mlen;
|
||||
|
|
@ -393,6 +394,7 @@ dotrim(const char *string, int stringlen,
|
|||
*/
|
||||
const char **stringchars;
|
||||
const char **setchars;
|
||||
const char *setend;
|
||||
int *stringmblen;
|
||||
int *setmblen;
|
||||
int stringnchars;
|
||||
|
|
@ -400,6 +402,7 @@ dotrim(const char *string, int stringlen,
|
|||
int resultndx;
|
||||
int resultnchars;
|
||||
const char *p;
|
||||
const char *pend;
|
||||
int len;
|
||||
int mblen;
|
||||
const char *str_pos;
|
||||
|
|
@ -410,10 +413,11 @@ dotrim(const char *string, int stringlen,
|
|||
stringnchars = 0;
|
||||
p = string;
|
||||
len = stringlen;
|
||||
pend = p + len;
|
||||
while (len > 0)
|
||||
{
|
||||
stringchars[stringnchars] = p;
|
||||
stringmblen[stringnchars] = mblen = pg_mblen(p);
|
||||
stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend);
|
||||
stringnchars++;
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
|
|
@ -424,10 +428,11 @@ dotrim(const char *string, int stringlen,
|
|||
setnchars = 0;
|
||||
p = set;
|
||||
len = setlen;
|
||||
setend = set + setlen;
|
||||
while (len > 0)
|
||||
{
|
||||
setchars[setnchars] = p;
|
||||
setmblen[setnchars] = mblen = pg_mblen(p);
|
||||
setmblen[setnchars] = mblen = pg_mblen_range(p, setend);
|
||||
setnchars++;
|
||||
p += mblen;
|
||||
len -= mblen;
|
||||
|
|
@ -805,6 +810,8 @@ translate(PG_FUNCTION_ARGS)
|
|||
*to_end;
|
||||
char *source,
|
||||
*target;
|
||||
const char *source_end;
|
||||
const char *from_end;
|
||||
int m,
|
||||
fromlen,
|
||||
tolen,
|
||||
|
|
@ -819,9 +826,11 @@ translate(PG_FUNCTION_ARGS)
|
|||
if (m <= 0)
|
||||
PG_RETURN_TEXT_P(string);
|
||||
source = VARDATA_ANY(string);
|
||||
source_end = source + m;
|
||||
|
||||
fromlen = VARSIZE_ANY_EXHDR(from);
|
||||
from_ptr = VARDATA_ANY(from);
|
||||
from_end = from_ptr + fromlen;
|
||||
tolen = VARSIZE_ANY_EXHDR(to);
|
||||
to_ptr = VARDATA_ANY(to);
|
||||
to_end = to_ptr + tolen;
|
||||
|
|
@ -845,12 +854,12 @@ translate(PG_FUNCTION_ARGS)
|
|||
|
||||
while (m > 0)
|
||||
{
|
||||
source_len = pg_mblen(source);
|
||||
source_len = pg_mblen_range(source, source_end);
|
||||
from_index = 0;
|
||||
|
||||
for (i = 0; i < fromlen; i += len)
|
||||
{
|
||||
len = pg_mblen(&from_ptr[i]);
|
||||
len = pg_mblen_range(&from_ptr[i], from_end);
|
||||
if (len == source_len &&
|
||||
memcmp(source, &from_ptr[i], len) == 0)
|
||||
break;
|
||||
|
|
@ -866,11 +875,11 @@ translate(PG_FUNCTION_ARGS)
|
|||
{
|
||||
if (p >= to_end)
|
||||
break;
|
||||
p += pg_mblen(p);
|
||||
p += pg_mblen_range(p, to_end);
|
||||
}
|
||||
if (p < to_end)
|
||||
{
|
||||
len = pg_mblen(p);
|
||||
len = pg_mblen_range(p, to_end);
|
||||
memcpy(target, p, len);
|
||||
target += len;
|
||||
retlen += len;
|
||||
|
|
|
|||
|
|
@ -443,7 +443,7 @@ parse_re_flags(pg_re_flags *flags, text *opts)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid regular expression option: \"%.*s\"",
|
||||
pg_mblen(opt_p + i), opt_p + i)));
|
||||
pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -673,12 +673,13 @@ textregexreplace(PG_FUNCTION_ARGS)
|
|||
if (VARSIZE_ANY_EXHDR(opt) > 0)
|
||||
{
|
||||
char *opt_p = VARDATA_ANY(opt);
|
||||
const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt);
|
||||
|
||||
if (*opt_p >= '0' && *opt_p <= '9')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid regular expression option: \"%.*s\"",
|
||||
pg_mblen(opt_p), opt_p),
|
||||
pg_mblen_range(opt_p, end_p), opt_p),
|
||||
errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
|
||||
}
|
||||
|
||||
|
|
@ -772,6 +773,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
|
|||
*r;
|
||||
int plen,
|
||||
elen;
|
||||
const char *pend;
|
||||
bool afterescape = false;
|
||||
int nquotes = 0;
|
||||
int bracket_depth = 0; /* square bracket nesting level */
|
||||
|
|
@ -779,6 +781,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
|
|||
|
||||
p = VARDATA_ANY(pat_text);
|
||||
plen = VARSIZE_ANY_EXHDR(pat_text);
|
||||
pend = p + plen;
|
||||
if (esc_text == NULL)
|
||||
{
|
||||
/* No ESCAPE clause provided; default to backslash as escape */
|
||||
|
|
@ -878,7 +881,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
|
|||
|
||||
if (elen > 1)
|
||||
{
|
||||
int mblen = pg_mblen(p);
|
||||
int mblen = pg_mblen_range(p, pend);
|
||||
|
||||
if (mblen > 1)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
|
|||
return buf;
|
||||
|
||||
buf++;
|
||||
while (*buf && pg_mblen(buf) == 1)
|
||||
while (*buf && pg_mblen_cstr(buf) == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
|
|
@ -197,7 +197,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!t_isdigit(ptr))
|
||||
if (!t_isdigit_cstr(ptr))
|
||||
return false;
|
||||
|
||||
errno = 0;
|
||||
|
|
@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate)
|
|||
return false;
|
||||
|
||||
/* it shouldn't be a part of any word */
|
||||
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
|
||||
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr))
|
||||
return false;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
ptr += pg_mblen(ptr);
|
||||
ptr += pg_mblen_cstr(ptr);
|
||||
|
||||
if (*ptr == '\0') /* got end of string without operand */
|
||||
return false;
|
||||
|
|
@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate)
|
|||
* So we still treat OR literal as operation with possibly incorrect
|
||||
* operand and will not search it as lexeme
|
||||
*/
|
||||
if (!t_isspace(ptr))
|
||||
if (!t_isspace_cstr(ptr))
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
|||
/* generic syntax error message is fine */
|
||||
return PT_ERR;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
else if (!t_isspace_cstr(state->buf))
|
||||
{
|
||||
/*
|
||||
* We rely on the tsvector parser to parse the value for
|
||||
|
|
@ -383,14 +383,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
|||
{
|
||||
return (state->count) ? PT_ERR : PT_END;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
else if (!t_isspace_cstr(state->buf))
|
||||
{
|
||||
return PT_ERR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
state->buf += pg_mblen(state->buf);
|
||||
state->buf += pg_mblen_cstr(state->buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
|||
state->state = WAITOPERAND;
|
||||
continue;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
else if (!t_isspace_cstr(state->buf))
|
||||
{
|
||||
/*
|
||||
* We rely on the tsvector parser to parse the value for
|
||||
|
|
@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
|||
state->buf++;
|
||||
continue;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
else if (!t_isspace_cstr(state->buf))
|
||||
{
|
||||
/* insert implicit AND between operands */
|
||||
state->state = WAITOPERAND;
|
||||
|
|
@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
|||
break;
|
||||
}
|
||||
|
||||
state->buf += pg_mblen(state->buf);
|
||||
state->buf += pg_mblen_cstr(state->buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
|
|||
*(in->cur) = '\\';
|
||||
in->cur++;
|
||||
}
|
||||
COPYCHAR(in->cur, op);
|
||||
|
||||
clen = pg_mblen(op);
|
||||
clen = ts_copychar_cstr(in->cur, op);
|
||||
op += clen;
|
||||
in->cur += clen;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -319,9 +319,9 @@ tsvectorout(PG_FUNCTION_ARGS)
|
|||
lenbuf = 0,
|
||||
pp;
|
||||
WordEntry *ptr = ARRPTR(out);
|
||||
char *curbegin,
|
||||
*curin,
|
||||
char *curin,
|
||||
*curout;
|
||||
const char *curend;
|
||||
|
||||
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
|
||||
for (i = 0; i < out->size; i++)
|
||||
|
|
@ -334,13 +334,14 @@ tsvectorout(PG_FUNCTION_ARGS)
|
|||
curout = outbuf = (char *) palloc(lenbuf);
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
curbegin = curin = STRPTR(out) + ptr->pos;
|
||||
curin = STRPTR(out) + ptr->pos;
|
||||
curend = curin + ptr->len;
|
||||
if (i != 0)
|
||||
*curout++ = ' ';
|
||||
*curout++ = '\'';
|
||||
while (curin - curbegin < ptr->len)
|
||||
while (curin < curend)
|
||||
{
|
||||
int len = pg_mblen(curin);
|
||||
int len = pg_mblen_range(curin, curend);
|
||||
|
||||
if (t_iseq(curin, '\''))
|
||||
*curout++ = '\'';
|
||||
|
|
|
|||
|
|
@ -2604,11 +2604,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
|
|||
if (ws)
|
||||
{
|
||||
char *buf;
|
||||
const char *end;
|
||||
|
||||
buf = VARDATA_ANY(ws);
|
||||
while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
|
||||
end = buf + VARSIZE_ANY_EXHDR(ws);
|
||||
while (buf < end)
|
||||
{
|
||||
if (pg_mblen(buf) == 1)
|
||||
int len = pg_mblen_range(buf, end);
|
||||
|
||||
if (len == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
|
|
@ -2632,7 +2636,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
|
|||
stat->weight |= 0;
|
||||
}
|
||||
}
|
||||
buf += pg_mblen(buf);
|
||||
buf += len;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -206,10 +206,9 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
|
||||
(state->is_web && t_iseq(state->prsbuf, '"')))
|
||||
PRSSYNTAXERROR;
|
||||
else if (!t_isspace(state->prsbuf))
|
||||
else if (!t_isspace_cstr(state->prsbuf))
|
||||
{
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
curpos += pg_mblen(state->prsbuf);
|
||||
curpos += ts_copychar_cstr(curpos, state->prsbuf);
|
||||
statecode = WAITENDWORD;
|
||||
}
|
||||
}
|
||||
|
|
@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
curpos += pg_mblen(state->prsbuf);
|
||||
curpos += ts_copychar_cstr(curpos, state->prsbuf);
|
||||
Assert(oldstate != 0);
|
||||
statecode = oldstate;
|
||||
}
|
||||
|
|
@ -236,7 +234,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
statecode = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
(state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
|
||||
(state->is_web && t_iseq(state->prsbuf, '"')))
|
||||
{
|
||||
|
|
@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
curpos += pg_mblen(state->prsbuf);
|
||||
curpos += ts_copychar_cstr(curpos, state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (statecode == WAITENDCMPLX)
|
||||
|
|
@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
curpos += pg_mblen(state->prsbuf);
|
||||
curpos += ts_copychar_cstr(curpos, state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (statecode == WAITCHARCMPLX)
|
||||
|
|
@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
if (!state->is_web && t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
curpos += pg_mblen(state->prsbuf);
|
||||
curpos += ts_copychar_cstr(curpos, state->prsbuf);
|
||||
statecode = WAITENDCMPLX;
|
||||
}
|
||||
else
|
||||
|
|
@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
PRSSYNTAXERROR;
|
||||
if (state->oprisdelim)
|
||||
{
|
||||
/* state->prsbuf+=pg_mblen(state->prsbuf); */
|
||||
/* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
|
||||
RETURN_TOKEN;
|
||||
}
|
||||
else
|
||||
|
|
@ -317,7 +312,7 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
}
|
||||
else if (statecode == INPOSINFO)
|
||||
{
|
||||
if (t_isdigit(state->prsbuf))
|
||||
if (t_isdigit_cstr(state->prsbuf))
|
||||
{
|
||||
if (posalen == 0)
|
||||
{
|
||||
|
|
@ -372,10 +367,10 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
PRSSYNTAXERROR;
|
||||
WEP_SETWEIGHT(pos[npos - 1], 0);
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) ||
|
||||
else if (t_isspace_cstr(state->prsbuf) ||
|
||||
*(state->prsbuf) == '\0')
|
||||
RETURN_TOKEN;
|
||||
else if (!t_isdigit(state->prsbuf))
|
||||
else if (!t_isdigit_cstr(state->prsbuf))
|
||||
PRSSYNTAXERROR;
|
||||
}
|
||||
else /* internal error */
|
||||
|
|
@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state,
|
|||
statecode);
|
||||
|
||||
/* get next char */
|
||||
state->prsbuf += pg_mblen(state->prsbuf);
|
||||
state->prsbuf += pg_mblen_cstr(state->prsbuf);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ bit_in(PG_FUNCTION_ARGS)
|
|||
ereturn(escontext, (Datum) 0,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("\"%.*s\" is not a valid binary digit",
|
||||
pg_mblen(sp), sp)));
|
||||
pg_mblen_cstr(sp), sp)));
|
||||
|
||||
x >>= 1;
|
||||
if (x == 0)
|
||||
|
|
@ -257,7 +257,7 @@ bit_in(PG_FUNCTION_ARGS)
|
|||
ereturn(escontext, (Datum) 0,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("\"%.*s\" is not a valid hexadecimal digit",
|
||||
pg_mblen(sp), sp)));
|
||||
pg_mblen_cstr(sp), sp)));
|
||||
|
||||
if (bc)
|
||||
{
|
||||
|
|
@ -533,7 +533,7 @@ varbit_in(PG_FUNCTION_ARGS)
|
|||
ereturn(escontext, (Datum) 0,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("\"%.*s\" is not a valid binary digit",
|
||||
pg_mblen(sp), sp)));
|
||||
pg_mblen_cstr(sp), sp)));
|
||||
|
||||
x >>= 1;
|
||||
if (x == 0)
|
||||
|
|
@ -558,7 +558,7 @@ varbit_in(PG_FUNCTION_ARGS)
|
|||
ereturn(escontext, (Datum) 0,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("\"%.*s\" is not a valid hexadecimal digit",
|
||||
pg_mblen(sp), sp)));
|
||||
pg_mblen_cstr(sp), sp)));
|
||||
|
||||
if (bc)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -799,8 +799,11 @@ text_catenate(text *t1, text *t2)
|
|||
* charlen_to_bytelen()
|
||||
* Compute the number of bytes occupied by n characters starting at *p
|
||||
*
|
||||
* It is caller's responsibility that there actually are n characters;
|
||||
* the string need not be null-terminated.
|
||||
* The caller shall ensure there are n complete characters. Callers achieve
|
||||
* this by deriving "n" from regmatch_t findings from searching a wchar array.
|
||||
* pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
|
||||
* matches will end no later than the last complete character. (The string
|
||||
* need not be null-terminated.)
|
||||
*/
|
||||
static int
|
||||
charlen_to_bytelen(const char *p, int n)
|
||||
|
|
@ -815,7 +818,7 @@ charlen_to_bytelen(const char *p, int n)
|
|||
const char *s;
|
||||
|
||||
for (s = p; n > 0; n--)
|
||||
s += pg_mblen(s);
|
||||
s += pg_mblen_unbounded(s); /* caller verified encoding */
|
||||
|
||||
return s - p;
|
||||
}
|
||||
|
|
@ -949,6 +952,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
|
|||
int32 slice_start;
|
||||
int32 slice_size;
|
||||
int32 slice_strlen;
|
||||
int32 slice_len;
|
||||
text *slice;
|
||||
int32 E1;
|
||||
int32 i;
|
||||
|
|
@ -1018,7 +1022,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
|
|||
slice = (text *) DatumGetPointer(str);
|
||||
|
||||
/* see if we got back an empty string */
|
||||
if (VARSIZE_ANY_EXHDR(slice) == 0)
|
||||
slice_len = VARSIZE_ANY_EXHDR(slice);
|
||||
if (slice_len == 0)
|
||||
{
|
||||
if (slice != (text *) DatumGetPointer(str))
|
||||
pfree(slice);
|
||||
|
|
@ -1027,7 +1032,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
|
|||
|
||||
/* Now we can get the actual length of the slice in MB characters */
|
||||
slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
|
||||
VARSIZE_ANY_EXHDR(slice));
|
||||
slice_len);
|
||||
|
||||
/*
|
||||
* Check that the start position wasn't > slice_strlen. If so, SQL99
|
||||
|
|
@ -1054,7 +1059,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
|
|||
*/
|
||||
p = VARDATA_ANY(slice);
|
||||
for (i = 0; i < S1 - 1; i++)
|
||||
p += pg_mblen(p);
|
||||
p += pg_mblen_unbounded(p);
|
||||
|
||||
/* hang onto a pointer to our start position */
|
||||
s = p;
|
||||
|
|
@ -1064,7 +1069,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
|
|||
* length.
|
||||
*/
|
||||
for (i = S1; i < E1; i++)
|
||||
p += pg_mblen(p);
|
||||
p += pg_mblen_unbounded(p);
|
||||
|
||||
ret = (text *) palloc(VARHDRSZ + (p - s));
|
||||
SET_VARSIZE(ret, VARHDRSZ + (p - s));
|
||||
|
|
@ -1362,6 +1367,8 @@ retry:
|
|||
*/
|
||||
if (state->is_multibyte_char_in_char)
|
||||
{
|
||||
const char *haystack_end = state->str1 + state->len1;
|
||||
|
||||
/* Walk one character at a time, until we reach the match. */
|
||||
|
||||
/* the search should never move backwards. */
|
||||
|
|
@ -1370,7 +1377,7 @@ retry:
|
|||
while (state->refpoint < matchptr)
|
||||
{
|
||||
/* step to next character. */
|
||||
state->refpoint += pg_mblen(state->refpoint);
|
||||
state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
|
||||
state->refpos++;
|
||||
|
||||
/*
|
||||
|
|
@ -4685,6 +4692,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
|
|||
}
|
||||
else
|
||||
{
|
||||
const char *end_ptr;
|
||||
|
||||
/*
|
||||
* When fldsep is NULL, each character in the input string becomes a
|
||||
* separate element in the result set. The separator is effectively
|
||||
|
|
@ -4693,10 +4702,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
|
|||
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
|
||||
|
||||
start_ptr = VARDATA_ANY(inputstring);
|
||||
end_ptr = start_ptr + inputstring_len;
|
||||
|
||||
while (inputstring_len > 0)
|
||||
{
|
||||
int chunk_len = pg_mblen(start_ptr);
|
||||
int chunk_len = pg_mblen_range(start_ptr, end_ptr);
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
|
|
@ -5600,7 +5610,7 @@ text_reverse(PG_FUNCTION_ARGS)
|
|||
{
|
||||
int sz;
|
||||
|
||||
sz = pg_mblen(p);
|
||||
sz = pg_mblen_range(p, endp);
|
||||
dst -= sz;
|
||||
memcpy(dst, p, sz);
|
||||
p += sz;
|
||||
|
|
@ -5761,7 +5771,7 @@ text_format(PG_FUNCTION_ARGS)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized format() type specifier \"%.*s\"",
|
||||
pg_mblen(cp), cp),
|
||||
pg_mblen_range(cp, end_ptr), cp),
|
||||
errhint("For a single \"%%\" use \"%%%%\".")));
|
||||
|
||||
/* If indirect width was specified, get its value */
|
||||
|
|
@ -5882,7 +5892,7 @@ text_format(PG_FUNCTION_ARGS)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized format() type specifier \"%.*s\"",
|
||||
pg_mblen(cp), cp),
|
||||
pg_mblen_range(cp, end_ptr), cp),
|
||||
errhint("For a single \"%%\" use \"%%%%\".")));
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2338,8 +2338,7 @@ sqlchar_to_unicode(const char *s)
|
|||
char *utf8string;
|
||||
pg_wchar ret[2]; /* need space for trailing zero */
|
||||
|
||||
/* note we're not assuming s is null-terminated */
|
||||
utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
|
||||
utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
|
||||
|
||||
pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
|
||||
pg_encoding_mblen(PG_UTF8, utf8string));
|
||||
|
|
@ -2392,7 +2391,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
|
|||
|
||||
initStringInfo(&buf);
|
||||
|
||||
for (p = ident; *p; p += pg_mblen(p))
|
||||
for (p = ident; *p; p += pg_mblen_cstr(p))
|
||||
{
|
||||
if (*p == ':' && (p == ident || fully_escaped))
|
||||
appendStringInfoString(&buf, "_x003A_");
|
||||
|
|
@ -2417,7 +2416,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
|
|||
: !is_valid_xml_namechar(u))
|
||||
appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
|
||||
else
|
||||
appendBinaryStringInfo(&buf, p, pg_mblen(p));
|
||||
appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2440,7 +2439,7 @@ map_xml_name_to_sql_identifier(const char *name)
|
|||
|
||||
initStringInfo(&buf);
|
||||
|
||||
for (p = name; *p; p += pg_mblen(p))
|
||||
for (p = name; *p; p += pg_mblen_cstr(p))
|
||||
{
|
||||
if (*p == '_' && *(p + 1) == 'x'
|
||||
&& isxdigit((unsigned char) *(p + 2))
|
||||
|
|
@ -2458,7 +2457,7 @@ map_xml_name_to_sql_identifier(const char *name)
|
|||
p += 6;
|
||||
}
|
||||
else
|
||||
appendBinaryStringInfo(&buf, p, pg_mblen(p));
|
||||
appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
|
||||
}
|
||||
|
||||
return buf.data;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "catalog/namespace.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/memdebug.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "utils/relcache.h"
|
||||
#include "varatt.h"
|
||||
|
|
@ -97,6 +98,13 @@ static char *perform_default_encoding_conversion(const char *src,
|
|||
int len, bool is_client_to_server);
|
||||
static int cliplen(const char *str, int len, int limit);
|
||||
|
||||
pg_attribute_noreturn()
|
||||
static void report_invalid_encoding_int(int encoding, const char *mbstr,
|
||||
int mblen, int len);
|
||||
|
||||
pg_attribute_noreturn()
|
||||
static void report_invalid_encoding_db(const char *mbstr, int mblen, int len);
|
||||
|
||||
|
||||
/*
|
||||
* Prepare for a future call to SetClientEncoding. Success should mean
|
||||
|
|
@ -1019,11 +1027,126 @@ pg_encoding_wchar2mb_with_len(int encoding,
|
|||
return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
|
||||
}
|
||||
|
||||
/* returns the byte length of a multibyte character */
|
||||
/*
|
||||
* Returns the byte length of a multibyte character sequence in a
|
||||
* null-terminated string. Raises an illegal byte sequence error if the
|
||||
* sequence would hit a null terminator.
|
||||
*
|
||||
* The caller is expected to have checked for a terminator at *mbstr == 0
|
||||
* before calling, but some callers want 1 in that case, so this function
|
||||
* continues that tradition.
|
||||
*
|
||||
* This must only be used for strings that have a null-terminator to enable
|
||||
* bounds detection.
|
||||
*/
|
||||
int
|
||||
pg_mblen_cstr(const char *mbstr)
|
||||
{
|
||||
int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
||||
|
||||
/*
|
||||
* The .mblen functions return 1 when given a pointer to a terminator.
|
||||
* Some callers depend on that, so we tolerate it for now. Well-behaved
|
||||
* callers check the leading byte for a terminator *before* calling.
|
||||
*/
|
||||
for (int i = 1; i < length; ++i)
|
||||
if (unlikely(mbstr[i] == 0))
|
||||
report_invalid_encoding_db(mbstr, length, i);
|
||||
|
||||
/*
|
||||
* String should be NUL-terminated, but checking that would make typical
|
||||
* callers O(N^2), tripling Valgrind check-world time. Unless
|
||||
* VALGRIND_EXPENSIVE, check 1 byte after each actual character. (If we
|
||||
* found a character, not a terminator, the next byte must be a terminator
|
||||
* or the start of the next character.) If the caller iterates the whole
|
||||
* string, the last call will diagnose a missing terminator.
|
||||
*/
|
||||
if (mbstr[0] != '\0')
|
||||
{
|
||||
#ifdef VALGRIND_EXPENSIVE
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr));
|
||||
#else
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the byte length of a multibyte character sequence bounded by a range
|
||||
* [mbstr, end) of at least one byte in size. Raises an illegal byte sequence
|
||||
* error if the sequence would exceed the range.
|
||||
*/
|
||||
int
|
||||
pg_mblen_range(const char *mbstr, const char *end)
|
||||
{
|
||||
int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
||||
|
||||
Assert(end > mbstr);
|
||||
#ifdef VALGRIND_EXPENSIVE
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr);
|
||||
#else
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
|
||||
#endif
|
||||
|
||||
if (unlikely(mbstr + length > end))
|
||||
report_invalid_encoding_db(mbstr, length, end - mbstr);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the byte length of a multibyte character sequence bounded by a range
|
||||
* extending for 'limit' bytes, which must be at least one. Raises an illegal
|
||||
* byte sequence error if the sequence would exceed the range.
|
||||
*/
|
||||
int
|
||||
pg_mblen_with_len(const char *mbstr, int limit)
|
||||
{
|
||||
int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
||||
|
||||
Assert(limit >= 1);
|
||||
#ifdef VALGRIND_EXPENSIVE
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit);
|
||||
#else
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
|
||||
#endif
|
||||
|
||||
if (unlikely(length > limit))
|
||||
report_invalid_encoding_db(mbstr, length, limit);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Returns the length of a multibyte character sequence, without any
|
||||
* validation of bounds.
|
||||
*
|
||||
* PLEASE NOTE: This function can only be used safely if the caller has
|
||||
* already verified the input string, since otherwise there is a risk of
|
||||
* overrunning the buffer if the string is invalid. A prior call to a
|
||||
* pg_mbstrlen* function suffices.
|
||||
*/
|
||||
int
|
||||
pg_mblen_unbounded(const char *mbstr)
|
||||
{
|
||||
int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
||||
|
||||
VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/*
|
||||
* Historical name for pg_mblen_unbounded(). Should not be used and will be
|
||||
* removed in a later version.
|
||||
*/
|
||||
int
|
||||
pg_mblen(const char *mbstr)
|
||||
{
|
||||
return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
||||
return pg_mblen_unbounded(mbstr);
|
||||
}
|
||||
|
||||
/* returns the display length of a multibyte character */
|
||||
|
|
@ -1045,14 +1168,14 @@ pg_mbstrlen(const char *mbstr)
|
|||
|
||||
while (*mbstr)
|
||||
{
|
||||
mbstr += pg_mblen(mbstr);
|
||||
mbstr += pg_mblen_cstr(mbstr);
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/* returns the length (counted in wchars) of a multibyte string
|
||||
* (not necessarily NULL terminated)
|
||||
* (stops at the first of "limit" or a NUL)
|
||||
*/
|
||||
int
|
||||
pg_mbstrlen_with_len(const char *mbstr, int limit)
|
||||
|
|
@ -1065,7 +1188,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
|
|||
|
||||
while (limit > 0 && *mbstr)
|
||||
{
|
||||
int l = pg_mblen(mbstr);
|
||||
int l = pg_mblen_with_len(mbstr, limit);
|
||||
|
||||
limit -= l;
|
||||
mbstr += l;
|
||||
|
|
@ -1135,7 +1258,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
|
|||
|
||||
while (len > 0 && *mbstr)
|
||||
{
|
||||
l = pg_mblen(mbstr);
|
||||
l = pg_mblen_with_len(mbstr, len);
|
||||
nch++;
|
||||
if (nch > limit)
|
||||
break;
|
||||
|
|
@ -1699,12 +1822,19 @@ void
|
|||
report_invalid_encoding(int encoding, const char *mbstr, int len)
|
||||
{
|
||||
int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
|
||||
|
||||
report_invalid_encoding_int(encoding, mbstr, l, len);
|
||||
}
|
||||
|
||||
static void
|
||||
report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len)
|
||||
{
|
||||
char buf[8 * 5 + 1];
|
||||
char *p = buf;
|
||||
int j,
|
||||
jlimit;
|
||||
|
||||
jlimit = Min(l, len);
|
||||
jlimit = Min(mblen, len);
|
||||
jlimit = Min(jlimit, 8); /* prevent buffer overrun */
|
||||
|
||||
for (j = 0; j < jlimit; j++)
|
||||
|
|
@ -1721,6 +1851,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
|
|||
buf)));
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: report an invalid byte sequence using the current
 * database encoding, forwarding the remaining arguments unchanged to
 * report_invalid_encoding_int().
 */
static void
report_invalid_encoding_db(const char *mbstr, int mblen, int len)
{
	int			db_encoding = GetDatabaseEncoding();

	report_invalid_encoding_int(db_encoding, mbstr, mblen, len);
}
|
||||
|
||||
/*
|
||||
* report_untranslatable_char: complain about untranslatable character
|
||||
*
|
||||
|
|
|
|||
|
|
@ -697,7 +697,14 @@ extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
|
|||
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
|
||||
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
|
||||
extern size_t pg_wchar_strlen(const pg_wchar *str);
|
||||
extern int pg_mblen_cstr(const char *mbstr);
|
||||
extern int pg_mblen_range(const char *mbstr, const char *end);
|
||||
extern int pg_mblen_with_len(const char *mbstr, int limit);
|
||||
extern int pg_mblen_unbounded(const char *mbstr);
|
||||
|
||||
/* deprecated */
|
||||
extern int pg_mblen(const char *mbstr);
|
||||
|
||||
extern int pg_dsplen(const char *mbstr);
|
||||
extern int pg_mbstrlen(const char *mbstr);
|
||||
extern int pg_mbstrlen_with_len(const char *mbstr, int limit);
|
||||
|
|
|
|||
|
|
@ -37,13 +37,37 @@ typedef struct
|
|||
/* The second argument of t_iseq() must be a plain ASCII character */
|
||||
#define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c))
|
||||
|
||||
#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
|
||||
/* Copy multibyte character of known byte length, return byte length. */
|
||||
static inline int
ts_copychar_with_len(void *dest, const void *src, int length)
{
	/* The caller already knows the character's size; copy exactly that. */
	(void) memcpy(dest, src, length);

	/* Hand the length back so callers can advance their pointers. */
	return length;
}
|
||||
|
||||
extern int t_isdigit(const char *ptr);
|
||||
extern int t_isspace(const char *ptr);
|
||||
extern int t_isalpha(const char *ptr);
|
||||
extern int t_isalnum(const char *ptr);
|
||||
extern int t_isprint(const char *ptr);
|
||||
/* Copy multibyte character from null-terminated string, return byte length. */
|
||||
static inline int
ts_copychar_cstr(void *dest, const void *src)
{
	/* Measure the character first; pg_mblen_cstr() expects a C string. */
	int			nbytes = pg_mblen_cstr((const char *) src);

	return ts_copychar_with_len(dest, src, nbytes);
}
|
||||
|
||||
/* Historical macro for the above. */
|
||||
#define COPYCHAR ts_copychar_cstr
|
||||
|
||||
#define GENERATE_T_ISCLASS_DECL(character_class) \
|
||||
extern int t_is##character_class##_with_len(const char *ptr, int len); \
|
||||
extern int t_is##character_class##_cstr(const char *ptr); \
|
||||
extern int t_is##character_class##_unbounded(const char *ptr); \
|
||||
\
|
||||
/* deprecated */ \
|
||||
extern int t_is##character_class(const char *ptr);
|
||||
|
||||
GENERATE_T_ISCLASS_DECL(alnum);
|
||||
GENERATE_T_ISCLASS_DECL(alpha);
|
||||
GENERATE_T_ISCLASS_DECL(digit);
|
||||
GENERATE_T_ISCLASS_DECL(print);
|
||||
GENERATE_T_ISCLASS_DECL(space);
|
||||
|
||||
extern char *lowerstr(const char *str);
|
||||
extern char *lowerstr_with_len(const char *str, int len);
|
||||
|
|
|
|||
|
|
@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state,
|
|||
extern void close_tsvector_parser(TSVectorParseState state);
|
||||
|
||||
/* phrase operator begins with '<' */
|
||||
#define ISOPERATOR(x) \
|
||||
( pg_mblen(x) == 1 && ( *(x) == '!' || \
|
||||
*(x) == '&' || \
|
||||
*(x) == '|' || \
|
||||
*(x) == '(' || \
|
||||
*(x) == ')' || \
|
||||
*(x) == '<' \
|
||||
) )
|
||||
#define ISOPERATOR(x) (*(x) == '!' || \
|
||||
*(x) == '&' || \
|
||||
*(x) == '|' || \
|
||||
*(x) == '(' || \
|
||||
*(x) == ')' || \
|
||||
*(x) == '<')
|
||||
|
||||
/* parse_tsquery */
|
||||
|
||||
|
|
|
|||
|
|
@ -414,7 +414,8 @@ parse_test_flags(test_re_flags *flags, text *opts)
|
|||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid regular expression test option: \"%.*s\"",
|
||||
pg_mblen(opt_p + i), opt_p + i)));
|
||||
pg_mblen_range(opt_p + i, opt_p + opt_len),
|
||||
opt_p + i)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue