mirror of
https://github.com/postgres/postgres.git
synced 2026-05-19 08:41:23 -04:00
Don't accept length of -1 in pg_locale.h APIs.
Reverts ac30021356. Per discussion, that commit interfered with useful
tooling, and was not worth the special cases.
Suggested-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/s32n3tm2mjh247f3xkkxkdk7cf77hglbr3ia3hrsdjylajou7y@nlldpag3tjd5
This commit is contained in:
parent
41b60bf172
commit
6d22c67c3b
8 changed files with 297 additions and 254 deletions
|
|
@ -1262,11 +1262,10 @@ get_collation_actual_version(char collprovider, const char *collcollate)
|
|||
|
||||
/* lowercasing/casefolding in C locale */
|
||||
static size_t
|
||||
strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
||||
strlower_c(char *dst, size_t dstsize, const char *src, size_t srclen)
|
||||
{
|
||||
int i;
|
||||
|
||||
srclen = (srclen >= 0) ? srclen : strlen(src);
|
||||
for (i = 0; i < srclen && i < dstsize; i++)
|
||||
dst[i] = pg_ascii_tolower(src[i]);
|
||||
if (i < dstsize)
|
||||
|
|
@ -1276,12 +1275,11 @@ strlower_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
|||
|
||||
/* titlecasing in C locale */
|
||||
static size_t
|
||||
strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
||||
strtitle_c(char *dst, size_t dstsize, const char *src, size_t srclen)
|
||||
{
|
||||
bool wasalnum = false;
|
||||
int i;
|
||||
|
||||
srclen = (srclen >= 0) ? srclen : strlen(src);
|
||||
for (i = 0; i < srclen && i < dstsize; i++)
|
||||
{
|
||||
char c = src[i];
|
||||
|
|
@ -1302,11 +1300,10 @@ strtitle_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
|||
|
||||
/* uppercasing in C locale */
|
||||
static size_t
|
||||
strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
||||
strupper_c(char *dst, size_t dstsize, const char *src, size_t srclen)
|
||||
{
|
||||
int i;
|
||||
|
||||
srclen = (srclen >= 0) ? srclen : strlen(src);
|
||||
for (i = 0; i < srclen && i < dstsize; i++)
|
||||
dst[i] = pg_ascii_toupper(src[i]);
|
||||
if (i < dstsize)
|
||||
|
|
@ -1315,7 +1312,7 @@ strupper_c(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
|||
}
|
||||
|
||||
size_t
|
||||
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_strlower(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->ctype == NULL)
|
||||
|
|
@ -1325,7 +1322,7 @@ pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
size_t
|
||||
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_strtitle(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->ctype == NULL)
|
||||
|
|
@ -1335,7 +1332,7 @@ pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
size_t
|
||||
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_strupper(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->ctype == NULL)
|
||||
|
|
@ -1345,7 +1342,7 @@ pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
size_t
|
||||
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_strfold(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
/* in the C locale, casefolding is the same as lowercasing */
|
||||
|
|
@ -1363,7 +1360,7 @@ pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* pg_strfold(..., default_locale)?
|
||||
*/
|
||||
size_t
|
||||
pg_downcase_ident(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
||||
pg_downcase_ident(char *dst, size_t dstsize, const char *src, size_t srclen)
|
||||
{
|
||||
pg_locale_t locale = default_locale;
|
||||
|
||||
|
|
@ -1383,7 +1380,7 @@ pg_downcase_ident(char *dst, size_t dstsize, const char *src, ssize_t srclen)
|
|||
int
|
||||
pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strncoll(arg1, -1, arg2, -1, locale);
|
||||
return locale->collate->strcoll(arg1, arg2, locale);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1393,15 +1390,14 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
|
|||
* appropriate for the given locale, platform, and database encoding. If the
|
||||
* locale is not specified, use the database collation.
|
||||
*
|
||||
* The input strings must be encoded in the database encoding. If an input
|
||||
* string is NUL-terminated, its length may be specified as -1.
|
||||
* The input strings must be encoded in the database encoding.
|
||||
*
|
||||
* The caller is responsible for breaking ties if the collation is
|
||||
* deterministic; this maintains consistency with pg_strnxfrm(), which cannot
|
||||
* easily account for deterministic collations.
|
||||
*/
|
||||
int
|
||||
pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
||||
pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strncoll(arg1, len1, arg2, len2, locale);
|
||||
|
|
@ -1433,7 +1429,7 @@ pg_strxfrm_enabled(pg_locale_t locale)
|
|||
size_t
|
||||
pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strnxfrm(dest, destsize, src, -1, locale);
|
||||
return locale->collate->strxfrm(dest, destsize, src, locale);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1443,9 +1439,8 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
|
|||
* ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
|
||||
* untransformed strings.
|
||||
*
|
||||
* The input string must be encoded in the database encoding. If the input
|
||||
* string is NUL-terminated, its length may be specified as -1. If 'destsize'
|
||||
* is zero, 'dest' may be NULL.
|
||||
* The input string must be encoded in the database encoding. If 'destsize' is
|
||||
* zero, 'dest' may be NULL.
|
||||
*
|
||||
* Not all providers support pg_strnxfrm() safely. The caller should check
|
||||
* pg_strxfrm_enabled() first, otherwise this function may return wrong
|
||||
|
|
@ -1456,7 +1451,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
|
|||
* 'destsize' or greater, the resulting contents of 'dest' are undefined.
|
||||
*/
|
||||
size_t
|
||||
pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strnxfrm(dest, destsize, src, srclen, locale);
|
||||
|
|
@ -1481,7 +1476,7 @@ size_t
|
|||
pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strnxfrm_prefix(dest, destsize, src, -1, locale);
|
||||
return locale->collate->strxfrm_prefix(dest, destsize, src, locale);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1491,8 +1486,7 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
|||
* memcmp() on the byte sequence is equivalent to pg_strncoll() on
|
||||
* untransformed strings. The result is not nul-terminated.
|
||||
*
|
||||
* The input string must be encoded in the database encoding. If the input
|
||||
* string is NUL-terminated, its length may be specified as -1.
|
||||
* The input string must be encoded in the database encoding.
|
||||
*
|
||||
* Not all providers support pg_strnxfrm_prefix() safely. The caller should
|
||||
* check pg_strxfrm_prefix_enabled() first, otherwise this function may return
|
||||
|
|
@ -1504,7 +1498,7 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
|||
*/
|
||||
size_t
|
||||
pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
size_t srclen, pg_locale_t locale)
|
||||
{
|
||||
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,8 +60,7 @@ initcap_wbnext(void *state)
|
|||
{
|
||||
struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
|
||||
|
||||
while (wbstate->offset < wbstate->len &&
|
||||
wbstate->str[wbstate->offset] != '\0')
|
||||
while (wbstate->offset < wbstate->len)
|
||||
{
|
||||
char32_t u = utf8_to_unicode((const unsigned char *) wbstate->str +
|
||||
wbstate->offset);
|
||||
|
|
@ -84,7 +83,7 @@ initcap_wbnext(void *state)
|
|||
}
|
||||
|
||||
static size_t
|
||||
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strlower_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return unicode_strlower(dest, destsize, src, srclen,
|
||||
|
|
@ -92,12 +91,12 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strtitle_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
struct WordBoundaryState wbstate = {
|
||||
.str = src,
|
||||
.len = (srclen < 0) ? strlen(src) : srclen,
|
||||
.len = srclen,
|
||||
.offset = 0,
|
||||
.posix = !locale->builtin.casemap_full,
|
||||
.init = false,
|
||||
|
|
@ -110,7 +109,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strupper_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return unicode_strupper(dest, destsize, src, srclen,
|
||||
|
|
@ -118,7 +117,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strfold_builtin(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return unicode_strfold(dest, destsize, src, srclen,
|
||||
|
|
|
|||
|
|
@ -57,29 +57,33 @@ extern UCollator *pg_ucol_open(const char *loc_str);
|
|||
static UCaseMap *pg_ucasemap_open(const char *loc_str);
|
||||
|
||||
static size_t strlower_icu(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strupper_icu(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strfold_icu(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
static int strncoll_icu(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static int strncoll_icu(const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2,
|
||||
pg_locale_t locale);
|
||||
static int strcoll_icu(const char *arg1, const char *arg2,
|
||||
pg_locale_t locale);
|
||||
static size_t strnxfrm_icu(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strxfrm_icu(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale);
|
||||
extern char *get_collation_actual_version_icu(const char *collcollate);
|
||||
|
||||
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
|
||||
|
|
@ -96,20 +100,24 @@ static UConverter *icu_converter = NULL;
|
|||
|
||||
static UCollator *make_icu_collator(const char *iculocstr,
|
||||
const char *icurules);
|
||||
static int strncoll_icu(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
pg_locale_t locale);
|
||||
static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strxfrm_prefix_icu(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale);
|
||||
#ifdef HAVE_UCOL_STRCOLLUTF8
|
||||
static int strncoll_icu_utf8(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
static int strncoll_icu_utf8(const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2,
|
||||
pg_locale_t locale);
|
||||
static int strcoll_icu_utf8(const char *arg1,
|
||||
const char *arg2,
|
||||
pg_locale_t locale);
|
||||
#endif
|
||||
static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale);
|
||||
static void init_icu_converter(void);
|
||||
static int32_t uchar_length(UConverter *converter,
|
||||
const char *str, int32_t len);
|
||||
|
|
@ -124,7 +132,7 @@ static void icu_set_collation_attributes(UCollator *collator, const char *loc,
|
|||
UErrorCode *status);
|
||||
static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
|
||||
size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
|
|
@ -154,19 +162,26 @@ tolower_icu(pg_wchar wc, pg_locale_t locale)
|
|||
|
||||
static const struct collate_methods collate_methods_icu = {
|
||||
.strncoll = strncoll_icu,
|
||||
.strcoll = strcoll_icu,
|
||||
.strnxfrm = strnxfrm_icu,
|
||||
.strxfrm = strxfrm_icu,
|
||||
.strnxfrm_prefix = strnxfrm_prefix_icu,
|
||||
.strxfrm_prefix = strxfrm_prefix_icu,
|
||||
.strxfrm_is_safe = true,
|
||||
};
|
||||
|
||||
static const struct collate_methods collate_methods_icu_utf8 = {
|
||||
#ifdef HAVE_UCOL_STRCOLLUTF8
|
||||
.strncoll = strncoll_icu_utf8,
|
||||
.strcoll = strcoll_icu_utf8,
|
||||
#else
|
||||
.strncoll = strncoll_icu,
|
||||
.strcoll = strcoll_icu,
|
||||
#endif
|
||||
.strnxfrm = strnxfrm_icu,
|
||||
.strxfrm = strxfrm_icu,
|
||||
.strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
|
||||
.strxfrm_prefix = strxfrm_prefix_icu_utf8,
|
||||
.strxfrm_is_safe = true,
|
||||
};
|
||||
|
||||
|
|
@ -604,35 +619,35 @@ make_icu_collator(const char *iculocstr, const char *icurules)
|
|||
}
|
||||
|
||||
static size_t
|
||||
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strlower_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strtitle_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strupper_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strfold_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strlower_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
@ -646,7 +661,7 @@ strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strtitle_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
@ -660,7 +675,7 @@ strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strupper_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
@ -674,7 +689,7 @@ strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strfold_icu_utf8(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
@ -695,7 +710,7 @@ strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
*/
|
||||
static size_t
|
||||
downcase_ident_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
size_t srclen, pg_locale_t locale)
|
||||
{
|
||||
int i;
|
||||
bool libc_lower;
|
||||
|
|
@ -724,12 +739,11 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src,
|
|||
* strncoll_icu_utf8
|
||||
*
|
||||
* Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
|
||||
* database encoding. An argument length of -1 means the string is
|
||||
* NUL-terminated.
|
||||
* database encoding.
|
||||
*/
|
||||
#ifdef HAVE_UCOL_STRCOLLUTF8
|
||||
int
|
||||
strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
||||
strncoll_icu_utf8(const char *arg1, size_t len1, const char *arg2, size_t len2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
int result;
|
||||
|
|
@ -748,12 +762,31 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
strcoll_icu_utf8(const char *arg1, const char *arg2, pg_locale_t locale)
|
||||
{
|
||||
int result;
|
||||
UErrorCode status;
|
||||
|
||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
result = ucol_strcollUTF8(locale->icu.ucol,
|
||||
arg1, -1,
|
||||
arg2, -1,
|
||||
&status);
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("collation failed: %s", u_errorName(status))));
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 'srclen' of -1 means the strings are NUL-terminated */
|
||||
size_t
|
||||
strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
static size_t
|
||||
strnxfrm_icu_internal(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UChar sbuf[TEXTBUFLEN / sizeof(UChar)];
|
||||
UChar *uchar = sbuf;
|
||||
|
|
@ -789,11 +822,24 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
return result_bsize;
|
||||
}
|
||||
|
||||
/* 'srclen' of -1 means the strings are NUL-terminated */
|
||||
size_t
|
||||
strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
static size_t
|
||||
strnxfrm_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_icu_internal(dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strxfrm_icu(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_icu_internal(dest, destsize, src, -1, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strnxfrm_prefix_icu_utf8_internal(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
size_t result;
|
||||
UCharIterator iter;
|
||||
|
|
@ -819,6 +865,21 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
|
|||
return result;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_prefix_icu_utf8_internal(dest, destsize, src, -1, locale);
|
||||
}
|
||||
|
||||
char *
|
||||
get_collation_actual_version_icu(const char *collcollate)
|
||||
{
|
||||
|
|
@ -934,7 +995,7 @@ convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
|
|||
|
||||
static int32_t
|
||||
icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen, pg_locale_t locale)
|
||||
const char *src, size_t srclen, pg_locale_t locale)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
int32_t len_conv;
|
||||
|
|
@ -1004,15 +1065,15 @@ foldcase_options(const char *locale)
|
|||
* strncoll_icu
|
||||
*
|
||||
* Convert the arguments from the database encoding to UChar strings, then
|
||||
* call ucol_strcoll(). An argument length of -1 means that the string is
|
||||
* NUL-terminated.
|
||||
* call ucol_strcoll().
|
||||
*
|
||||
* When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
|
||||
* caller should call that instead.
|
||||
*/
|
||||
static int
|
||||
strncoll_icu(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2, pg_locale_t locale)
|
||||
strncoll_icu_internal(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UChar sbuf[TEXTBUFLEN / sizeof(UChar)];
|
||||
UChar *buf = sbuf;
|
||||
|
|
@ -1054,11 +1115,23 @@ strncoll_icu(const char *arg1, ssize_t len1,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* 'srclen' of -1 means the strings are NUL-terminated */
|
||||
static int
|
||||
strncoll_icu(const char *arg1, size_t len1, const char *arg2, size_t len2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strncoll_icu_internal(arg1, len1, arg2, len2, locale);
|
||||
}
|
||||
|
||||
static int
|
||||
strcoll_icu(const char *arg1, const char *arg2, pg_locale_t locale)
|
||||
{
|
||||
return strncoll_icu_internal(arg1, -1, arg2, -1, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strnxfrm_prefix_icu(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
strnxfrm_prefix_icu_internal(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
UChar sbuf[TEXTBUFLEN / sizeof(UChar)];
|
||||
UChar *uchar = sbuf;
|
||||
|
|
@ -1100,6 +1173,20 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
|
|||
return result_bsize;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_prefix_icu_internal(dest, destsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strxfrm_prefix_icu(char *dest, size_t destsize, const char *src,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return strnxfrm_prefix_icu_internal(dest, destsize, src, -1, locale);
|
||||
}
|
||||
|
||||
static void
|
||||
init_icu_converter(void)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -82,42 +82,48 @@
|
|||
|
||||
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
||||
|
||||
static int strncoll_libc(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
static int strncoll_libc(const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2,
|
||||
pg_locale_t locale);
|
||||
static int strcoll_libc(const char *arg1, const char *arg2,
|
||||
pg_locale_t locale);
|
||||
static size_t strnxfrm_libc(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strxfrm_libc(char *dest, size_t destsize,
|
||||
const char *src, pg_locale_t locale);
|
||||
extern char *get_collation_actual_version_libc(const char *collcollate);
|
||||
static locale_t make_libc_collator(const char *collate,
|
||||
const char *ctype);
|
||||
|
||||
#ifdef WIN32
|
||||
static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
static int strncoll_libc_win32_utf8(const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2,
|
||||
pg_locale_t locale);
|
||||
static int strcoll_libc_win32_utf8(const char *arg1, const char *arg2,
|
||||
pg_locale_t locale);
|
||||
#endif
|
||||
|
||||
static size_t char2wchar(wchar_t *to, size_t tolen, const char *from,
|
||||
size_t fromlen, locale_t loc);
|
||||
|
||||
static size_t strlower_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strlower_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strtitle_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strtitle_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strupper_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strupper_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
static bool
|
||||
|
|
@ -324,7 +330,7 @@ tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
|
|||
*/
|
||||
static size_t
|
||||
downcase_ident_libc_sb(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
size_t srclen, pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
int i;
|
||||
|
|
@ -420,8 +426,11 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
|
|||
|
||||
static const struct collate_methods collate_methods_libc = {
|
||||
.strncoll = strncoll_libc,
|
||||
.strcoll = strcoll_libc,
|
||||
.strnxfrm = strnxfrm_libc,
|
||||
.strxfrm = strxfrm_libc,
|
||||
.strnxfrm_prefix = NULL,
|
||||
.strxfrm_prefix = NULL,
|
||||
|
||||
/*
|
||||
* Unfortunately, it seems that strxfrm() for non-C collations is broken
|
||||
|
|
@ -442,7 +451,9 @@ static const struct collate_methods collate_methods_libc = {
|
|||
#ifdef WIN32
|
||||
static const struct collate_methods collate_methods_libc_win32_utf8 = {
|
||||
.strncoll = strncoll_libc_win32_utf8,
|
||||
.strcoll = strcoll_libc_win32_utf8,
|
||||
.strnxfrm = strnxfrm_libc,
|
||||
.strxfrm = strxfrm_libc,
|
||||
.strnxfrm_prefix = NULL,
|
||||
#ifdef TRUST_STRXFRM
|
||||
.strxfrm_is_safe = true,
|
||||
|
|
@ -453,12 +464,9 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
|
|||
#endif
|
||||
|
||||
static size_t
|
||||
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strlower_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -492,7 +500,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strlower_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -502,9 +510,6 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
|
|
@ -540,12 +545,9 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strtitle_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -596,7 +598,7 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strtitle_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -607,9 +609,6 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
|
|
@ -651,12 +650,9 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strupper_libc_sb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -690,7 +686,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
}
|
||||
|
||||
static size_t
|
||||
strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
strupper_libc_mb(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->lt;
|
||||
|
|
@ -700,9 +696,6 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
|
|
@ -888,18 +881,18 @@ make_libc_collator(const char *collate, const char *ctype)
|
|||
/*
|
||||
* strncoll_libc
|
||||
*
|
||||
* NUL-terminate arguments, if necessary, and pass to strcoll_l().
|
||||
*
|
||||
* An input string length of -1 means that it's already NUL-terminated.
|
||||
* NUL-terminate arguments and pass to strcoll_l().
|
||||
*/
|
||||
int
|
||||
strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
||||
static int
|
||||
strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
char sbuf[TEXTBUFLEN];
|
||||
char *buf = sbuf;
|
||||
size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
|
||||
size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
|
||||
size_t bufsize1 = len1 + 1;
|
||||
size_t bufsize2 = len2 + 1;
|
||||
char *buf1;
|
||||
char *buf2;
|
||||
const char *arg1n;
|
||||
const char *arg2n;
|
||||
int result;
|
||||
|
|
@ -907,32 +900,16 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
|||
if (bufsize1 + bufsize2 > TEXTBUFLEN)
|
||||
buf = palloc(bufsize1 + bufsize2);
|
||||
|
||||
/* nul-terminate arguments if necessary */
|
||||
if (len1 == -1)
|
||||
{
|
||||
arg1n = arg1;
|
||||
}
|
||||
else
|
||||
{
|
||||
char *buf1 = buf;
|
||||
buf1 = buf;
|
||||
buf2 = buf + bufsize1;
|
||||
|
||||
memcpy(buf1, arg1, len1);
|
||||
buf1[len1] = '\0';
|
||||
arg1n = buf1;
|
||||
}
|
||||
memcpy(buf1, arg1, len1);
|
||||
buf1[len1] = '\0';
|
||||
arg1n = buf1;
|
||||
|
||||
if (len2 == -1)
|
||||
{
|
||||
arg2n = arg2;
|
||||
}
|
||||
else
|
||||
{
|
||||
char *buf2 = buf + bufsize1;
|
||||
|
||||
memcpy(buf2, arg2, len2);
|
||||
buf2[len2] = '\0';
|
||||
arg2n = buf2;
|
||||
}
|
||||
memcpy(buf2, arg2, len2);
|
||||
buf2[len2] = '\0';
|
||||
arg2n = buf2;
|
||||
|
||||
result = strcoll_l(arg1n, arg2n, locale->lt);
|
||||
|
||||
|
|
@ -942,15 +919,22 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* strcoll_libc
|
||||
*/
|
||||
static int
|
||||
strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
|
||||
{
|
||||
return strcoll_l(arg1, arg2, locale->lt);
|
||||
}
|
||||
|
||||
/*
|
||||
* strnxfrm_libc
|
||||
*
|
||||
* NUL-terminate src, if necessary, and pass to strxfrm_l().
|
||||
*
|
||||
* A source length of -1 means that it's already NUL-terminated.
|
||||
* NUL-terminate src and pass to strxfrm_l().
|
||||
*/
|
||||
size_t
|
||||
strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
static size_t
|
||||
strnxfrm_libc(char *dest, size_t destsize, const char *src, size_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
char sbuf[TEXTBUFLEN];
|
||||
|
|
@ -958,9 +942,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
size_t bufsize = srclen + 1;
|
||||
size_t result;
|
||||
|
||||
if (srclen == -1)
|
||||
return strxfrm_l(dest, src, destsize, locale->lt);
|
||||
|
||||
if (bufsize > TEXTBUFLEN)
|
||||
buf = palloc(bufsize);
|
||||
|
||||
|
|
@ -979,6 +960,15 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* strxfrm_libc
|
||||
*/
|
||||
static size_t
|
||||
strxfrm_libc(char *dest, size_t destsize, const char *src, pg_locale_t locale)
|
||||
{
|
||||
return strxfrm_l(dest, src, destsize, locale->lt);
|
||||
}
|
||||
|
||||
char *
|
||||
get_collation_actual_version_libc(const char *collcollate)
|
||||
{
|
||||
|
|
@ -1049,13 +1039,11 @@ get_collation_actual_version_libc(const char *collcollate)
|
|||
*
|
||||
* Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
|
||||
* invoke wcscoll_l().
|
||||
*
|
||||
* An input string length of -1 means that it's NUL-terminated.
|
||||
*/
|
||||
#ifdef WIN32
|
||||
static int
|
||||
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
|
||||
ssize_t len2, pg_locale_t locale)
|
||||
strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
|
||||
size_t len2, pg_locale_t locale)
|
||||
{
|
||||
char sbuf[TEXTBUFLEN];
|
||||
char *buf = sbuf;
|
||||
|
|
@ -1069,11 +1057,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
|
|||
|
||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||
|
||||
if (len1 == -1)
|
||||
len1 = strlen(arg1);
|
||||
if (len2 == -1)
|
||||
len2 = strlen(arg2);
|
||||
|
||||
/*
|
||||
* In a 32-bit build, twice the input length can overflow size_t, so we
|
||||
* must be careful.
|
||||
|
|
@ -1126,6 +1109,16 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
static int
|
||||
strcoll_libc_win32_utf8(const char *arg1, const char *arg2,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
size_t len1 = strlen(arg1);
|
||||
size_t len2 = strlen(arg2);
|
||||
|
||||
return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* simple subroutine for reporting errors from newlocale() */
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ static UCaseMap *casemap = NULL;
|
|||
#endif
|
||||
|
||||
typedef size_t (*TestFunc) (char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen);
|
||||
size_t srclen);
|
||||
|
||||
/* simple boundary iterator copied from pg_locale_builtin.c */
|
||||
struct WordBoundaryState
|
||||
|
|
@ -114,6 +114,7 @@ icu_test_full(char *str)
|
|||
char icu_upper[BUFSZ];
|
||||
char icu_fold[BUFSZ];
|
||||
UErrorCode status;
|
||||
size_t len = strlen(str);
|
||||
|
||||
/* full case mapping doesn't use posix semantics */
|
||||
struct WordBoundaryState wbstate = {
|
||||
|
|
@ -125,18 +126,18 @@ icu_test_full(char *str)
|
|||
.prev_alnum = false,
|
||||
};
|
||||
|
||||
unicode_strlower(lower, BUFSZ, str, -1, true);
|
||||
unicode_strtitle(title, BUFSZ, str, -1, true, initcap_wbnext, &wbstate);
|
||||
unicode_strupper(upper, BUFSZ, str, -1, true);
|
||||
unicode_strfold(fold, BUFSZ, str, -1, true);
|
||||
unicode_strlower(lower, BUFSZ, str, len, true);
|
||||
unicode_strtitle(title, BUFSZ, str, len, true, initcap_wbnext, &wbstate);
|
||||
unicode_strupper(upper, BUFSZ, str, len, true);
|
||||
unicode_strfold(fold, BUFSZ, str, len, true);
|
||||
status = U_ZERO_ERROR;
|
||||
ucasemap_utf8ToLower(casemap, icu_lower, BUFSZ, str, -1, &status);
|
||||
ucasemap_utf8ToLower(casemap, icu_lower, BUFSZ, str, len, &status);
|
||||
status = U_ZERO_ERROR;
|
||||
ucasemap_utf8ToTitle(casemap, icu_title, BUFSZ, str, -1, &status);
|
||||
ucasemap_utf8ToTitle(casemap, icu_title, BUFSZ, str, len, &status);
|
||||
status = U_ZERO_ERROR;
|
||||
ucasemap_utf8ToUpper(casemap, icu_upper, BUFSZ, str, -1, &status);
|
||||
ucasemap_utf8ToUpper(casemap, icu_upper, BUFSZ, str, len, &status);
|
||||
status = U_ZERO_ERROR;
|
||||
ucasemap_utf8FoldCase(casemap, icu_fold, BUFSZ, str, -1, &status);
|
||||
ucasemap_utf8FoldCase(casemap, icu_fold, BUFSZ, str, len, &status);
|
||||
|
||||
if (strcmp(lower, icu_lower) != 0)
|
||||
{
|
||||
|
|
@ -209,18 +210,16 @@ static void
|
|||
test_convert(TestFunc tfunc, const char *test_string, const char *expected)
|
||||
{
|
||||
size_t src1len = strlen(test_string);
|
||||
size_t src2len = -1; /* NUL-terminated */
|
||||
size_t dst1len = strlen(expected);
|
||||
size_t dst2len = strlen(expected) + 1; /* NUL-terminated */
|
||||
char *src1 = malloc(src1len);
|
||||
char *dst1 = malloc(dst1len);
|
||||
char *src2 = strdup(test_string);
|
||||
char *dst2 = malloc(dst2len);
|
||||
size_t needed;
|
||||
|
||||
memcpy(src1, test_string, src1len); /* not NUL-terminated */
|
||||
|
||||
/* neither source nor destination are NUL-terminated */
|
||||
/* destination is not NUL-terminated */
|
||||
memset(dst1, 0x7F, dst1len);
|
||||
needed = tfunc(dst1, dst1len, src1, src1len);
|
||||
if (needed != strlen(expected))
|
||||
|
|
@ -236,7 +235,7 @@ test_convert(TestFunc tfunc, const char *test_string, const char *expected)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/* destination is NUL-terminated and source is not */
|
||||
/* destination is NUL-terminated */
|
||||
memset(dst2, 0x7F, dst2len);
|
||||
needed = tfunc(dst2, dst2len, src1, src1len);
|
||||
if (needed != strlen(expected))
|
||||
|
|
@ -252,59 +251,25 @@ test_convert(TestFunc tfunc, const char *test_string, const char *expected)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/* source is NUL-terminated and destination is not */
|
||||
memset(dst1, 0x7F, dst1len);
|
||||
needed = tfunc(dst1, dst1len, src2, src2len);
|
||||
if (needed != strlen(expected))
|
||||
{
|
||||
printf("case_test: convert_case test3 FAILURE: '%s' needed %zu expected %zu\n",
|
||||
test_string, needed, strlen(expected));
|
||||
printf("case_test: convert_case test3 FAILURE: needed %zu\n", needed);
|
||||
exit(1);
|
||||
}
|
||||
if (memcmp(dst1, expected, dst1len) != 0)
|
||||
{
|
||||
printf("case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
|
||||
test_string, (int) dst1len, dst1, expected);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* both source and destination are NUL-terminated */
|
||||
memset(dst2, 0x7F, dst2len);
|
||||
needed = tfunc(dst2, dst2len, src2, src2len);
|
||||
if (needed != strlen(expected))
|
||||
{
|
||||
printf("case_test: convert_case test4 FAILURE: '%s' needed %zu expected %zu\n",
|
||||
test_string, needed, strlen(expected));
|
||||
exit(1);
|
||||
}
|
||||
if (strcmp(dst2, expected) != 0)
|
||||
{
|
||||
printf("case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
|
||||
test_string, dst2, expected);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
free(src1);
|
||||
free(dst1);
|
||||
free(src2);
|
||||
free(dst2);
|
||||
}
|
||||
|
||||
static size_t
|
||||
tfunc_lower(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen)
|
||||
size_t srclen)
|
||||
{
|
||||
return unicode_strlower(dst, dstsize, src, srclen, true);
|
||||
}
|
||||
|
||||
static size_t
|
||||
tfunc_title(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen)
|
||||
size_t srclen)
|
||||
{
|
||||
struct WordBoundaryState wbstate = {
|
||||
.str = src,
|
||||
.len = (srclen < 0) ? strlen(src) : srclen,
|
||||
.len = srclen,
|
||||
.offset = 0,
|
||||
.init = false,
|
||||
.prev_alnum = false,
|
||||
|
|
@ -316,14 +281,14 @@ tfunc_title(char *dst, size_t dstsize, const char *src,
|
|||
|
||||
static size_t
|
||||
tfunc_upper(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen)
|
||||
size_t srclen)
|
||||
{
|
||||
return unicode_strupper(dst, dstsize, src, srclen, true);
|
||||
}
|
||||
|
||||
static size_t
|
||||
tfunc_fold(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen)
|
||||
size_t srclen)
|
||||
{
|
||||
return unicode_strfold(dst, dstsize, src, srclen, true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ static const char32_t *const casekind_map[NCaseKind] =
|
|||
};
|
||||
|
||||
static char32_t find_case_map(char32_t ucs, const char32_t *map);
|
||||
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
static size_t convert_case(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
|
||||
void *wbstate);
|
||||
static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full,
|
||||
|
|
@ -84,8 +84,7 @@ unicode_casefold_simple(char32_t code)
|
|||
* Convert src to lowercase, and return the result length (not including
|
||||
* terminating NUL).
|
||||
*
|
||||
* String src must be encoded in UTF-8. If srclen < 0, src must be
|
||||
* NUL-terminated.
|
||||
* String src must be encoded in UTF-8.
|
||||
*
|
||||
* Result string is stored in dst, truncating if larger than dstsize. If
|
||||
* dstsize is greater than the result length, dst will be NUL-terminated;
|
||||
|
|
@ -98,7 +97,7 @@ unicode_casefold_simple(char32_t code)
|
|||
* conditions are satisfied.
|
||||
*/
|
||||
size_t
|
||||
unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
unicode_strlower(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
bool full)
|
||||
{
|
||||
return convert_case(dst, dstsize, src, srclen, CaseLower, full, NULL,
|
||||
|
|
@ -111,8 +110,7 @@ unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* Convert src to titlecase, and return the result length (not including
|
||||
* terminating NUL).
|
||||
*
|
||||
* String src must be encoded in UTF-8. If srclen < 0, src must be
|
||||
* NUL-terminated.
|
||||
* String src must be encoded in UTF-8.
|
||||
*
|
||||
* Result string is stored in dst, truncating if larger than dstsize. If
|
||||
* dstsize is greater than the result length, dst will be NUL-terminated;
|
||||
|
|
@ -135,7 +133,7 @@ unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* the string to indicate the final boundary.
|
||||
*/
|
||||
size_t
|
||||
unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
unicode_strtitle(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
bool full, WordBoundaryNext wbnext, void *wbstate)
|
||||
{
|
||||
return convert_case(dst, dstsize, src, srclen, CaseTitle, full, wbnext,
|
||||
|
|
@ -148,8 +146,7 @@ unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* Convert src to uppercase, and return the result length (not including
|
||||
* terminating NUL).
|
||||
*
|
||||
* String src must be encoded in UTF-8. If srclen < 0, src must be
|
||||
* NUL-terminated.
|
||||
* String src must be encoded in UTF-8.
|
||||
*
|
||||
* Result string is stored in dst, truncating if larger than dstsize. If
|
||||
* dstsize is greater than the result length, dst will be NUL-terminated;
|
||||
|
|
@ -162,7 +159,7 @@ unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* conditions are satisfied.
|
||||
*/
|
||||
size_t
|
||||
unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
unicode_strupper(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
bool full)
|
||||
{
|
||||
return convert_case(dst, dstsize, src, srclen, CaseUpper, full, NULL,
|
||||
|
|
@ -175,8 +172,7 @@ unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* Case fold src, and return the result length (not including terminating
|
||||
* NUL).
|
||||
*
|
||||
* String src must be encoded in UTF-8. If srclen < 0, src must be
|
||||
* NUL-terminated.
|
||||
* String src must be encoded in UTF-8.
|
||||
*
|
||||
* Result string is stored in dst, truncating if larger than dstsize. If
|
||||
* dstsize is greater than the result length, dst will be NUL-terminated;
|
||||
|
|
@ -186,7 +182,7 @@ unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* required buffer size before allocating.
|
||||
*/
|
||||
size_t
|
||||
unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
unicode_strfold(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
bool full)
|
||||
{
|
||||
return convert_case(dst, dstsize, src, srclen, CaseFold, full, NULL,
|
||||
|
|
@ -210,7 +206,7 @@ unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
* map a single codepoint to multiple codepoints, or depend on conditions.
|
||||
*/
|
||||
static size_t
|
||||
convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
convert_case(char *dst, size_t dstsize, const char *src, size_t srclen,
|
||||
CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
|
||||
void *wbstate)
|
||||
{
|
||||
|
|
@ -229,7 +225,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||
Assert(boundary == 0); /* start of text is always a boundary */
|
||||
}
|
||||
|
||||
while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0')
|
||||
while (srcoff < srclen)
|
||||
{
|
||||
char32_t u1 = utf8_to_unicode((const unsigned char *) src + srcoff);
|
||||
int u1len = unicode_utf8len(u1);
|
||||
|
|
|
|||
|
|
@ -21,13 +21,13 @@ char32_t unicode_titlecase_simple(char32_t code);
|
|||
char32_t unicode_uppercase_simple(char32_t code);
|
||||
char32_t unicode_casefold_simple(char32_t code);
|
||||
size_t unicode_strlower(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, bool full);
|
||||
size_t srclen, bool full);
|
||||
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, bool full,
|
||||
size_t srclen, bool full,
|
||||
WordBoundaryNext wbnext, void *wbstate);
|
||||
size_t unicode_strupper(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, bool full);
|
||||
size_t srclen, bool full);
|
||||
size_t unicode_strfold(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, bool full);
|
||||
size_t srclen, bool full);
|
||||
|
||||
#endif /* UNICODE_CASE_H */
|
||||
|
|
|
|||
|
|
@ -63,20 +63,29 @@ typedef struct pg_locale_struct *pg_locale_t;
|
|||
struct collate_methods
|
||||
{
|
||||
/* required */
|
||||
int (*strncoll) (const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
int (*strncoll) (const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2,
|
||||
pg_locale_t locale);
|
||||
|
||||
int (*strcoll) (const char *arg1, const char *arg2,
|
||||
pg_locale_t locale);
|
||||
|
||||
/* required */
|
||||
size_t (*strnxfrm) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
size_t (*strxfrm) (char *dest, size_t destsize,
|
||||
const char *src, pg_locale_t locale);
|
||||
|
||||
/* optional */
|
||||
size_t (*strnxfrm_prefix) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
size_t (*strxfrm_prefix) (char *dest, size_t destsize,
|
||||
const char *src, pg_locale_t locale);
|
||||
|
||||
/*
|
||||
* If the strnxfrm method is not trusted to return the correct results,
|
||||
* set strxfrm_is_safe to false. If set to false, the method will not be
|
||||
|
|
@ -90,19 +99,19 @@ struct ctype_methods
|
|||
{
|
||||
/* case mapping: LOWER()/INITCAP()/UPPER() */
|
||||
size_t (*strlower) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
size_t (*strtitle) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
size_t (*strupper) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
size_t (*strfold) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
size_t (*downcase_ident) (char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
/* required */
|
||||
|
|
@ -172,32 +181,32 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
|||
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
||||
|
||||
extern size_t pg_strlower(char *dst, size_t dstsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strtitle(char *dst, size_t dstsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strupper(char *dst, size_t dstsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strfold(char *dst, size_t dstsize,
|
||||
const char *src, ssize_t srclen,
|
||||
const char *src, size_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_downcase_ident(char *dst, size_t dstsize,
|
||||
const char *src, ssize_t srclen);
|
||||
const char *src, size_t srclen);
|
||||
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
|
||||
extern int pg_strncoll(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2, pg_locale_t locale);
|
||||
extern int pg_strncoll(const char *arg1, size_t len1,
|
||||
const char *arg2, size_t len2, pg_locale_t locale);
|
||||
extern bool pg_strxfrm_enabled(pg_locale_t locale);
|
||||
extern size_t pg_strxfrm(char *dest, const char *src, size_t destsize,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strnxfrm(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
extern bool pg_strxfrm_prefix_enabled(pg_locale_t locale);
|
||||
extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
size_t srclen, pg_locale_t locale);
|
||||
|
||||
extern bool pg_iswdigit(pg_wchar wc, pg_locale_t locale);
|
||||
extern bool pg_iswalpha(pg_wchar wc, pg_locale_t locale);
|
||||
|
|
|
|||
Loading…
Reference in a new issue