From c8308a984d9dd45b3111bd99c8fba23cb78aeaa0 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 26 Feb 2026 12:23:22 -0800 Subject: [PATCH] Fix more multibyte issues in ltree. Commit 84d5efa7e3 missed some multibyte issues caused by short-circuit logic in the callers. The callers assumed that if the predicate string is longer than the label string, then it couldn't possibly be a match, but it can be when using case-insensitive matching (LVAR_INCASE) if casefolding changes the byte length. Fix by refactoring to get rid of the short-circuit logic as well as the function pointer, and consolidate the logic in a replacement function ltree_label_match(). Discussion: https://postgr.es/m/02c6ef6cf56a5013ede61ad03c7a26affd27d449.camel@j-davis.com Backpatch-through: 14 --- contrib/ltree/lquery_op.c | 103 +++++++++++++++++------------------ contrib/ltree/ltree.h | 10 ++-- contrib/ltree/ltxtquery_op.c | 11 ++-- 3 files changed, 60 insertions(+), 64 deletions(-) diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c index 0adcdd8ff2a..e6a1969c3d3 100644 --- a/contrib/ltree/lquery_op.c +++ b/contrib/ltree/lquery_op.c @@ -41,8 +41,7 @@ getlexeme(char *start, char *end, int *len) } bool -compare_subnode(ltree_level *t, char *qn, int len, - ltree_prefix_eq_func prefix_eq, bool anyend) +compare_subnode(ltree_level *t, char *qn, int len, bool prefix, bool ci) { char *endt = t->name + t->len; char *endq = qn + len; @@ -57,10 +56,8 @@ compare_subnode(ltree_level *t, char *qn, int len, isok = false; while ((tn = getlexeme(tn, endt, &lent)) != NULL) { - if ((lent == lenq || (lent > lenq && anyend)) && - (*prefix_eq) (qn, lenq, tn, lent)) + if (ltree_label_match(qn, lenq, tn, lent, prefix, ci)) { - isok = true; break; } @@ -76,64 +73,69 @@ compare_subnode(ltree_level *t, char *qn, int len, } /* - * Check if 'a' is a prefix of 'b'. + * Check if the label matches the predicate string. If 'prefix' is true, then + * the predicate string is treated as a prefix. If 'ci' is true, then the + * predicate string is case-insensitive (and locale-aware). */ bool -ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz) -{ - if (a_sz > b_sz) - return false; - else - return (strncmp(a, b, a_sz) == 0); -} - -/* - * Case-insensitive check if 'a' is a prefix of 'b'. - */ -bool -ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz) +ltree_label_match(const char *pred, size_t pred_len, const char *label, + size_t label_len, bool prefix, bool ci) { static pg_locale_t locale = NULL; - size_t al_sz = a_sz + 1; - size_t al_len; - char *al = palloc(al_sz); - size_t bl_sz = b_sz + 1; - size_t bl_len; - char *bl = palloc(bl_sz); + char *fpred; /* casefolded predicate */ + size_t fpred_len = pred_len; + char *flabel; /* casefolded label */ + size_t flabel_len = label_len; + size_t len; bool res; + /* fast path for binary match or binary prefix match */ + if ((pred_len == label_len || (prefix && pred_len < label_len)) && + strncmp(pred, label, pred_len) == 0) + return true; + else if (!ci) + return false; + + /* + * Slow path for case-insensitive comparison: case fold and then compare. + * This path is necessary even if pred_len > label_len, because the byte + * lengths may change after casefolding. + */ if (!locale) locale = pg_database_locale(); - /* casefold both a and b */ - - al_len = pg_strfold(al, al_sz, a, a_sz, locale); - if (al_len + 1 > al_sz) + fpred = palloc(fpred_len + 1); + len = pg_strfold(fpred, fpred_len + 1, pred, pred_len, locale); + if (len > fpred_len) { /* grow buffer if needed and retry */ - al_sz = al_len + 1; - al = repalloc(al, al_sz); - al_len = pg_strfold(al, al_sz, a, a_sz, locale); - Assert(al_len + 1 <= al_sz); + fpred_len = len; + fpred = repalloc(fpred, fpred_len + 1); + len = pg_strfold(fpred, fpred_len + 1, pred, pred_len, locale); } + Assert(len <= fpred_len); + fpred_len = len; - bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale); - if (bl_len + 1 > bl_sz) + flabel = palloc(flabel_len + 1); + len = pg_strfold(flabel, flabel_len + 1, label, label_len, locale); + if (len > flabel_len) { /* grow buffer if needed and retry */ - bl_sz = bl_len + 1; - bl = repalloc(bl, bl_sz); - bl_len = pg_strfold(bl, bl_sz, b, b_sz, locale); - Assert(bl_len + 1 <= bl_sz); + flabel_len = len; + flabel = repalloc(flabel, flabel_len + 1); + len = pg_strfold(flabel, flabel_len + 1, label, label_len, locale); } + Assert(len <= flabel_len); + flabel_len = len; - if (al_len > bl_len) - res = false; + if ((fpred_len == flabel_len || (prefix && fpred_len < flabel_len)) && + strncmp(fpred, flabel, fpred_len) == 0) + res = true; else - res = (strncmp(al, bl, al_len) == 0); + res = false; - pfree(al); - pfree(bl); + pfree(fpred); + pfree(flabel); return res; } @@ -158,19 +160,16 @@ checkLevel(lquery_level *curq, ltree_level *curt) for (int i = 0; i < curq->numvar; i++) { - ltree_prefix_eq_func prefix_eq; - - prefix_eq = (curvar->flag & LVAR_INCASE) ? ltree_prefix_eq_ci : ltree_prefix_eq; + bool prefix = (curvar->flag & LVAR_ANYEND); + bool ci = (curvar->flag & LVAR_INCASE); if (curvar->flag & LVAR_SUBLEXEME) { - if (compare_subnode(curt, curvar->name, curvar->len, prefix_eq, - (curvar->flag & LVAR_ANYEND))) + if (compare_subnode(curt, curvar->name, curvar->len, prefix, ci)) return success; } - else if ((curvar->len == curt->len || - (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) && - (*prefix_eq) (curvar->name, curvar->len, curt->name, curt->len)) + else if (ltree_label_match(curvar->name, curvar->len, curt->name, + curt->len, prefix, ci)) return success; curvar = LVAR_NEXT(curvar); diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h index b0ded40eba9..226c1cb2115 100644 --- a/contrib/ltree/ltree.h +++ b/contrib/ltree/ltree.h @@ -157,8 +157,6 @@ typedef struct char data[FLEXIBLE_ARRAY_MEMBER]; } ltxtquery; -typedef bool (*ltree_prefix_eq_func) (const char *, size_t, const char *, size_t); - #define HDRSIZEQT MAXALIGN(VARHDRSZ + sizeof(int32)) #define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) ) #define LTXTQUERY_TOO_BIG(size,lenofoperand) \ @@ -209,11 +207,11 @@ bool ltree_execute(ITEM *curitem, void *checkval, int ltree_compare(const ltree *a, const ltree *b); bool inner_isparent(const ltree *c, const ltree *p); -bool compare_subnode(ltree_level *t, char *qn, int len, - ltree_prefix_eq_func prefix_eq, bool anyend); +bool compare_subnode(ltree_level *t, char *qn, int len, bool prefix, bool ci); ltree *lca_inner(ltree **a, int len); -bool ltree_prefix_eq(const char *a, size_t a_sz, const char *b, size_t b_sz); -bool ltree_prefix_eq_ci(const char *a, size_t a_sz, const char *b, size_t b_sz); +bool ltree_label_match(const char *pred, size_t pred_len, + const char *label, size_t label_len, + bool prefix, bool ci); /* fmgr macros for ltree objects */ #define DatumGetLtreeP(X) ((ltree *) PG_DETOAST_DATUM(X)) diff --git a/contrib/ltree/ltxtquery_op.c b/contrib/ltree/ltxtquery_op.c index 3dcbab2c484..0e6612ff77a 100644 --- a/contrib/ltree/ltxtquery_op.c +++ b/contrib/ltree/ltxtquery_op.c @@ -58,19 +58,18 @@ checkcondition_str(void *checkval, ITEM *val) ltree_level *level = LTREE_FIRST(((CHKVAL *) checkval)->node); int tlen = ((CHKVAL *) checkval)->node->numlevel; char *op = ((CHKVAL *) checkval)->operand + val->distance; - ltree_prefix_eq_func prefix_eq; + bool prefix = (val->flag & LVAR_ANYEND); + bool ci = (val->flag & LVAR_INCASE); - prefix_eq = (val->flag & LVAR_INCASE) ? ltree_prefix_eq_ci : ltree_prefix_eq; while (tlen > 0) { if (val->flag & LVAR_SUBLEXEME) { - if (compare_subnode(level, op, val->length, prefix_eq, (val->flag & LVAR_ANYEND))) + if (compare_subnode(level, op, val->length, prefix, ci)) return true; } - else if ((val->length == level->len || - (level->len > val->length && (val->flag & LVAR_ANYEND))) && - (*prefix_eq) (op, val->length, level->name, level->len)) + else if (ltree_label_match(op, val->length, level->name, level->len, + prefix, ci)) return true; tlen--;