fix(expansion): Resolve truncation of multi-byte UTF-8 characters after the operator, which produced invalid UTF-8

This commit is contained in:
qiuxue 2026-01-19 21:17:21 +08:00
parent cb077823fb
commit d78f090d1b
2 changed files with 21 additions and 3 deletions

View file

@ -2,6 +2,7 @@ package expansion
import ( import (
"bytes" "bytes"
"unicode/utf8"
) )
const ( const (
@ -79,10 +80,11 @@ func Expand(input string, mapping func(string) string) string {
// //
// The input string is assumed not to contain the initial operator. // The input string is assumed not to contain the initial operator.
func tryReadVariableName(input string) (string, bool, int) { func tryReadVariableName(input string) (string, bool, int) {
switch input[0] { r, size := utf8.DecodeRuneInString(input)
switch r {
case operator: case operator:
// Escaped operator; return it. // Escaped operator; return it.
return input[0:1], false, 1 return input[0:size], false, size
case referenceOpener: case referenceOpener:
// Scan to expression closer // Scan to expression closer
for i := 1; i < len(input); i++ { for i := 1; i < len(input); i++ {
@ -97,6 +99,7 @@ func tryReadVariableName(input string) (string, bool, int) {
// Not the beginning of an expression, ie, an operator // Not the beginning of an expression, ie, an operator
// that doesn't begin an expression. Return the operator // that doesn't begin an expression. Return the operator
// and the first rune in the string. // and the first rune in the string.
return (string(operator) + string(input[0])), false, 1
return string(operator) + string(r), false, size
} }
} }

View file

@ -274,6 +274,21 @@ func doExpansionTest(t *testing.T, mapping func(string) string) {
input: "\n", input: "\n",
expected: "\n", expected: "\n",
}, },
{
name: "dollar sign followed by non-ASCII UTF-8 character",
input: "$£FOO",
expected: "$£FOO",
},
{
name: "dollar sign followed by multi-byte UTF-8 character in middle",
input: "prefix-$€-suffix",
expected: "prefix-$€-suffix",
},
{
name: "dollar sign followed by Chinese character",
input: "$中文",
expected: "$中文",
},
} }
for _, tc := range cases { for _, tc := range cases {