Implement unaccent Unicode data update in meson

The meson/ninja update-unicode target did not cover the required
updates in contrib/unaccent/.  This is fixed now.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Alexander Borisov <lex.borisov@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/2a668979-ed92-49a3-abf9-a3ec2d460ec2%40eisentraut.org
This commit is contained in:
Peter Eisentraut 2026-03-18 13:36:44 +01:00
parent 4f433025f6
commit 1b0c269f2e
4 changed files with 61 additions and 16 deletions

View file

@@ -28,7 +28,6 @@ install_data(
install_dir: dir_data / 'tsearch_data'
)
# XXX: Implement downloading [line truncated in extraction]
tests += {
'name': 'unaccent',
'sd': meson.current_source_dir(),
@@ -39,3 +38,43 @@ tests += {
],
},
}
# Download CLDR files on demand.
cldr_baseurl = 'https://raw.githubusercontent.com/unicode-org/cldr/release-@0@/common/transforms/@1@'

# Without a download tool and cp there is nothing we can regenerate here.
if not wget.found() or not cp.found()
  subdir_done()
endif

# CLDR release tags use dashes where the version string has dots
# (e.g. "47.1" -> "release-47-1").  This is loop-invariant, so compute
# it once up front.
# XXX could use .replace when we require meson 0.58
cldr_version_dashed = '-'.join(CLDR_VERSION.split('.'))

foreach f : ['Latin-ASCII.xml']
  url = cldr_baseurl.format(cldr_version_dashed, f)
  target = custom_target(f,
    output: f,
    command: [wget, wget_flags, url],
    build_by_default: false,
  )
  # Register the download target so the rules generation below (and any
  # other consumer of unicode_data) can depend on it by file name.
  unicode_data += {f: target}
endforeach

# Generate unaccent.rules from the Unicode and CLDR data files.  The
# script writes to stdout, hence capture: true.
unaccent_update_unicode_targets = \
  custom_target('unaccent.rules',
    input: [unicode_data['UnicodeData.txt'], unicode_data['Latin-ASCII.xml']],
    output: ['unaccent.rules'],
    command: [python, files('generate_unaccent_rules.py'),
              '--unicode-data-file', '@INPUT0@',
              '--latin-ascii-file', '@INPUT1@'],
    build_by_default: false,
    capture: true,
  )

# Copy the regenerated rules back into the source tree.  The declared
# output never exists, and build_always_stale forces a rerun on every
# invocation of the update-unicode target.
update_unicode_unaccent = custom_target('update-unicode',
  output: ['dont-exist'],
  input: unaccent_update_unicode_targets,
  command: [cp, '@INPUT@', '@SOURCE_ROOT@/contrib/unaccent/'],
  build_by_default: false,
  build_always_stale: true,
)
update_unicode_targets += update_unicode_unaccent

View file

@@ -3263,6 +3263,7 @@ pl_targets = []
contrib_targets = []
testprep_targets = []
nls_targets = []
# Targets that regenerate Unicode-derived files; subdirectories append
# to this list and it feeds the 'update-unicode' alias defined later.
update_unicode_targets = []
# Define the tests to distribute them to the correct test styles later
@@ -4051,6 +4052,10 @@ alias_target('testprep', testprep_targets)
alias_target('world', all_built, docs)
alias_target('install-world', install_quiet, installdocs)
# Only create the update-unicode alias when some subdirectory actually
# registered targets for it (the list stays empty e.g. when the download
# tools required by the contributing subdirectories are unavailable).
if update_unicode_targets.length() > 0
alias_target('update-unicode', update_unicode_targets)
endif
run_target('help',
command: [
perl, '-ne', 'next if /^#/; print',

View file

@@ -10,7 +10,7 @@ Update Unicode Version
----------------------
Edit src/Makefile.global.in and src/common/unicode/meson.build
to update the UNICODE_VERSION.
to update the UNICODE_VERSION and the CLDR_VERSION.
Then, generate the new header files with:

View file

@@ -1,6 +1,7 @@
# Copyright (c) 2022-2026, PostgreSQL Global Development Group
# Unicode Character Database release the data files are fetched from
# (substituted into unicode_baseurl below).
UNICODE_VERSION = '16.0.0'
# CLDR release providing the Latin-ASCII.xml transform used by
# contrib/unaccent's rules generation.
CLDR_VERSION = '47'
# Map of data file name -> download target; filled in by the foreach
# loops that create the per-file download targets.
unicode_data = {}
unicode_baseurl = 'https://www.unicode.org/Public/@0@/ucd/@1@'
@@ -22,9 +23,9 @@ foreach f : ['CompositionExclusions.txt', 'CaseFolding.txt', 'DerivedCorePropert
endforeach
update_unicode_targets = []
update_unicode_common_targets = []
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_case_table.h',
input: [unicode_data['CaseFolding.txt'], unicode_data['SpecialCasing.txt'], unicode_data['UnicodeData.txt']],
output: ['unicode_case_table.h'],
@@ -34,7 +35,7 @@ update_unicode_targets += \
build_by_default: false,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_category_table.h',
input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']],
output: ['unicode_category_table.h'],
@@ -44,7 +45,7 @@ update_unicode_targets += \
build_by_default: false,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_east_asian_fw_table.h',
input: [unicode_data['EastAsianWidth.txt']],
output: ['unicode_east_asian_fw_table.h'],
@@ -53,7 +54,7 @@ update_unicode_targets += \
capture: true,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_nonspacing_table.h',
input: [unicode_data['UnicodeData.txt']],
output: ['unicode_nonspacing_table.h'],
@@ -63,7 +64,7 @@ update_unicode_targets += \
capture: true,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_norm_table.h',
input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']],
output: ['unicode_norm_table.h', 'unicode_norm_hashfunc.h'],
@@ -74,7 +75,7 @@ update_unicode_targets += \
build_by_default: false,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_normprops_table.h',
input: [unicode_data['DerivedNormalizationProps.txt']],
output: ['unicode_normprops_table.h'],
@@ -84,7 +85,7 @@ update_unicode_targets += \
capture: true,
)
update_unicode_targets += \
update_unicode_common_targets += \
custom_target('unicode_version.h',
output: ['unicode_version.h'],
command: [
@@ -140,7 +141,7 @@ update_unicode_dep = []
if not meson.is_cross_build()
update_unicode_dep += custom_target('case_test.run',
output: 'case_test.run',
input: update_unicode_targets,
input: update_unicode_common_targets,
command: [case_test, UNICODE_VERSION],
build_by_default: false,
build_always_stale: true,
@@ -150,7 +151,7 @@ endif
if not meson.is_cross_build()
update_unicode_dep += custom_target('category_test.run',
output: 'category_test.run',
input: update_unicode_targets,
input: update_unicode_common_targets,
command: [category_test, UNICODE_VERSION],
build_by_default: false,
build_always_stale: true,
@@ -160,7 +161,7 @@ endif
if not meson.is_cross_build()
update_unicode_dep += custom_target('norm_test.run',
output: 'norm_test.run',
input: update_unicode_targets,
input: update_unicode_common_targets,
command: [norm_test],
build_by_default: false,
build_always_stale: true,
@@ -170,13 +171,13 @@ endif
# Use a custom target, as run targets serialize the output, making this harder
# to debug, and don't deal well with targets with multiple outputs.
update_unicode = custom_target('update-unicode',
update_unicode_common = custom_target('update-unicode',
depends: update_unicode_dep,
output: ['dont-exist'],
input: update_unicode_targets,
input: update_unicode_common_targets,
command: [cp, '@INPUT@', '@SOURCE_ROOT@/src/include/common/'],
build_by_default: false,
build_always_stale: true,
)
alias_target('update-unicode', update_unicode)
update_unicode_targets += update_unicode_common