From 1b0c269f2e4f1cdcfd6d7c71d8eaa8020c23ebdb Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 18 Mar 2026 13:36:44 +0100
Subject: [PATCH] Implement unaccent Unicode data update in meson

The meson/ninja update-unicode target did not cover the required
updates in contrib/unaccent/. This is fixed now.

Reviewed-by: Chao Li
Reviewed-by: Alexander Borisov
Discussion: https://www.postgresql.org/message-id/flat/2a668979-ed92-49a3-abf9-a3ec2d460ec2%40eisentraut.org
---
Reviewer note (free-form text between "---" and the diffstat is not part
of the commit): the line structure of this patch was restored from a
whitespace-collapsed copy.  Hunk line counts were checked against the
"@@" headers, but the leading indentation of context and added lines is
assumed to follow the two-space style of the touched meson.build files.
TODO confirm against the tree, or apply with
"git apply --ignore-whitespace".

 contrib/unaccent/meson.build   | 41 +++++++++++++++++++++++++++++++++-
 meson.build                    |  5 +++++
 src/common/unicode/README      |  2 +-
 src/common/unicode/meson.build | 29 ++++++++++++------------
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/contrib/unaccent/meson.build b/contrib/unaccent/meson.build
index 7f582fc04a7..938d9522da3 100644
--- a/contrib/unaccent/meson.build
+++ b/contrib/unaccent/meson.build
@@ -28,7 +28,6 @@ install_data(
   install_dir: dir_data / 'tsearch_data'
 )
 
-# XXX: Implement downlo
 tests += {
   'name': 'unaccent',
   'sd': meson.current_source_dir(),
@@ -39,3 +38,43 @@ tests += {
     ],
   },
 }
+
+
+# Download CLDR files on demand.
+
+cldr_baseurl = 'https://raw.githubusercontent.com/unicode-org/cldr/release-@0@/common/transforms/@1@'
+
+if not wget.found() or not cp.found()
+  subdir_done()
+endif
+
+foreach f : ['Latin-ASCII.xml']
+  # XXX could use .replace when we require meson 0.58
+  cldr_version_dashed = '-'.join(CLDR_VERSION.split('.'))
+  url = cldr_baseurl.format(cldr_version_dashed, f)
+  target = custom_target(f,
+    output: f,
+    command: [wget, wget_flags, url],
+    build_by_default: false,
+  )
+  unicode_data += {f: target}
+endforeach
+
+unaccent_update_unicode_targets = \
+  custom_target('unaccent.rules',
+    input: [unicode_data['UnicodeData.txt'], unicode_data['Latin-ASCII.xml']],
+    output: ['unaccent.rules'],
+    command: [python, files('generate_unaccent_rules.py'), '--unicode-data-file', '@INPUT0@', '--latin-ascii-file', '@INPUT1@'],
+    build_by_default: false,
+    capture: true,
+  )
+
+update_unicode_unaccent = custom_target('update-unicode',
+  output: ['dont-exist'],
+  input: unaccent_update_unicode_targets,
+  command: [cp, '@INPUT@', '@SOURCE_ROOT@/contrib/unaccent/'],
+  build_by_default: false,
+  build_always_stale: true,
+)
+
+update_unicode_targets += update_unicode_unaccent
diff --git a/meson.build b/meson.build
index 10cdd26cc2a..c6e7cae5055 100644
--- a/meson.build
+++ b/meson.build
@@ -3263,6 +3263,7 @@ pl_targets = []
 contrib_targets = []
 testprep_targets = []
 nls_targets = []
+update_unicode_targets = []
 
 
 # Define the tests to distribute them to the correct test styles later
@@ -4051,6 +4052,10 @@ alias_target('testprep', testprep_targets)
 alias_target('world', all_built, docs)
 alias_target('install-world', install_quiet, installdocs)
 
+if update_unicode_targets.length() > 0
+  alias_target('update-unicode', update_unicode_targets)
+endif
+
 run_target('help',
   command: [
     perl, '-ne', 'next if /^#/; print',
diff --git a/src/common/unicode/README b/src/common/unicode/README
index 4974c3c885d..fdb8b66fa58 100644
--- a/src/common/unicode/README
+++ b/src/common/unicode/README
@@ -10,7 +10,7 @@ Update Unicode Version
 ----------------------
 
 Edit src/Makefile.global.in and src/common/unicode/meson.build
-to update the UNICODE_VERSION.
+to update the UNICODE_VERSION and the CLDR_VERSION.
 
 Then, generate the new header files with:
 
diff --git a/src/common/unicode/meson.build b/src/common/unicode/meson.build
index f650dd95b5c..94d02f0070d 100644
--- a/src/common/unicode/meson.build
+++ b/src/common/unicode/meson.build
@@ -1,6 +1,7 @@
 # Copyright (c) 2022-2026, PostgreSQL Global Development Group
 
 UNICODE_VERSION = '16.0.0'
+CLDR_VERSION = '47'
 
 unicode_data = {}
 unicode_baseurl = 'https://www.unicode.org/Public/@0@/ucd/@1@'
@@ -22,9 +23,9 @@ foreach f : ['CompositionExclusions.txt', 'CaseFolding.txt', 'DerivedCorePropert
 endforeach
 
 
-update_unicode_targets = []
+update_unicode_common_targets = []
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_case_table.h',
     input: [unicode_data['CaseFolding.txt'], unicode_data['SpecialCasing.txt'], unicode_data['UnicodeData.txt']],
     output: ['unicode_case_table.h'],
@@ -34,7 +35,7 @@ update_unicode_targets += \
     build_by_default: false,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_category_table.h',
     input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']],
     output: ['unicode_category_table.h'],
@@ -44,7 +45,7 @@ update_unicode_targets += \
     build_by_default: false,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_east_asian_fw_table.h',
     input: [unicode_data['EastAsianWidth.txt']],
     output: ['unicode_east_asian_fw_table.h'],
@@ -53,7 +54,7 @@ update_unicode_targets += \
     capture: true,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_nonspacing_table.h',
     input: [unicode_data['UnicodeData.txt']],
     output: ['unicode_nonspacing_table.h'],
@@ -63,7 +64,7 @@ update_unicode_targets += \
     capture: true,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_norm_table.h',
     input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']],
     output: ['unicode_norm_table.h', 'unicode_norm_hashfunc.h'],
@@ -74,7 +75,7 @@ update_unicode_targets += \
     build_by_default: false,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_normprops_table.h',
     input: [unicode_data['DerivedNormalizationProps.txt']],
     output: ['unicode_normprops_table.h'],
@@ -84,7 +85,7 @@ update_unicode_targets += \
     capture: true,
   )
 
-update_unicode_targets += \
+update_unicode_common_targets += \
   custom_target('unicode_version.h',
     output: ['unicode_version.h'],
     command: [
@@ -140,7 +141,7 @@ update_unicode_dep = []
 if not meson.is_cross_build()
   update_unicode_dep += custom_target('case_test.run',
     output: 'case_test.run',
-    input: update_unicode_targets,
+    input: update_unicode_common_targets,
     command: [case_test, UNICODE_VERSION],
     build_by_default: false,
     build_always_stale: true,
@@ -150,7 +151,7 @@ endif
 if not meson.is_cross_build()
   update_unicode_dep += custom_target('category_test.run',
     output: 'category_test.run',
-    input: update_unicode_targets,
+    input: update_unicode_common_targets,
     command: [category_test, UNICODE_VERSION],
     build_by_default: false,
     build_always_stale: true,
@@ -160,7 +161,7 @@ endif
 if not meson.is_cross_build()
   update_unicode_dep += custom_target('norm_test.run',
     output: 'norm_test.run',
-    input: update_unicode_targets,
+    input: update_unicode_common_targets,
     command: [norm_test],
     build_by_default: false,
     build_always_stale: true,
@@ -170,13 +171,13 @@ endif
 
 # Use a custom target, as run targets serialize the output, making this harder
 # to debug, and don't deal well with targets with multiple outputs.
-update_unicode = custom_target('update-unicode',
+update_unicode_common = custom_target('update-unicode',
   depends: update_unicode_dep,
   output: ['dont-exist'],
-  input: update_unicode_targets,
+  input: update_unicode_common_targets,
   command: [cp, '@INPUT@', '@SOURCE_ROOT@/src/include/common/'],
   build_by_default: false,
   build_always_stale: true,
 )
 
-alias_target('update-unicode', update_unicode)
+update_unicode_targets += update_unicode_common