From eb9ab7e0930aef9fe1da4b71d0979ebe5361ee48 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 23 Feb 2026 09:26:00 -0600 Subject: [PATCH] Remove uses of popcount builtins. This commit replaces the implementations of pg_popcount{32,64} with branchless ones in plain C. While these new implementations do not make use of more sophisticated population count instructions available on some CPUs, testing indicates they perform well, especially now that they are inlined. Newer versions of popular compilers will automatically replace these with special instructions if possible, anyway. A follow-up commit will replace various loops over these functions with calls to pg_popcount(), leaving us little reason to worry about micro-optimizing them further. Since this commit removes the only uses of the popcount builtins, we can also remove the corresponding configuration checks. Suggested-by: John Naylor Reviewed-by: John Naylor Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com --- configure | 38 ------------------------ configure.ac | 1 - meson.build | 1 - src/include/pg_config.h.in | 3 -- src/include/port/pg_bitutils.h | 54 ++++++++++++++-------------------- src/port/pg_popcount_aarch64.c | 5 ---- 6 files changed, 22 insertions(+), 80 deletions(-) diff --git a/configure b/configure index 7e6d9cb2139..8f35310cd08 100755 --- a/configure +++ b/configure @@ -15939,44 +15939,6 @@ cat >>confdefs.h <<_ACEOF #define HAVE__BUILTIN_CTZ 1 _ACEOF -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 -$as_echo_n "checking for __builtin_popcount... " >&6; } -if ${pgac_cv__builtin_popcount+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -call__builtin_popcount(unsigned int x) -{ - return __builtin_popcount(x); -} -int -main () -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - pgac_cv__builtin_popcount=yes -else - pgac_cv__builtin_popcount=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5 -$as_echo "$pgac_cv__builtin_popcount" >&6; } -if test x"${pgac_cv__builtin_popcount}" = xyes ; then - -cat >>confdefs.h <<_ACEOF -#define HAVE__BUILTIN_POPCOUNT 1 -_ACEOF - fi # __builtin_frame_address may draw a diagnostic for non-constant argument, # so it needs a different test function. diff --git a/configure.ac b/configure.ac index f77ced6ddea..0bad016f01c 100644 --- a/configure.ac +++ b/configure.ac @@ -1865,7 +1865,6 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap64], [long int x]) # We assume that we needn't test all widths of these explicitly: PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x]) PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x]) -PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x]) # __builtin_frame_address may draw a diagnostic for non-constant argument, # so it needs a different test function. PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0]) diff --git a/meson.build b/meson.build index ebfb85e93e5..83d1f06b528 100644 --- a/meson.build +++ b/meson.build @@ -2006,7 +2006,6 @@ builtins = [ 'ctz', 'constant_p', 'frame_address', - 'popcount', 'unreachable', ] diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 3824a5571bb..af08c5a7eb8 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -526,9 +526,6 @@ /* Define to 1 if your compiler understands __builtin_$op_overflow. */ #undef HAVE__BUILTIN_OP_OVERFLOW -/* Define to 1 if your compiler understands __builtin_popcount. */ -#undef HAVE__BUILTIN_POPCOUNT - /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 789663edd93..0bca559caaa 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -297,51 +297,41 @@ extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mas /* * pg_popcount32 * Return the number of 1 bits set in word + * + * Adapted from + * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel. + * + * Note that newer versions of popular compilers will automatically replace + * this with a special popcount instruction if possible, so we don't bother + * using builtin functions or intrinsics. */ static inline int pg_popcount32(uint32 word) { -#ifdef HAVE__BUILTIN_POPCOUNT - return __builtin_popcount(word); -#else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; -#endif /* HAVE__BUILTIN_POPCOUNT */ + word -= (word >> 1) & 0x55555555; + word = (word & 0x33333333) + ((word >> 2) & 0x33333333); + return (((word + (word >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24; } /* * pg_popcount64 * Return the number of 1 bits set in word + * + * Adapted from + * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel. + * + * Note that newer versions of popular compilers will automatically replace + * this with a special popcount instruction if possible, so we don't bother + * using builtin functions or intrinsics. */ static inline int pg_popcount64(uint64 word) { -#ifdef HAVE__BUILTIN_POPCOUNT -#if SIZEOF_LONG == 8 - return __builtin_popcountl(word); -#elif SIZEOF_LONG_LONG == 8 - return __builtin_popcountll(word); -#else -#error "cannot find integer of the same size as uint64_t" -#endif -#else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; -#endif /* HAVE__BUILTIN_POPCOUNT */ + word -= (word >> 1) & UINT64CONST(0x5555555555555555); + word = (word & UINT64CONST(0x3333333333333333)) + + ((word >> 2) & UINT64CONST(0x3333333333333333)); + word = (word + (word >> 4)) & UINT64CONST(0xf0f0f0f0f0f0f0f); + return (word * UINT64CONST(0x101010101010101)) >> 56; } /* diff --git a/src/port/pg_popcount_aarch64.c b/src/port/pg_popcount_aarch64.c index f474ef45510..b0f10ae07a4 100644 --- a/src/port/pg_popcount_aarch64.c +++ b/src/port/pg_popcount_aarch64.c @@ -298,11 +298,6 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask) static inline int pg_popcount64_neon(uint64 word) { - /* - * For some compilers, __builtin_popcountl() already emits Neon - * instructions. The line below should compile to the same code on those - * systems. - */ return vaddv_u8(vcnt_u8(vld1_u8((const uint8 *) &word))); }