Remove uses of popcount builtins.

This commit replaces the implementations of pg_popcount{32,64} with
branchless ones in plain C.  While these new implementations do not
make use of more sophisticated population count instructions
available on some CPUs, testing indicates they perform well,
especially now that they are inlined.  Newer versions of popular
compilers will automatically replace these with special
instructions if possible, anyway.  A follow-up commit will replace
various loops over these functions with calls to pg_popcount(),
leaving us little reason to worry about micro-optimizing them
further.

Since this commit removes the only uses of the popcount builtins,
we can also remove the corresponding configuration checks.

Suggested-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com
This commit is contained in:
Nathan Bossart 2026-02-23 09:26:00 -06:00
parent b9278871f9
commit eb9ab7e093
6 changed files with 22 additions and 80 deletions

38
configure vendored
View file

@ -15939,44 +15939,6 @@ cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_CTZ 1
_ACEOF
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
$as_echo_n "checking for __builtin_popcount... " >&6; }
if ${pgac_cv__builtin_popcount+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_popcount(unsigned int x)
{
return __builtin_popcount(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_popcount=yes
else
pgac_cv__builtin_popcount=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
$as_echo "$pgac_cv__builtin_popcount" >&6; }
if test x"${pgac_cv__builtin_popcount}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_POPCOUNT 1
_ACEOF
fi
# __builtin_frame_address may draw a diagnostic for non-constant argument,
# so it needs a different test function.

View file

@ -1865,7 +1865,6 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap64], [long int x])
# We assume that we needn't test all widths of these explicitly:
PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
# __builtin_frame_address may draw a diagnostic for non-constant argument,
# so it needs a different test function.
PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0])

View file

@ -2006,7 +2006,6 @@ builtins = [
'ctz',
'constant_p',
'frame_address',
'popcount',
'unreachable',
]

View file

@ -526,9 +526,6 @@
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW
/* Define to 1 if your compiler understands __builtin_popcount. */
#undef HAVE__BUILTIN_POPCOUNT
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P

View file

@ -297,51 +297,41 @@ extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mas
/*
 * pg_popcount32
 *		Return the number of 1 bits set in word
 *
 * Adapted from
 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
 *
 * This is a branchless, table-free implementation in plain C.  Note that
 * newer versions of popular compilers will automatically replace this with
 * a special popcount instruction if possible, so we don't bother using
 * builtin functions or intrinsics.
 */
static inline int
pg_popcount32(uint32 word)
{
	/* Replace each 2-bit field with the count of bits set within it. */
	word -= (word >> 1) & 0x55555555;

	/* Add neighboring 2-bit counts, leaving a count in each 4-bit field. */
	word = (word & 0x33333333) + ((word >> 2) & 0x33333333);

	/* Add neighboring 4-bit counts, leaving a count (0..8) in each byte. */
	word = (word + (word >> 4)) & 0xf0f0f0f;

	/*
	 * Multiplying by 0x01010101 accumulates the four byte counts into the
	 * most significant byte; shift it down to obtain the total.
	 */
	return (word * 0x1010101) >> 24;
}
/*
 * pg_popcount64
 *		Return the number of 1 bits set in word
 *
 * Adapted from
 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
 *
 * This is a branchless, table-free implementation in plain C.  Note that
 * newer versions of popular compilers will automatically replace this with
 * a special popcount instruction if possible, so we don't bother using
 * builtin functions or intrinsics.
 */
static inline int
pg_popcount64(uint64 word)
{
	/* Replace each 2-bit field with the count of bits set within it. */
	word -= (word >> 1) & UINT64CONST(0x5555555555555555);

	/* Add neighboring 2-bit counts, leaving a count in each 4-bit field. */
	word = (word & UINT64CONST(0x3333333333333333)) +
		((word >> 2) & UINT64CONST(0x3333333333333333));

	/* Add neighboring 4-bit counts, leaving a count (0..8) in each byte. */
	word = (word + (word >> 4)) & UINT64CONST(0xf0f0f0f0f0f0f0f);

	/*
	 * Multiplying by 0x0101010101010101 accumulates the eight byte counts
	 * into the most significant byte; shift it down to obtain the total.
	 */
	return (word * UINT64CONST(0x101010101010101)) >> 56;
}
/*

View file

@ -298,11 +298,6 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
static inline int
pg_popcount64_neon(uint64 word)
{
	/* View the 64-bit word as a vector of eight bytes. */
	uint8x8_t	lanes = vld1_u8((const uint8 *) &word);

	/* Count the set bits in each byte lane independently. */
	uint8x8_t	lane_counts = vcnt_u8(lanes);

	/* Sum the eight per-byte counts to get the total for the word. */
	return vaddv_u8(lane_counts);
}