mirror of
https://github.com/postgres/postgres.git
synced 2026-03-22 18:33:19 -04:00
Remove uses of popcount builtins.
This commit replaces the implementations of pg_popcount{32,64} with
branchless ones in plain C. While these new implementations do not
make use of more sophisticated population count instructions
available on some CPUs, testing indicates they perform well,
especially now that they are inlined. Newer versions of popular
compilers will automatically replace these with special
instructions if possible, anyway. A follow-up commit will replace
various loops over these functions with calls to pg_popcount(),
leaving us little reason to worry about micro-optimizing them
further.
Since this commit removes the only uses of the popcount builtins,
we can also remove the corresponding configuration checks.
Suggested-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com
This commit is contained in:
parent
b9278871f9
commit
eb9ab7e093
6 changed files with 22 additions and 80 deletions
38
configure
vendored
38
configure
vendored
|
|
@@ -15939,44 +15939,6 @@ cat >>confdefs.h <<_ACEOF
|
|||
#define HAVE__BUILTIN_CTZ 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
|
||||
$as_echo_n "checking for __builtin_popcount... " >&6; }
|
||||
if ${pgac_cv__builtin_popcount+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
int
|
||||
call__builtin_popcount(unsigned int x)
|
||||
{
|
||||
return __builtin_popcount(x);
|
||||
}
|
||||
int
|
||||
main ()
|
||||
{
|
||||
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
pgac_cv__builtin_popcount=yes
|
||||
else
|
||||
pgac_cv__builtin_popcount=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
|
||||
$as_echo "$pgac_cv__builtin_popcount" >&6; }
|
||||
if test x"${pgac_cv__builtin_popcount}" = xyes ; then
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define HAVE__BUILTIN_POPCOUNT 1
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
# __builtin_frame_address may draw a diagnostic for non-constant argument,
|
||||
# so it needs a different test function.
|
||||
|
|
|
|||
|
|
@@ -1865,7 +1865,6 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_bswap64], [long int x])
|
|||
# We assume that we needn't test all widths of these explicitly:
|
||||
PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
|
||||
PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x])
|
||||
PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
|
||||
# __builtin_frame_address may draw a diagnostic for non-constant argument,
|
||||
# so it needs a different test function.
|
||||
PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0])
|
||||
|
|
|
|||
|
|
@@ -2006,7 +2006,6 @@ builtins = [
|
|||
'ctz',
|
||||
'constant_p',
|
||||
'frame_address',
|
||||
'popcount',
|
||||
'unreachable',
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@@ -526,9 +526,6 @@
|
|||
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
|
||||
#undef HAVE__BUILTIN_OP_OVERFLOW
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_popcount. */
|
||||
#undef HAVE__BUILTIN_POPCOUNT
|
||||
|
||||
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
|
||||
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
|
||||
|
||||
|
|
|
|||
|
|
@@ -297,51 +297,41 @@ extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mas
|
|||
/*
|
||||
* pg_popcount32
|
||||
* Return the number of 1 bits set in word
|
||||
*
|
||||
* Adapted from
|
||||
* https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
|
||||
*
|
||||
* Note that newer versions of popular compilers will automatically replace
|
||||
* this with a special popcount instruction if possible, so we don't bother
|
||||
* using builtin functions or intrinsics.
|
||||
*/
|
||||
static inline int
|
||||
pg_popcount32(uint32 word)
|
||||
{
|
||||
#ifdef HAVE__BUILTIN_POPCOUNT
|
||||
return __builtin_popcount(word);
|
||||
#else /* !HAVE__BUILTIN_POPCOUNT */
|
||||
int result = 0;
|
||||
|
||||
while (word != 0)
|
||||
{
|
||||
result += pg_number_of_ones[word & 255];
|
||||
word >>= 8;
|
||||
}
|
||||
|
||||
return result;
|
||||
#endif /* HAVE__BUILTIN_POPCOUNT */
|
||||
word -= (word >> 1) & 0x55555555;
|
||||
word = (word & 0x33333333) + ((word >> 2) & 0x33333333);
|
||||
return (((word + (word >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
|
||||
}
|
||||
|
||||
/*
|
||||
* pg_popcount64
|
||||
* Return the number of 1 bits set in word
|
||||
*
|
||||
* Adapted from
|
||||
* https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
|
||||
*
|
||||
* Note that newer versions of popular compilers will automatically replace
|
||||
* this with a special popcount instruction if possible, so we don't bother
|
||||
* using builtin functions or intrinsics.
|
||||
*/
|
||||
static inline int
|
||||
pg_popcount64(uint64 word)
|
||||
{
|
||||
#ifdef HAVE__BUILTIN_POPCOUNT
|
||||
#if SIZEOF_LONG == 8
|
||||
return __builtin_popcountl(word);
|
||||
#elif SIZEOF_LONG_LONG == 8
|
||||
return __builtin_popcountll(word);
|
||||
#else
|
||||
#error "cannot find integer of the same size as uint64_t"
|
||||
#endif
|
||||
#else /* !HAVE__BUILTIN_POPCOUNT */
|
||||
int result = 0;
|
||||
|
||||
while (word != 0)
|
||||
{
|
||||
result += pg_number_of_ones[word & 255];
|
||||
word >>= 8;
|
||||
}
|
||||
|
||||
return result;
|
||||
#endif /* HAVE__BUILTIN_POPCOUNT */
|
||||
word -= (word >> 1) & UINT64CONST(0x5555555555555555);
|
||||
word = (word & UINT64CONST(0x3333333333333333)) +
|
||||
((word >> 2) & UINT64CONST(0x3333333333333333));
|
||||
word = (word + (word >> 4)) & UINT64CONST(0xf0f0f0f0f0f0f0f);
|
||||
return (word * UINT64CONST(0x101010101010101)) >> 56;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@@ -298,11 +298,6 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
|
|||
static inline int
|
||||
pg_popcount64_neon(uint64 word)
|
||||
{
|
||||
/*
|
||||
* For some compilers, __builtin_popcountl() already emits Neon
|
||||
* instructions. The line below should compile to the same code on those
|
||||
* systems.
|
||||
*/
|
||||
return vaddv_u8(vcnt_u8(vld1_u8((const uint8 *) &word)));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue