From 4352999e0e6cd2d971642274202f03e50ef9c4c2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 15 Nov 2016 09:43:26 +0000 Subject: [PATCH] Pass CPUID[1] %edx (cpu_feature), %ecx (cpu_feature2) and CPUID[7].%ebx (cpu_stdext_feature), %ecx (cpu_stdext_feature2) to the ifunc resolvers on x86. On x86 it is much cleaner to use the CPUID instruction in usermode to retrieve this information than to pass an AT_HWCAP aux vector from the kernel. Still, the change does allow the use of AT_HWCAP on arches where it is needed, by passing the aux array to the ifunc_init() initializer, which should prepare the arguments for ifunc resolvers. The current signature for resolvers on x86 is func_t iresolve(uint32_t cpu_feature, uint32_t cpu_feature2, uint32_t cpu_stdext_feature, uint32_t cpu_stdext_feature2); where the arguments have the same meaning as the kernel variables of the same name. The ABIs allow resolvers to be declared with a void or shortened argument list. Reviewed by: jhb Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D8448 --- libexec/rtld-elf/aarch64/reloc.c | 5 +++ libexec/rtld-elf/aarch64/rtld_machdep.h | 5 ++- libexec/rtld-elf/amd64/reloc.c | 22 +++++++++++- libexec/rtld-elf/amd64/rtld_machdep.h | 8 +++++ libexec/rtld-elf/arm/reloc.c | 5 +++ libexec/rtld-elf/arm/rtld_machdep.h | 3 ++ libexec/rtld-elf/i386/reloc.c | 42 ++++++++++++++++++++++- libexec/rtld-elf/i386/rtld_machdep.h | 8 +++++ libexec/rtld-elf/mips/reloc.c | 5 +++ libexec/rtld-elf/mips/rtld_machdep.h | 3 ++ libexec/rtld-elf/powerpc/reloc.c | 5 +++ libexec/rtld-elf/powerpc/rtld_machdep.h | 3 ++ libexec/rtld-elf/powerpc64/reloc.c | 5 +++ libexec/rtld-elf/powerpc64/rtld_machdep.h | 3 ++ libexec/rtld-elf/riscv/reloc.c | 5 +++ libexec/rtld-elf/riscv/rtld_machdep.h | 3 ++ libexec/rtld-elf/rtld.c | 3 +- libexec/rtld-elf/rtld.h | 1 + libexec/rtld-elf/sparc64/reloc.c | 5 +++ libexec/rtld-elf/sparc64/rtld_machdep.h | 5 ++- 20 files changed, 139 insertions(+), 5 
deletions(-) diff --git a/libexec/rtld-elf/aarch64/reloc.c b/libexec/rtld-elf/aarch64/reloc.c index de7ffd59482..cff813ac805 100644 --- a/libexec/rtld-elf/aarch64/reloc.c +++ b/libexec/rtld-elf/aarch64/reloc.c @@ -299,6 +299,11 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *defobj, return target; } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + /* * Process non-PLT relocations */ diff --git a/libexec/rtld-elf/aarch64/rtld_machdep.h b/libexec/rtld-elf/aarch64/rtld_machdep.h index a2fc74af73e..46101432567 100644 --- a/libexec/rtld-elf/aarch64/rtld_machdep.h +++ b/libexec/rtld-elf/aarch64/rtld_machdep.h @@ -61,7 +61,10 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) -#define round(size, align) \ +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + +#define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(16, align) diff --git a/libexec/rtld-elf/amd64/reloc.c b/libexec/rtld-elf/amd64/reloc.c index 80a3c35d659..43e3ce969c5 100644 --- a/libexec/rtld-elf/amd64/reloc.c +++ b/libexec/rtld-elf/amd64/reloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -406,7 +407,7 @@ reloc_iresolve(Obj_Entry *obj, RtldLockState *lockstate) ptr = (Elf_Addr *)(obj->relocbase + rela->r_addend); where = (Elf_Addr *)(obj->relocbase + rela->r_offset); lock_release(rtld_bind_lock, lockstate); - target = ((Elf_Addr (*)(void))ptr)(); + target = call_ifunc_resolver(ptr); wlock_acquire(rtld_bind_lock, lockstate); *where = target; break; @@ -450,6 +451,25 @@ reloc_gnu_ifunc(Obj_Entry *obj, int flags, RtldLockState *lockstate) return (0); } +uint32_t cpu_feature, cpu_feature2, cpu_stdext_feature, cpu_stdext_feature2; + +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ + u_int p[4], cpu_high; + + do_cpuid(1, p); + 
cpu_feature = p[3]; + cpu_feature2 = p[2]; + do_cpuid(0, p); + cpu_high = p[0]; + if (cpu_high >= 7) { + cpuid_count(7, 0, p); + cpu_stdext_feature = p[1]; + cpu_stdext_feature2 = p[2]; + } +} + void allocate_initial_tls(Obj_Entry *objs) { diff --git a/libexec/rtld-elf/amd64/rtld_machdep.h b/libexec/rtld-elf/amd64/rtld_machdep.h index a8696feb37a..df6fc339132 100644 --- a/libexec/rtld-elf/amd64/rtld_machdep.h +++ b/libexec/rtld-elf/amd64/rtld_machdep.h @@ -61,6 +61,14 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +extern uint32_t cpu_feature; +extern uint32_t cpu_feature2; +extern uint32_t cpu_stdext_feature; +extern uint32_t cpu_stdext_feature2; +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(uint32_t, uint32_t, uint32_t, uint32_t))ptr)( \ + cpu_feature, cpu_feature2, cpu_stdext_feature, cpu_stdext_feature2)) + #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ diff --git a/libexec/rtld-elf/arm/reloc.c b/libexec/rtld-elf/arm/reloc.c index b6d0babfc87..94a9bf0e021 100644 --- a/libexec/rtld-elf/arm/reloc.c +++ b/libexec/rtld-elf/arm/reloc.c @@ -479,6 +479,11 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *defobj, return target; } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + void allocate_initial_tls(Obj_Entry *objs) { diff --git a/libexec/rtld-elf/arm/rtld_machdep.h b/libexec/rtld-elf/arm/rtld_machdep.h index c61bce0cdd2..2c1a2b80c89 100644 --- a/libexec/rtld-elf/arm/rtld_machdep.h +++ b/libexec/rtld-elf/arm/rtld_machdep.h @@ -51,6 +51,9 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + #define TLS_TCB_SIZE 8 typedef struct { unsigned long ti_module; diff --git 
a/libexec/rtld-elf/i386/reloc.c b/libexec/rtld-elf/i386/reloc.c index 2d6021c9779..8a405fbccb6 100644 --- a/libexec/rtld-elf/i386/reloc.c +++ b/libexec/rtld-elf/i386/reloc.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -359,7 +360,7 @@ reloc_iresolve(Obj_Entry *obj, RtldLockState *lockstate) case R_386_IRELATIVE: where = (Elf_Addr *)(obj->relocbase + rel->r_offset); lock_release(rtld_bind_lock, lockstate); - target = ((Elf_Addr (*)(void))(obj->relocbase + *where))(); + target = call_ifunc_resolver(obj->relocbase + *where); wlock_acquire(rtld_bind_lock, lockstate); *where = target; break; @@ -404,6 +405,45 @@ reloc_gnu_ifunc(Obj_Entry *obj, int flags, RtldLockState *lockstate) return (0); } +uint32_t cpu_feature, cpu_feature2, cpu_stdext_feature, cpu_stdext_feature2; + +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ + u_int p[4], cpu_high; + int cpuid_supported; + + __asm __volatile( + " pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " xorl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + " jmp 2f\n" + "1: movl $0,%0\n" + "2:\n" + : "=r" (cpuid_supported) : : "eax", "ecx"); + if (!cpuid_supported) + return; + + do_cpuid(1, p); + cpu_feature = p[3]; + cpu_feature2 = p[2]; + do_cpuid(0, p); + cpu_high = p[0]; + if (cpu_high >= 7) { + cpuid_count(7, 0, p); + cpu_stdext_feature = p[1]; + cpu_stdext_feature2 = p[2]; + } +} + void allocate_initial_tls(Obj_Entry *objs) { diff --git a/libexec/rtld-elf/i386/rtld_machdep.h b/libexec/rtld-elf/i386/rtld_machdep.h index 5237d4fce04..1ffdca411c7 100644 --- a/libexec/rtld-elf/i386/rtld_machdep.h +++ b/libexec/rtld-elf/i386/rtld_machdep.h @@ -61,6 +61,14 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +extern uint32_t cpu_feature; +extern uint32_t cpu_feature2; +extern uint32_t 
cpu_stdext_feature; +extern uint32_t cpu_stdext_feature2; +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(uint32_t, uint32_t, uint32_t, uint32_t))ptr)( \ + cpu_feature, cpu_feature2, cpu_stdext_feature, cpu_stdext_feature2)) + #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ diff --git a/libexec/rtld-elf/mips/reloc.c b/libexec/rtld-elf/mips/reloc.c index 0f4a5d66bd1..4f3d5c450d1 100644 --- a/libexec/rtld-elf/mips/reloc.c +++ b/libexec/rtld-elf/mips/reloc.c @@ -617,6 +617,11 @@ reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *defobj, return target; } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + void allocate_initial_tls(Obj_Entry *objs) { diff --git a/libexec/rtld-elf/mips/rtld_machdep.h b/libexec/rtld-elf/mips/rtld_machdep.h index 34e8f3c591f..484af1b4133 100644 --- a/libexec/rtld-elf/mips/rtld_machdep.h +++ b/libexec/rtld-elf/mips/rtld_machdep.h @@ -52,6 +52,9 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + typedef struct { unsigned long ti_module; unsigned long ti_offset; diff --git a/libexec/rtld-elf/powerpc/reloc.c b/libexec/rtld-elf/powerpc/reloc.c index 79d41f21fb6..e76da987f21 100644 --- a/libexec/rtld-elf/powerpc/reloc.c +++ b/libexec/rtld-elf/powerpc/reloc.c @@ -619,6 +619,11 @@ init_pltgot(Obj_Entry *obj) */ } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + void allocate_initial_tls(Obj_Entry *list) { diff --git a/libexec/rtld-elf/powerpc/rtld_machdep.h b/libexec/rtld-elf/powerpc/rtld_machdep.h index 3e39c8231c0..92bbf9a3de0 100644 --- a/libexec/rtld-elf/powerpc/rtld_machdep.h +++ b/libexec/rtld-elf/powerpc/rtld_machdep.h @@ -51,6 +51,9 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, 
target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + /* * Lazy binding entry point, called via PLT. */ diff --git a/libexec/rtld-elf/powerpc64/reloc.c b/libexec/rtld-elf/powerpc64/reloc.c index 3db5fd25441..6a09185b73a 100644 --- a/libexec/rtld-elf/powerpc64/reloc.c +++ b/libexec/rtld-elf/powerpc64/reloc.c @@ -523,6 +523,11 @@ init_pltgot(Obj_Entry *obj) #endif } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + void allocate_initial_tls(Obj_Entry *list) { diff --git a/libexec/rtld-elf/powerpc64/rtld_machdep.h b/libexec/rtld-elf/powerpc64/rtld_machdep.h index 32c7d12d449..90253d70d92 100644 --- a/libexec/rtld-elf/powerpc64/rtld_machdep.h +++ b/libexec/rtld-elf/powerpc64/rtld_machdep.h @@ -51,6 +51,9 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + /* * Lazy binding entry point, called via PLT. */ diff --git a/libexec/rtld-elf/riscv/reloc.c b/libexec/rtld-elf/riscv/reloc.c index 92c4276df08..5da0bd17566 100644 --- a/libexec/rtld-elf/riscv/reloc.c +++ b/libexec/rtld-elf/riscv/reloc.c @@ -366,6 +366,11 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, return (0); } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + void allocate_initial_tls(Obj_Entry *objs) { diff --git a/libexec/rtld-elf/riscv/rtld_machdep.h b/libexec/rtld-elf/riscv/rtld_machdep.h index 660787f5d16..46387e7bcdd 100644 --- a/libexec/rtld-elf/riscv/rtld_machdep.h +++ b/libexec/rtld-elf/riscv/rtld_machdep.h @@ -78,6 +78,9 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, __asm __volatile("mv gp, %0" :: "r"(old1)); \ }) +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + /* * Lazy binding entry point, called via PLT. 
*/ diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 6c8aff44bf9..874f80bdd35 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -642,6 +642,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ map_stacks_exec(NULL); + ifunc_init(aux); dbg("resolving ifuncs"); if (resolve_objects_ifunc(obj_main, @@ -690,7 +691,7 @@ rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def) Elf_Addr target; ptr = (void *)make_function_pointer(def, obj); - target = ((Elf_Addr (*)(void))ptr)(); + target = call_ifunc_resolver(ptr); return ((void *)target); } diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index 8b7024b841a..99b56c12959 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -367,6 +367,7 @@ void dump_Elf_Rela(Obj_Entry *, const Elf_Rela *, u_long); unsigned long elf_hash(const char *); const Elf_Sym *find_symdef(unsigned long, const Obj_Entry *, const Obj_Entry **, int, SymCache *, struct Struct_RtldLockState *); +void ifunc_init(Elf_Auxinfo[static AT_COUNT]); void init_pltgot(Obj_Entry *); void lockdflt_init(void); void digest_notes(Obj_Entry *, Elf_Addr, Elf_Addr); diff --git a/libexec/rtld-elf/sparc64/reloc.c b/libexec/rtld-elf/sparc64/reloc.c index f8534e8712f..981eea502b8 100644 --- a/libexec/rtld-elf/sparc64/reloc.c +++ b/libexec/rtld-elf/sparc64/reloc.c @@ -786,6 +786,11 @@ reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, return (target); } +void +ifunc_init(Elf_Auxinfo aux_info[static AT_COUNT] __unused) +{ +} + /* * Install rtld function call into this PLT slot. 
*/ diff --git a/libexec/rtld-elf/sparc64/rtld_machdep.h b/libexec/rtld-elf/sparc64/rtld_machdep.h index 9df63b8bb1f..b53f79795e1 100644 --- a/libexec/rtld-elf/sparc64/rtld_machdep.h +++ b/libexec/rtld-elf/sparc64/rtld_machdep.h @@ -53,7 +53,10 @@ Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) -#define round(size, align) \ +#define call_ifunc_resolver(ptr) \ + (((Elf_Addr (*)(void))ptr)()) + +#define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(size, align)