diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:41:08 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:41:08 -0800 |
commit | 1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f (patch) | |
tree | a5dabaa924d50867cbe347e20a7643b2850f11c0 /tools/testing/selftests/rseq/rseq.c | |
parent | Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel... (diff) | |
parent | sched/rt: pick_next_rt_entity(): check list_entry (diff) | |
download | linux-1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f.tar.gz linux-1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f.tar.bz2 linux-1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f.zip |
Merge tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Improve the scalability of the CFS bandwidth unthrottling logic with
large number of CPUs.
- Fix & rework various cpuidle routines, simplify interaction with the
generic scheduler code. Add __cpuidle methods as noinstr to objtool's
noinstr detection and fix boatloads of cpuidle bugs & quirks.
- Add new ABI: introduce MEMBARRIER_CMD_GET_REGISTRATIONS, to query
previously issued registrations.
- Limit scheduler slice duration to the sysctl_sched_latency period, to
improve scheduling granularity with a large number of SCHED_IDLE
tasks.
- Debuggability enhancement on sys_exit(): warn about disabled IRQs,
but also enable them to prevent a cascade of followup problems and
repeat warnings.
- Fix the rescheduling logic in prio_changed_dl().
- Micro-optimize cpufreq and sched-util methods.
- Micro-optimize ttwu_runnable()
- Micro-optimize the idle-scanning in update_numa_stats(),
select_idle_capacity() and steal_cookie_task().
- Update the RSEQ code & self-tests
- Constify various scheduler methods
- Remove unused methods
- Refine __init tags
- Documentation updates
- Misc other cleanups, fixes
* tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (110 commits)
sched/rt: pick_next_rt_entity(): check list_entry
sched/deadline: Add more reschedule cases to prio_changed_dl()
sched/fair: sanitize vruntime of entity being placed
sched/fair: Remove capacity inversion detection
sched/fair: unlink misfit task from cpu overutilized
objtool: mem*() are not uaccess safe
cpuidle: Fix poll_idle() noinstr annotation
sched/clock: Make local_clock() noinstr
sched/clock/x86: Mark sched_clock() noinstr
x86/pvclock: Improve atomic update of last_value in pvclock_clocksource_read()
x86/atomics: Always inline arch_atomic64*()
cpuidle: tracing, preempt: Squash _rcuidle tracing
cpuidle: tracing: Warn about !rcu_is_watching()
cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG
cpuidle: drivers: firmware: psci: Dont instrument suspend code
KVM: selftests: Fix build of rseq test
exit: Detect and fix irq disabled state in oops
cpuidle, arm64: Fix the ARM64 cpuidle logic
cpuidle: mvebu: Fix duplicate flags assignment
sched/fair: Limit sched slice duration
...
Diffstat (limited to 'tools/testing/selftests/rseq/rseq.c')
-rw-r--r-- | tools/testing/selftests/rseq/rseq.c | 91 |
1 files changed, 81 insertions, 10 deletions
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 4177f9507bbe..4e4aa006004c 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -28,6 +28,8 @@ #include <limits.h> #include <dlfcn.h> #include <stddef.h> +#include <sys/auxv.h> +#include <linux/auxvec.h> #include "../kselftest.h" #include "rseq.h" @@ -36,20 +38,38 @@ static const ptrdiff_t *libc_rseq_offset_p; static const unsigned int *libc_rseq_size_p; static const unsigned int *libc_rseq_flags_p; -/* Offset from the thread pointer to the rseq area. */ +/* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; -/* Size of the registered rseq area. 0 if the registration was - unsuccessful. */ +/* + * Size of the registered rseq area. 0 if the registration was + * unsuccessful. + */ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; +/* + * rseq feature size supported by the kernel. 0 if the registration was + * unsuccessful. + */ +unsigned int rseq_feature_size = -1U; + static int rseq_ownership; +static int rseq_reg_success; /* At least one rseq registration has succeded. */ + +/* Allocate a large area for the TLS. */ +#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 + +/* Original struct rseq feature size is 20 bytes. */ +#define ORIG_RSEQ_FEATURE_SIZE 20 + +/* Original struct rseq allocation size is 32 bytes. */ +#define ORIG_RSEQ_ALLOC_SIZE 32 static -__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }; @@ -59,6 +79,11 @@ static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } +static int sys_getcpu(unsigned *cpu, unsigned *node) +{ + return syscall(__NR_getcpu, cpu, node, NULL); +} + int rseq_available(void) { int rc; @@ -84,10 +109,16 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); - if (rc) + rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + if (rc) { + if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* Incoherent success/failure within process. */ + abort(); + } return -1; + } assert(rseq_current_cpu_raw() >= 0); + RSEQ_WRITE_ONCE(rseq_reg_success, 1); return 0; } @@ -99,12 +130,28 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } +static +unsigned int get_rseq_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + static __attribute__((constructor)) void rseq_init(void) { @@ -117,14 +164,24 @@ void rseq_init(void) rseq_offset = *libc_rseq_offset_p; rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; + rseq_feature_size = get_rseq_feature_size(); + if (rseq_feature_size > rseq_size) + rseq_feature_size = rseq_size; return; } - if (!rseq_available()) - return; rseq_ownership = 1; + if (!rseq_available()) { + rseq_size = 0; + rseq_feature_size = 0; + return; + } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); - rseq_size = sizeof(struct rseq_abi); rseq_flags = 0; + rseq_feature_size = get_rseq_feature_size(); + if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -134,6 +191,7 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; + rseq_feature_size = -1U; rseq_ownership = 0; } @@ -148,3 +206,16 @@ int32_t rseq_fallback_current_cpu(void) } return cpu; } + +int32_t rseq_fallback_current_node(void) +{ + uint32_t cpu_id, node_id; + int ret; + + ret = sys_getcpu(&cpu_id, &node_id); + if (ret) { + perror("sys_getcpu()"); + return ret; + } + return (int32_t) node_id; +} |