aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h13
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h21
-rw-r--r--tools/arch/x86/include/asm/msr-index.h4
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h11
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/perf/builtin-trace.c2
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py34
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c27
-rw-r--r--tools/perf/util/bpf-loader.c18
-rw-r--r--tools/perf/util/symbol-elf.c56
-rw-r--r--tools/power/pm-graph/README6
-rwxr-xr-xtools/power/pm-graph/bootgraph.py20
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg2
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py518
-rw-r--r--tools/power/x86/turbostat/turbostat.8200
-rw-r--r--tools/power/x86/turbostat/turbostat.c240
-rw-r--r--tools/spi/spidev_test.c11
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c8
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c605
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh131
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h50
-rw-r--r--tools/testing/selftests/rseq/rseq.c3
-rw-r--r--tools/testing/selftests/safesetid/Makefile2
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c295
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c90
-rw-r--r--tools/thermal/tmon/pid.c2
-rw-r--r--tools/thermal/tmon/tmon.h3
-rw-r--r--tools/vm/slabinfo.c26
32 files changed, 1950 insertions, 458 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 03acc823838a..a77b915d36a8 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -203,8 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@@ -296,6 +296,13 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -316,6 +323,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -447,5 +455,6 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 36369e76cc63..33d2cd04d254 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,25 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -82,7 +101,7 @@
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 2eab6a3a8a8c..cc615be27a54 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -95,6 +95,7 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
@@ -576,6 +577,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
index 0197042b7dfb..1ecdb911add8 100644
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
@@ -90,7 +91,7 @@
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
@@ -115,11 +116,13 @@
#define F_GETSIG 11 /* for sockets. */
#endif
+#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
+#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
@@ -178,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
@@ -185,6 +192,7 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
@@ -209,5 +217,6 @@ struct flock64 {
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 5088bd9f1922..860f867c50c0 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -2083,7 +2083,8 @@ struct kvm_stats_header {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 897fc504918b..f075cf37a65e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4280,6 +4280,7 @@ static int trace__replay(struct trace *trace)
goto out;
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
+ trace->syscalls.events.sys_enter = evsel;
/* older kernels have syscalls tp versus raw_syscalls */
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
@@ -4292,6 +4293,7 @@ static int trace__replay(struct trace *trace)
}
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
+ trace->syscalls.events.sys_exit = evsel;
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
if (evsel &&
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index 5f57d9829956..4339692a8d0b 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -61,7 +61,7 @@ def get_optional(perf_dict, field):
def get_offset(perf_dict, field):
if field in perf_dict:
- return f"+0x{perf_dict[field]:x}"
+ return "+%#x" % perf_dict[field]
return ""
def get_dso_file_path(dso_name, dso_build_id):
@@ -76,7 +76,7 @@ def get_dso_file_path(dso_name, dso_build_id):
else:
append = "/elf"
- dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append;
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
@@ -94,8 +94,8 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr):
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
- f"--start-address=0x{start_addr:x}",
- f"--stop-address=0x{stop_addr:x}" ]
+ "--start-address="+format(start_addr,"#x"),
+ "--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
@@ -109,12 +109,14 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr):
m = disasm_re.search(line)
if m is None:
continue
- print(f"\t{line}")
+ print("\t" + line)
def print_sample(sample):
- print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
- f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
- f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+ print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
+ "pid: %d tid: %d period: %d time: %d }" % \
+ (sample['cpu'], sample['addr'], sample['phys_addr'], \
+ sample['ip'], sample['pid'], sample['tid'], \
+ sample['period'], sample['time']))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
@@ -131,7 +133,7 @@ def common_start_str(comm, sample):
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+ return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns)
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
@@ -171,7 +173,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso):
glb_line_number = line_number
glb_source_file_name = source_file_name
- print(f"{start_str}{src_str}")
+ print(start_str, src_str)
def process_event(param_dict):
global cache_size
@@ -188,7 +190,7 @@ def process_event(param_dict):
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
- print(f"Event type: {name}")
+ print("Event type: %s" % name)
print_sample(sample)
# If cannot find dso so cannot dump assembler, bail out
@@ -197,7 +199,7 @@ def process_event(param_dict):
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
- print(f"Failed to find valid dso map for dso {dso}")
+ print("Failed to find valid dso map for dso %s" % dso)
return
if (name[0:12] == "instructions"):
@@ -244,15 +246,15 @@ def process_event(param_dict):
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
- print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
- print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
- print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
if (options.objdump_name != None):
@@ -267,6 +269,6 @@ def process_event(param_dict):
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
- print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 4ad0dfbc8b21..7c7d20fc503a 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,8 +20,6 @@
#include "tsc.h"
#include "mmap.h"
#include "tests.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
/*
* Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just
@@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
evlist__config(evlist, &opts, NULL);
- evsel = evlist__first(evlist);
-
- evsel->core.attr.comm = 1;
- evsel->core.attr.disabled = 1;
- evsel->core.attr.enable_on_exec = 0;
-
- /*
- * For hybrid "cycles:u", it creates two events.
- * Init the second evsel here.
- */
- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
- evsel = evsel__next(evsel);
+ /* For hybrid "cycles:u", it creates two events */
+ evlist__for_each_entry(evlist, evsel) {
evsel->core.attr.comm = 1;
evsel->core.attr.disabled = 1;
evsel->core.attr.enable_on_exec = 0;
}
- if (evlist__open(evlist) == -ENOENT) {
- err = TEST_SKIP;
+ ret = evlist__open(evlist);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ err = TEST_SKIP;
+ else
+ pr_debug("evlist__open() failed\n");
goto out_err;
}
- CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
@@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea247..cdd6463a5b68 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -63,20 +63,16 @@ static struct hashmap *bpf_map_hash;
static struct bpf_perf_object *
bpf_perf_object__next(struct bpf_perf_object *prev)
{
- struct bpf_perf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_perf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ if (!prev) {
+ if (list_empty(&bpf_objects_list))
+ return NULL;
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
+ return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+ }
+ if (list_is_last(&prev->list, &bpf_objects_list))
return NULL;
- return next;
+ return list_next_entry(prev, list);
}
#define bpf_perf_object__for_each(perf_obj, tmp) \
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ecd377938eea..b3be5b1d9dbb 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
return NULL;
}
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+ /* Not found any valid program header */
+ return -1;
+}
+
static bool want_demangle(bool is_kernel_sym)
{
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
sym.st_value);
used_opd = true;
}
+
/*
* When loading symbols in a data mapping, ABS symbols (which
* has a value of SHN_ABS in its st_shndx) failed at
@@ -1227,6 +1255,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
gelf_getshdr(sec, &shdr);
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. ".gnu.warning.*" section is used by linker to generate
+ * warnings when calling deprecated functions, the symbols in
+ * the section aren't loaded to memory during process execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
secstrs = secstrs_sym;
/*
@@ -1262,11 +1301,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(syms_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_warning("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ continue;
+ }
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
- (u64)sym.st_value, (u64)shdr.sh_addr,
- (u64)shdr.sh_offset);
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
}
demangled = demangle_sym(dso, kmodule, elf_name);
diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index da468bd510ca..e6020c0d59ec 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -6,7 +6,7 @@
|_| |___/ |_|
pm-graph: suspend/resume/boot timing analysis tools
- Version: 5.8
+ Version: 5.9
Author: Todd Brandt <todd.e.brandt@intel.com>
Home Page: https://01.org/pm-graph
@@ -97,8 +97,8 @@
(kernel/pre-3.15/enable_trace_events_suspend_resume.patch)
(kernel/pre-3.15/enable_trace_events_device_pm_callback.patch)
- If you're using a kernel older than 3.15.0, the following
- additional kernel parameters are required:
+ If you're using bootgraph, or sleepgraph with a kernel older than 3.15.0,
+ the following additional kernel parameters are required:
(e.g. in file /etc/default/grub)
GRUB_CMDLINE_LINUX_DEFAULT="... initcall_debug log_buf_len=32M ..."
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index 2823cd3122f7..f96f50e0c336 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -69,22 +69,24 @@ class SystemValues(aslib.SystemValues):
bootloader = 'grub'
blexec = []
def __init__(self):
- self.hostname = platform.node()
+ self.kernel, self.hostname = 'unknown', platform.node()
self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
if os.path.exists('/proc/version'):
fp = open('/proc/version', 'r')
- val = fp.read().strip()
+ self.kernel = self.kernelVersion(fp.read().strip())
fp.close()
- self.kernel = self.kernelVersion(val)
- else:
- self.kernel = 'unknown'
self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S')
def kernelVersion(self, msg):
- return msg.split()[2]
+ m = re.match('^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg)
+ if m:
+ return m.group('v')
+ return 'unknown'
def checkFtraceKernelVersion(self):
- val = tuple(map(int, self.kernel.split('-')[0].split('.')))
- if val >= (4, 10, 0):
- return True
+ m = re.match('^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel)
+ if m:
+ val = tuple(map(int, m.groups()))
+ if val >= (4, 10, 0):
+ return True
return False
def kernelParams(self):
cmdline = 'initcall_debug log_buf_len=32M'
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 962e5768681c..4f80ad7d7275 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -125,7 +125,7 @@ acpi_suspend_begin:
suspend_console:
acpi_pm_prepare:
syscore_suspend:
-arch_thaw_secondary_cpus_end:
+arch_enable_nonboot_cpus_end:
syscore_resume:
acpi_pm_finish:
resume_console:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index ffd50953a024..33981adcdd68 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -66,8 +66,13 @@ from threading import Thread
from subprocess import call, Popen, PIPE
import base64
+debugtiming = False
+mystarttime = time.time()
def pprint(msg):
- print(msg)
+ if debugtiming:
+ print('[%09.3f] %s' % (time.time()-mystarttime, msg))
+ else:
+ print(msg)
sys.stdout.flush()
def ascii(text):
@@ -81,13 +86,14 @@ def ascii(text):
# store system values and test parameters
class SystemValues:
title = 'SleepGraph'
- version = '5.8'
+ version = '5.9'
ansi = False
rs = 0
display = ''
gzip = False
sync = False
wifi = False
+ netfix = False
verbose = False
testlog = True
dmesglog = True
@@ -108,6 +114,7 @@ class SystemValues:
cpucount = 0
memtotal = 204800
memfree = 204800
+ osversion = ''
srgap = 0
cgexp = False
testdir = ''
@@ -116,6 +123,7 @@ class SystemValues:
fpdtpath = '/sys/firmware/acpi/tables/FPDT'
epath = '/sys/kernel/debug/tracing/events/power/'
pmdpath = '/sys/power/pm_debug_messages'
+ s0ixpath = '/sys/module/intel_pmc_core/parameters/warn_on_s0ix_failures'
acpipath='/sys/module/acpi/parameters/debug_level'
traceevents = [
'suspend_resume',
@@ -156,6 +164,7 @@ class SystemValues:
ftop = False
usetraceevents = False
usetracemarkers = True
+ useftrace = True
usekprobes = True
usedevsrc = False
useprocmon = False
@@ -279,10 +288,16 @@ class SystemValues:
'intel_fbdev_set_suspend': {},
}
infocmds = [
+ [0, 'sysinfo', 'uname', '-a'],
+ [0, 'cpuinfo', 'head', '-7', '/proc/cpuinfo'],
[0, 'kparams', 'cat', '/proc/cmdline'],
[0, 'mcelog', 'mcelog'],
[0, 'pcidevices', 'lspci', '-tv'],
- [0, 'usbdevices', 'lsusb', '-t'],
+ [0, 'usbdevices', 'lsusb', '-tv'],
+ [0, 'acpidevices', 'sh', '-c', 'ls -l /sys/bus/acpi/devices/*/physical_node'],
+ [0, 's0ix_require', 'cat', '/sys/kernel/debug/pmc_core/substate_requirements'],
+ [0, 's0ix_debug', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_debug_status'],
+ [1, 's0ix_residency', 'cat', '/sys/kernel/debug/pmc_core/slp_s0_residency_usec'],
[1, 'interrupts', 'cat', '/proc/interrupts'],
[1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'],
[2, 'gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/*'],
@@ -358,8 +373,19 @@ class SystemValues:
self.outputResult({'error':msg})
sys.exit(1)
return False
- def usable(self, file):
- return (os.path.exists(file) and os.path.getsize(file) > 0)
+ def usable(self, file, ishtml=False):
+ if not os.path.exists(file) or os.path.getsize(file) < 1:
+ return False
+ if ishtml:
+ try:
+ fp = open(file, 'r')
+ res = fp.read(1000)
+ fp.close()
+ except:
+ return False
+ if '<html>' not in res:
+ return False
+ return True
def getExec(self, cmd):
try:
fp = Popen(['which', cmd], stdout=PIPE, stderr=PIPE).stdout
@@ -413,12 +439,16 @@ class SystemValues:
r = info['bios-release-date'] if 'bios-release-date' in info else ''
self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | biosdate:%s | numcpu:%d | memsz:%d | memfr:%d' % \
(m, p, c, b, r, self.cpucount, self.memtotal, self.memfree)
+ if self.osversion:
+ self.sysstamp += ' | os:%s' % self.osversion
def printSystemInfo(self, fatal=False):
self.rootCheck(True)
out = dmidecode(self.mempath, fatal)
if len(out) < 1:
return
fmt = '%-24s: %s'
+ if self.osversion:
+ print(fmt % ('os-version', self.osversion))
for name in sorted(out):
print(fmt % (name, out[name]))
print(fmt % ('cpucount', ('%d' % self.cpucount)))
@@ -426,20 +456,25 @@ class SystemValues:
print(fmt % ('memfree', ('%d kB' % self.memfree)))
def cpuInfo(self):
self.cpucount = 0
- fp = open('/proc/cpuinfo', 'r')
- for line in fp:
- if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
- self.cpucount += 1
- fp.close()
- fp = open('/proc/meminfo', 'r')
- for line in fp:
- m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
- if m:
- self.memtotal = int(m.group('sz'))
- m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line)
- if m:
- self.memfree = int(m.group('sz'))
- fp.close()
+ if os.path.exists('/proc/cpuinfo'):
+ with open('/proc/cpuinfo', 'r') as fp:
+ for line in fp:
+ if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
+ self.cpucount += 1
+ if os.path.exists('/proc/meminfo'):
+ with open('/proc/meminfo', 'r') as fp:
+ for line in fp:
+ m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memtotal = int(m.group('sz'))
+ m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memfree = int(m.group('sz'))
+ if os.path.exists('/etc/os-release'):
+ with open('/etc/os-release', 'r') as fp:
+ for line in fp:
+ if line.startswith('PRETTY_NAME='):
+ self.osversion = line[12:].strip().replace('"', '')
def initTestOutput(self, name):
self.prefix = self.hostname
v = open('/proc/version', 'r').read().strip()
@@ -698,6 +733,8 @@ class SystemValues:
return False
return True
def fsetVal(self, val, path):
+ if not self.useftrace:
+ return False
return self.setVal(val, self.tpath+path)
def getVal(self, file):
res = ''
@@ -711,9 +748,11 @@ class SystemValues:
pass
return res
def fgetVal(self, path):
+ if not self.useftrace:
+ return ''
return self.getVal(self.tpath+path)
def cleanupFtrace(self):
- if(self.usecallgraph or self.usetraceevents or self.usedevsrc):
+ if self.useftrace:
self.fsetVal('0', 'events/kprobes/enable')
self.fsetVal('', 'kprobe_events')
self.fsetVal('1024', 'buffer_size_kb')
@@ -734,13 +773,14 @@ class SystemValues:
return True
return False
def initFtrace(self, quiet=False):
+ if not self.useftrace:
+ return
if not quiet:
sysvals.printSystemInfo(False)
pprint('INITIALIZING FTRACE...')
# turn trace off
self.fsetVal('0', 'tracing_on')
self.cleanupFtrace()
- self.testVal(self.pmdpath, 'basic', '1')
# set the trace clock to global
self.fsetVal('global', 'trace_clock')
self.fsetVal('nop', 'current_tracer')
@@ -766,6 +806,10 @@ class SystemValues:
# set trace type
self.fsetVal('function_graph', 'current_tracer')
self.fsetVal('', 'set_ftrace_filter')
+ # temporary hack to fix https://bugzilla.kernel.org/show_bug.cgi?id=212761
+ fp = open(self.tpath+'set_ftrace_notrace', 'w')
+ fp.write('native_queued_spin_lock_slowpath\ndev_driver_string')
+ fp.close()
# set trace format options
self.fsetVal('print-parent', 'trace_options')
self.fsetVal('funcgraph-abstime', 'trace_options')
@@ -846,6 +890,8 @@ class SystemValues:
fp.write('# turbostat %s\n' % test['turbo'])
if 'wifi' in test:
fp.write('# wifi %s\n' % test['wifi'])
+ if 'netfix' in test:
+ fp.write('# netfix %s\n' % test['netfix'])
if test['error'] or len(testdata) > 1:
fp.write('# enter_sleep_error %s\n' % test['error'])
return fp
@@ -865,6 +911,8 @@ class SystemValues:
fp.write('error%s: %s\n' % (n, testdata['error']))
else:
fp.write('result%s: pass\n' % n)
+ if 'mode' in testdata:
+ fp.write('mode%s: %s\n' % (n, testdata['mode']))
for v in ['suspend', 'resume', 'boot', 'lastinit']:
if v in testdata:
fp.write('%s%s: %.3f\n' % (v, n, testdata[v]))
@@ -901,6 +949,8 @@ class SystemValues:
fp.write(text)
fp.close()
def dlog(self, text):
+ if not self.dmesgfile:
+ return
self.putlog(self.dmesgfile, '# %s\n' % text)
def flog(self, text):
self.putlog(self.ftracefile, text)
@@ -954,34 +1004,31 @@ class SystemValues:
dirname = props[dev].syspath
if not dirname or not os.path.exists(dirname):
continue
- with open(dirname+'/power/async') as fp:
- text = fp.read()
- props[dev].isasync = False
- if 'enabled' in text:
+ props[dev].isasync = False
+ if os.path.exists(dirname+'/power/async'):
+ fp = open(dirname+'/power/async')
+ if 'enabled' in fp.read():
props[dev].isasync = True
+ fp.close()
fields = os.listdir(dirname)
- if 'product' in fields:
- with open(dirname+'/product', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'name' in fields:
- with open(dirname+'/name', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'model' in fields:
- with open(dirname+'/model', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'description' in fields:
- with open(dirname+'/description', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'id' in fields:
- with open(dirname+'/id', 'rb') as fp:
- props[dev].altname = ascii(fp.read())
- elif 'idVendor' in fields and 'idProduct' in fields:
- idv, idp = '', ''
- with open(dirname+'/idVendor', 'rb') as fp:
- idv = ascii(fp.read()).strip()
- with open(dirname+'/idProduct', 'rb') as fp:
- idp = ascii(fp.read()).strip()
- props[dev].altname = '%s:%s' % (idv, idp)
+ for file in ['product', 'name', 'model', 'description', 'id', 'idVendor']:
+ if file not in fields:
+ continue
+ try:
+ with open(os.path.join(dirname, file), 'rb') as fp:
+ props[dev].altname = ascii(fp.read())
+ except:
+ continue
+ if file == 'idVendor':
+ idv, idp = props[dev].altname.strip(), ''
+ try:
+ with open(os.path.join(dirname, 'idProduct'), 'rb') as fp:
+ idp = ascii(fp.read()).strip()
+ except:
+ props[dev].altname = ''
+ break
+ props[dev].altname = '%s:%s' % (idv, idp)
+ break
if props[dev].altname:
out = props[dev].altname.strip().replace('\n', ' ')\
.replace(',', ' ').replace(';', ' ')
@@ -1047,7 +1094,7 @@ class SystemValues:
self.cmd1[name] = self.dictify(info, delta)
elif not debug and delta and name in self.cmd1:
before, after = self.cmd1[name], self.dictify(info, delta)
- dinfo = ('\t%s\n' % before['@']) if '@' in before else ''
+ dinfo = ('\t%s\n' % before['@']) if '@' in before and len(before) > 1 else ''
prefix = self.commonPrefix(list(before.keys()))
for key in sorted(before):
if key in after and before[key] != after[key]:
@@ -1128,6 +1175,22 @@ class SystemValues:
val = valline[idx]
out.append('%s=%s' % (key, val))
return '|'.join(out)
+ def netfixon(self, net='both'):
+ cmd = self.getExec('netfix')
+ if not cmd:
+ return ''
+ fp = Popen([cmd, '-s', net, 'on'], stdout=PIPE, stderr=PIPE).stdout
+ out = ascii(fp.read()).strip()
+ fp.close()
+ return out
+ def wifiRepair(self):
+ out = self.netfixon('wifi')
+ if not out or 'error' in out.lower():
+ return ''
+ m = re.match('WIFI \S* ONLINE (?P<action>\S*)', out)
+ if not m:
+ return 'dead'
+ return m.group('action')
def wifiDetails(self, dev):
try:
info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip()
@@ -1144,12 +1207,12 @@ class SystemValues:
except:
return ''
for line in reversed(w.split('\n')):
- m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', w.split('\n')[-1])
+ m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line)
if not m or (dev and dev != m.group('dev')):
continue
return m.group('dev')
return ''
- def pollWifi(self, dev, timeout=60):
+ def pollWifi(self, dev, timeout=10):
start = time.time()
while (time.time() - start) < timeout:
w = self.checkWifi(dev)
@@ -1157,6 +1220,11 @@ class SystemValues:
return '%s reconnected %.2f' % \
(self.wifiDetails(dev), max(0, time.time() - start))
time.sleep(0.01)
+ if self.netfix:
+ res = self.wifiRepair()
+ if res:
+ timeout = max(0, time.time() - start)
+ return '%s %s %d' % (self.wifiDetails(dev), res, timeout)
return '%s timeout %d' % (self.wifiDetails(dev), timeout)
def errorSummary(self, errinfo, msg):
found = False
@@ -1283,10 +1351,10 @@ sysvals = SystemValues()
switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
switchoff = ['disable', 'off', 'false', '0']
suspendmodename = {
- 'freeze': 'Freeze (S0)',
- 'standby': 'Standby (S1)',
- 'mem': 'Suspend (S3)',
- 'disk': 'Hibernate (S4)'
+ 'standby': 'standby (S1)',
+ 'freeze': 'freeze (S2idle)',
+ 'mem': 'suspend (S3)',
+ 'disk': 'hibernate (S4)'
}
# Class: DevProps
@@ -1376,6 +1444,7 @@ class Data:
'INVALID' : r'(?i).*\bINVALID\b.*',
'CRASH' : r'(?i).*\bCRASHED\b.*',
'TIMEOUT' : r'(?i).*\bTIMEOUT\b.*',
+ 'ABORT' : r'(?i).*\bABORT\b.*',
'IRQ' : r'.*\bgenirq: .*',
'TASKFAIL': r'.*Freezing of tasks *.*',
'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*',
@@ -1724,9 +1793,9 @@ class Data:
if 'waking' in self.dmesg[lp]:
tCnt = self.dmesg[lp]['waking'][0]
if self.dmesg[lp]['waking'][1] >= 0.001:
- tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
+ tTry = '%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
else:
- tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
+ tTry = '%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt)
else:
text = '%.0f' % (tL * 1000)
@@ -2107,6 +2176,30 @@ class Data:
# set resume complete to end at end marker
if 'resume_complete' in dm:
dm['resume_complete']['end'] = time
+ def initcall_debug_call(self, line, quick=False):
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ 'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ 'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\
+ '(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line)
+ if m:
+ return True if quick else m.group('t', 'f', 'n', 'p')
+ return False if quick else ('', '', '', '')
+ def initcall_debug_return(self, line, quick=False):
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\
+ '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\
+ '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line)
+ if not m:
+ m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\
+ '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line)
+ if m:
+ return True if quick else m.group('t', 'f', 'dt')
+ return False if quick else ('', '', '')
def debugPrint(self):
for p in self.sortedPhases():
list = self.dmesg[p]['list']
@@ -2880,10 +2973,11 @@ class TestProps:
cmdlinefmt = '^# command \| (?P<cmd>.*)'
kparamsfmt = '^# kparams \| (?P<kp>.*)'
devpropfmt = '# Device Properties: .*'
- pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9]*): (?P<info>.*)'
+ pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)'
tracertypefmt = '# tracer: (?P<t>.*)'
firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
procexecfmt = 'ps - (?P<ps>.*)$'
+ procmultifmt = '@(?P<n>[0-9]*)\|(?P<ps>.*)$'
ftrace_line_fmt_fg = \
'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2893,6 +2987,9 @@ class TestProps:
'(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\
'(?P<msg>.*)'
machinesuspend = 'machine_suspend\[.*'
+ multiproclist = dict()
+ multiproctime = 0.0
+ multiproccnt = 0
def __init__(self):
self.stamp = ''
self.sysinfo = ''
@@ -3063,6 +3160,7 @@ class TestRun:
self.ttemp = dict()
class ProcessMonitor:
+ maxchars = 512
def __init__(self):
self.proclist = dict()
self.running = False
@@ -3088,19 +3186,23 @@ class ProcessMonitor:
if ujiff > 0 or kjiff > 0:
running[pid] = ujiff + kjiff
process.wait()
- out = ''
+ out = ['']
for pid in running:
jiffies = running[pid]
val = self.proclist[pid]
- if out:
- out += ','
- out += '%s-%s %d' % (val['name'], pid, jiffies)
- return 'ps - '+out
+ if len(out[-1]) > self.maxchars:
+ out.append('')
+ elif len(out[-1]) > 0:
+ out[-1] += ','
+ out[-1] += '%s-%s %d' % (val['name'], pid, jiffies)
+ if len(out) > 1:
+ for line in out:
+ sysvals.fsetVal('ps - @%d|%s' % (len(out), line), 'trace_marker')
+ else:
+ sysvals.fsetVal('ps - %s' % out[0], 'trace_marker')
def processMonitor(self, tid):
while self.running:
- out = self.procstat()
- if out:
- sysvals.fsetVal(out, 'trace_marker')
+ self.procstat()
def start(self):
self.thread = Thread(target=self.processMonitor, args=(0,))
self.running = True
@@ -3144,7 +3246,6 @@ def doesTraceLogHaveTraceEvents():
# Function: appendIncompleteTraceLog
# Description:
-# [deprecated for kernel 3.15 or newer]
# Adds callgraph data which lacks trace event data. This is only
# for timelines generated from 3.15 or older
# Arguments:
@@ -3246,6 +3347,61 @@ def appendIncompleteTraceLog(testruns):
dev['ftrace'] = cg
break
+# Function: loadTraceLog
+# Description:
+# load the ftrace file into memory and fix up any ordering issues
+# Output:
+# TestProps instance and an array of lines in proper order
+def loadTraceLog():
+ tp, data, lines, trace = TestProps(), dict(), [], []
+ tf = sysvals.openlog(sysvals.ftracefile, 'r')
+ for line in tf:
+ # remove any latent carriage returns
+ line = line.replace('\r\n', '')
+ if tp.stampInfo(line, sysvals):
+ continue
+ # ignore all other commented lines
+ if line[0] == '#':
+ continue
+ # ftrace line: parse only valid lines
+ m = re.match(tp.ftrace_line_fmt, line)
+ if(not m):
+ continue
+ dur = m.group('dur') if tp.cgformat else 'traceevent'
+ info = (m.group('time'), m.group('proc'), m.group('pid'),
+ m.group('msg'), dur)
+ # group the data by timestamp
+ t = float(info[0])
+ if t in data:
+ data[t].append(info)
+ else:
+ data[t] = [info]
+ # we only care about trace event ordering
+ if (info[3].startswith('suspend_resume:') or \
+ info[3].startswith('tracing_mark_write:')) and t not in trace:
+ trace.append(t)
+ tf.close()
+ for t in sorted(data):
+ first, last, blk = [], [], data[t]
+ if len(blk) > 1 and t in trace:
+ # move certain lines to the start or end of a timestamp block
+ for i in range(len(blk)):
+ if 'SUSPEND START' in blk[i][3]:
+ first.append(i)
+ elif re.match('.* timekeeping_freeze.*begin', blk[i][3]):
+ last.append(i)
+ elif re.match('.* timekeeping_freeze.*end', blk[i][3]):
+ first.append(i)
+ elif 'RESUME COMPLETE' in blk[i][3]:
+ last.append(i)
+ if len(first) == 1 and len(last) == 0:
+ blk.insert(0, blk.pop(first[0]))
+ elif len(last) == 1 and len(first) == 0:
+ blk.append(blk.pop(last[0]))
+ for info in blk:
+ lines.append(info)
+ return (tp, lines)
+
# Function: parseTraceLog
# Description:
# Analyze an ftrace log output file generated from this app during
@@ -3271,32 +3427,12 @@ def parseTraceLog(live=False):
# extract the callgraph and traceevent data
s2idle_enter = hwsus = False
- tp = TestProps()
testruns, testdata = [], []
testrun, data, limbo = 0, 0, True
- tf = sysvals.openlog(sysvals.ftracefile, 'r')
phase = 'suspend_prepare'
- for line in tf:
- # remove any latent carriage returns
- line = line.replace('\r\n', '')
- if tp.stampInfo(line, sysvals):
- continue
- # ignore all other commented lines
- if line[0] == '#':
- continue
- # ftrace line: parse only valid lines
- m = re.match(tp.ftrace_line_fmt, line)
- if(not m):
- continue
+ tp, tf = loadTraceLog()
+ for m_time, m_proc, m_pid, m_msg, m_param3 in tf:
# gather the basic message data from the line
- m_time = m.group('time')
- m_proc = m.group('proc')
- m_pid = m.group('pid')
- m_msg = m.group('msg')
- if(tp.cgformat):
- m_param3 = m.group('dur')
- else:
- m_param3 = 'traceevent'
if(m_time and m_pid and m_msg):
t = FTraceLine(m_time, m_msg, m_param3)
pid = int(m_pid)
@@ -3322,14 +3458,29 @@ def parseTraceLog(live=False):
if t.type == 'tracing_mark_write':
m = re.match(tp.procexecfmt, t.name)
if(m):
- proclist = dict()
- for ps in m.group('ps').split(','):
+ parts, msg = 1, m.group('ps')
+ m = re.match(tp.procmultifmt, msg)
+ if(m):
+ parts, msg = int(m.group('n')), m.group('ps')
+ if tp.multiproccnt == 0:
+ tp.multiproctime = t.time
+ tp.multiproclist = dict()
+ proclist = tp.multiproclist
+ tp.multiproccnt += 1
+ else:
+ proclist = dict()
+ tp.multiproccnt = 0
+ for ps in msg.split(','):
val = ps.split()
- if not val:
+ if not val or len(val) != 2:
continue
name = val[0].replace('--', '-')
proclist[name] = int(val[1])
- data.pstl[t.time] = proclist
+ if parts == 1:
+ data.pstl[t.time] = proclist
+ elif parts == tp.multiproccnt:
+ data.pstl[tp.multiproctime] = proclist
+ tp.multiproccnt = 0
continue
# find the end of resume
if(t.endMarker()):
@@ -3545,7 +3696,6 @@ def parseTraceLog(live=False):
testrun.ftemp[key].append(FTraceCallGraph(pid, sysvals))
if(res == -1):
testrun.ftemp[key][-1].addLine(t)
- tf.close()
if len(testdata) < 1:
sysvals.vprint('WARNING: ftrace start marker is missing')
if data and not data.devicegroups:
@@ -3667,7 +3817,13 @@ def parseTraceLog(live=False):
if p not in data.dmesg:
if not terr:
ph = p if 'machine' in p else lp
- terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, ph)
+ if p == 'suspend_machine':
+ sm = sysvals.suspendmode
+ if sm in suspendmodename:
+ sm = suspendmodename[sm]
+ terr = 'test%s did not enter %s power mode' % (tn, sm)
+ else:
+ terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, ph)
pprint('TEST%s FAILED: %s' % (tn, terr))
error.append(terr)
if data.tSuspended == 0:
@@ -3708,9 +3864,7 @@ def parseTraceLog(live=False):
# Function: loadKernelLog
# Description:
-# [deprecated for kernel 3.15.0 or newer]
# load the dmesg file into memory and fix up any ordering issues
-# The dmesg filename is taken from sysvals
# Output:
# An array of empty Data objects with only their dmesgtext attributes set
def loadKernelLog():
@@ -3736,7 +3890,8 @@ def loadKernelLog():
if(not m):
continue
msg = m.group("msg")
- if(re.match('PM: Syncing filesystems.*', msg)):
+ if re.match('PM: Syncing filesystems.*', msg) or \
+ re.match('PM: suspend entry.*', msg):
if(data):
testruns.append(data)
data = Data(len(testruns))
@@ -3747,11 +3902,17 @@ def loadKernelLog():
if(m):
sysvals.stamp['kernel'] = m.group('k')
m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg)
- if(m):
+ if not m:
+ m = re.match('PM: Preparing system for sleep \((?P<m>.*)\)', msg)
+ if m:
sysvals.stamp['mode'] = sysvals.suspendmode = m.group('m')
data.dmesgtext.append(line)
lf.close()
+ if sysvals.suspendmode == 's2idle':
+ sysvals.suspendmode = 'freeze'
+ elif sysvals.suspendmode == 'deep':
+ sysvals.suspendmode = 'mem'
if data:
testruns.append(data)
if len(testruns) < 1:
@@ -3762,12 +3923,9 @@ def loadKernelLog():
for data in testruns:
last = ''
for line in data.dmesgtext:
- mc = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\
- '(?P<f>.*)\+ @ .*, parent: .*', line)
- mr = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\
- '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', last)
- if(mc and mr and (mc.group('t') == mr.group('t')) and
- (mc.group('f') == mr.group('f'))):
+ ct, cf, n, p = data.initcall_debug_call(line)
+ rt, rf, l = data.initcall_debug_return(last)
+ if ct and rt and ct == rt and cf == rf:
i = data.dmesgtext.index(last)
j = data.dmesgtext.index(line)
data.dmesgtext[i] = line
@@ -3777,7 +3935,6 @@ def loadKernelLog():
# Function: parseKernelLog
# Description:
-# [deprecated for kernel 3.15.0 or newer]
# Analyse a dmesg log output file generated from this app during
# the execution phase. Create a set of device structures in memory
# for subsequent formatting in the html output file
@@ -3796,30 +3953,30 @@ def parseKernelLog(data):
# dmesg phase match table
dm = {
- 'suspend_prepare': ['PM: Syncing filesystems.*'],
- 'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*'],
- 'suspend_late': ['PM: suspend of devices complete after.*'],
- 'suspend_noirq': ['PM: late suspend of devices complete after.*'],
- 'suspend_machine': ['PM: noirq suspend of devices complete after.*'],
- 'resume_machine': ['ACPI: Low-level resume complete.*'],
- 'resume_noirq': ['ACPI: Waking up from system sleep state.*'],
- 'resume_early': ['PM: noirq resume of devices complete after.*'],
- 'resume': ['PM: early resume of devices complete after.*'],
- 'resume_complete': ['PM: resume of devices complete after.*'],
+ 'suspend_prepare': ['PM: Syncing filesystems.*', 'PM: suspend entry.*'],
+ 'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*',
+ 'PM: Suspending system .*'],
+ 'suspend_late': ['PM: suspend of devices complete after.*',
+ 'PM: freeze of devices complete after.*'],
+ 'suspend_noirq': ['PM: late suspend of devices complete after.*',
+ 'PM: late freeze of devices complete after.*'],
+ 'suspend_machine': ['PM: suspend-to-idle',
+ 'PM: noirq suspend of devices complete after.*',
+ 'PM: noirq freeze of devices complete after.*'],
+ 'resume_machine': ['PM: Timekeeping suspended for.*',
+ 'ACPI: Low-level resume complete.*',
+ 'ACPI: resume from mwait',
+ 'Suspended for [0-9\.]* seconds'],
+ 'resume_noirq': ['PM: resume from suspend-to-idle',
+ 'ACPI: Waking up from system sleep state.*'],
+ 'resume_early': ['PM: noirq resume of devices complete after.*',
+ 'PM: noirq restore of devices complete after.*'],
+ 'resume': ['PM: early resume of devices complete after.*',
+ 'PM: early restore of devices complete after.*'],
+ 'resume_complete': ['PM: resume of devices complete after.*',
+ 'PM: restore of devices complete after.*'],
'post_resume': ['.*Restarting tasks \.\.\..*'],
}
- if(sysvals.suspendmode == 'standby'):
- dm['resume_machine'] = ['PM: Restoring platform NVS memory']
- elif(sysvals.suspendmode == 'disk'):
- dm['suspend_late'] = ['PM: freeze of devices complete after.*']
- dm['suspend_noirq'] = ['PM: late freeze of devices complete after.*']
- dm['suspend_machine'] = ['PM: noirq freeze of devices complete after.*']
- dm['resume_machine'] = ['PM: Restoring platform NVS memory']
- dm['resume_early'] = ['PM: noirq restore of devices complete after.*']
- dm['resume'] = ['PM: early restore of devices complete after.*']
- dm['resume_complete'] = ['PM: restore of devices complete after.*']
- elif(sysvals.suspendmode == 'freeze'):
- dm['resume_machine'] = ['ACPI: resume from mwait']
# action table (expected events that occur and show up in dmesg)
at = {
@@ -3867,12 +4024,13 @@ def parseKernelLog(data):
for s in dm[p]:
if(re.match(s, msg)):
phasechange, phase = True, p
+ dm[p] = [s]
break
# hack for determining resume_machine end for freeze
if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
and phase == 'resume_machine' and \
- re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
+ data.initcall_debug_call(line, True)):
data.setPhase(phase, ktime, False)
phase = 'resume_noirq'
data.setPhase(phase, ktime, True)
@@ -3945,26 +4103,18 @@ def parseKernelLog(data):
# -- device callbacks --
if(phase in data.sortedPhases()):
# device init call
- if(re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
- sm = re.match('calling (?P<f>.*)\+ @ '+\
- '(?P<n>.*), parent: (?P<p>.*)', msg);
- f = sm.group('f')
- n = sm.group('n')
- p = sm.group('p')
- if(f and n and p):
- data.newAction(phase, f, int(n), p, ktime, -1, '')
- # device init return
- elif(re.match('call (?P<f>.*)\+ returned .* after '+\
- '(?P<t>.*) usecs', msg)):
- sm = re.match('call (?P<f>.*)\+ returned .* after '+\
- '(?P<t>.*) usecs(?P<a>.*)', msg);
- f = sm.group('f')
- t = sm.group('t')
- list = data.dmesg[phase]['list']
- if(f in list):
- dev = list[f]
- dev['length'] = int(t)
- dev['end'] = ktime
+ t, f, n, p = data.initcall_debug_call(line)
+ if t and f and n and p:
+ data.newAction(phase, f, int(n), p, ktime, -1, '')
+ else:
+ # device init return
+ t, f, l = data.initcall_debug_return(line)
+ if t and f and l:
+ list = data.dmesg[phase]['list']
+ if(f in list):
+ dev = list[f]
+ dev['length'] = int(l)
+ dev['end'] = ktime
# if trace events are not available, these are better than nothing
if(not sysvals.usetraceevents):
@@ -4006,6 +4156,8 @@ def parseKernelLog(data):
# fill in any missing phases
phasedef = data.phasedef
terr, lp = '', 'suspend_prepare'
+ if lp not in data.dmesg:
+ doError('dmesg log format has changed, could not find start of suspend')
for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
if p not in data.dmesg:
if not terr:
@@ -5302,7 +5454,7 @@ def executeSuspend(quiet=False):
sv.dlog('read dmesg')
sv.initdmesg()
# start ftrace
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if not quiet:
pprint('START TRACING')
sv.dlog('start ftrace tracing')
@@ -5334,8 +5486,7 @@ def executeSuspend(quiet=False):
sv.dlog('enable RTC wake alarm')
sv.rtcWakeAlarmOn()
# start of suspend trace marker
- if(sv.usecallgraph or sv.usetraceevents):
- sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
+ sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
# predelay delay
if(count == 1 and sv.predelay > 0):
sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker')
@@ -5384,11 +5535,17 @@ def executeSuspend(quiet=False):
sv.fsetVal('WAIT END', 'trace_marker')
# return from suspend
pprint('RESUME COMPLETE')
- if(sv.usecallgraph or sv.usetraceevents):
- sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+ sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
if sv.wifi and wifi:
tdata['wifi'] = sv.pollWifi(wifi)
sv.dlog('wifi check, %s' % tdata['wifi'])
+ if sv.netfix:
+ netfixout = sv.netfixon('wired')
+ elif sv.netfix:
+ netfixout = sv.netfixon()
+ if sv.netfix and netfixout:
+ tdata['netfix'] = netfixout
+ sv.dlog('netfix, %s' % tdata['netfix'])
if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
sv.dlog('read the ACPI FPDT')
tdata['fw'] = getFPDT(False)
@@ -5396,7 +5553,7 @@ def executeSuspend(quiet=False):
sv.dlog('run the cmdinfo list after')
cmdafter = sv.cmdinfo(False)
# stop ftrace
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if sv.useprocmon:
sv.dlog('stop the process monitor')
pm.stop()
@@ -5407,7 +5564,7 @@ def executeSuspend(quiet=False):
sysvals.dlog('EXECUTION TRACE END')
sv.getdmesg(testdata)
# grab a copy of the ftrace output
- if(sv.usecallgraph or sv.usetraceevents):
+ if sv.useftrace:
if not quiet:
pprint('CAPTURING TRACE')
op = sv.writeDatafileHeader(sv.ftracefile, testdata)
@@ -5838,13 +5995,19 @@ def statusCheck(probecheck=False):
pprint(' please choose one with -m')
# check if ftrace is available
- res = sysvals.colorText('NO')
- ftgood = sysvals.verifyFtrace()
- if(ftgood):
- res = 'YES'
- elif(sysvals.usecallgraph):
- status = 'ftrace is not properly supported'
- pprint(' is ftrace supported: %s' % res)
+ if sysvals.useftrace:
+ res = sysvals.colorText('NO')
+ sysvals.useftrace = sysvals.verifyFtrace()
+ efmt = '"{0}" uses ftrace, and it is not properly supported'
+ if sysvals.useftrace:
+ res = 'YES'
+ elif sysvals.usecallgraph:
+ status = efmt.format('-f')
+ elif sysvals.usedevsrc:
+ status = efmt.format('-dev')
+ elif sysvals.useprocmon:
+ status = efmt.format('-proc')
+ pprint(' is ftrace supported: %s' % res)
# check if kprobes are available
if sysvals.usekprobes:
@@ -5857,8 +6020,8 @@ def statusCheck(probecheck=False):
pprint(' are kprobes supported: %s' % res)
# what data source are we using
- res = 'DMESG'
- if(ftgood):
+ res = 'DMESG (very limited, ftrace is preferred)'
+ if sysvals.useftrace:
sysvals.usetraceevents = True
for e in sysvals.traceevents:
if not os.path.exists(sysvals.epath+e):
@@ -5879,7 +6042,7 @@ def statusCheck(probecheck=False):
pprint(' optional commands this tool may use for info:')
no = sysvals.colorText('MISSING')
yes = sysvals.colorText('FOUND', 32)
- for c in ['turbostat', 'mcelog', 'lspci', 'lsusb']:
+ for c in ['turbostat', 'mcelog', 'lspci', 'lsusb', 'netfix']:
if c == 'turbostat':
res = yes if sysvals.haveTurbostat() else no
else:
@@ -5971,7 +6134,7 @@ def processData(live=False, quiet=False):
if not sysvals.stamp:
pprint('ERROR: data does not include the expected stamp')
return (testruns, {'error': 'timeline generation failed'})
- shown = ['bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr',
+ shown = ['os', 'bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr',
'memsz', 'mode', 'numcpu', 'plat', 'time', 'wifi']
sysvals.vprint('System Info:')
for key in sorted(sysvals.stamp):
@@ -6052,6 +6215,8 @@ def runTest(n=0, quiet=False):
if sysvals.display:
ret = sysvals.displayControl('init')
sysvals.dlog('xset display init, ret = %d' % ret)
+ sysvals.testVal(sysvals.pmdpath, 'basic', '1')
+ sysvals.testVal(sysvals.s0ixpath, 'basic', 'Y')
sysvals.dlog('initialize ftrace')
sysvals.initFtrace(quiet)
@@ -6145,9 +6310,12 @@ def data_from_html(file, outpath, issues, fulldetail=False):
elist[err[0]] += 1
for i in elist:
ilist.append('%sx%d' % (i, elist[i]) if elist[i] > 1 else i)
- wifi = find_in_html(html, 'Wifi Resume: ', '</td>')
- if wifi:
- extra['wifi'] = wifi
+ line = find_in_html(log, '# wifi ', '\n')
+ if line:
+ extra['wifi'] = line
+ line = find_in_html(log, '# netfix ', '\n')
+ if line:
+ extra['netfix'] = line
low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
for lowstr in ['waking', '+']:
if not low:
@@ -6243,7 +6411,7 @@ def genHtml(subdir, force=False):
sysvals.ftracefile = file
sysvals.setOutputFile()
if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \
- (force or not sysvals.usable(sysvals.htmlfile)):
+ (force or not sysvals.usable(sysvals.htmlfile, True)):
pprint('FTRACE: %s' % sysvals.ftracefile)
if sysvals.dmesgfile:
pprint('DMESG : %s' % sysvals.dmesgfile)
@@ -6533,6 +6701,7 @@ def printHelp():
' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
' -result fn Export a results table to a text file for parsing.\n'\
' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\
+ ' -netfix Use netfix to reset the network in the event it fails to resume.\n'\
' [testprep]\n'\
' -sync Sync the filesystems before starting the test\n'\
' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\
@@ -6615,6 +6784,8 @@ if __name__ == '__main__':
elif(arg == '-v'):
pprint("Version %s" % sysvals.version)
sys.exit(0)
+ elif(arg == '-debugtiming'):
+ debugtiming = True
elif(arg == '-x2'):
sysvals.execcount = 2
elif(arg == '-x2delay'):
@@ -6657,6 +6828,8 @@ if __name__ == '__main__':
sysvals.sync = True
elif(arg == '-wifi'):
sysvals.wifi = True
+ elif(arg == '-netfix'):
+ sysvals.netfix = True
elif(arg == '-gzip'):
sysvals.gzip = True
elif(arg == '-info'):
@@ -6819,7 +6992,7 @@ if __name__ == '__main__':
sysvals.outdir = val
sysvals.notestrun = True
if(os.path.isdir(val) == False):
- doError('%s is not accessible' % val)
+ doError('%s is not accesible' % val)
elif(arg == '-filter'):
try:
val = next(args)
@@ -6942,12 +7115,11 @@ if __name__ == '__main__':
time.sleep(sysvals.multitest['delay'])
fmt = 'suspend-%y%m%d-%H%M%S'
sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
- ret = runTest(i+1, True)
+ ret = runTest(i+1, not sysvals.verbose)
failcnt = 0 if not ret else failcnt + 1
if sysvals.maxfail > 0 and failcnt >= sysvals.maxfail:
pprint('Maximum fail count of %d reached, aborting multitest' % (sysvals.maxfail))
break
- time.sleep(5)
sysvals.resetlog()
sysvals.multistat(False, i, finish)
if 'time' in sysvals.multitest and datetime.now() >= finish:
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 1e7d3de55a94..c7b26a3603af 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -92,40 +92,66 @@ displays the statistics gathered since it was forked.
.SH ROW DESCRIPTIONS
The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
.SH COLUMN DESCRIPTIONS
-.nf
+.PP
\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+.PP
\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
+.PP
\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
+.PP
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
+.PP
\fBPackage\fP processor package number -- not present on systems with a single processor package.
+.PP
\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed.
+.PP
\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state.
+.PP
\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state).
+.PP
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+.PP
\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary.
+.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
+.PP
\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+.PP
\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+.PP
\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
+.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
+.PP
\fBPkgWatt\fP Watts consumed by the whole package.
+.PP
\fBCorWatt\fP Watts consumed by the core part of the package.
+.PP
\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
+.PP
\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
+.PP
\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
+.PP
\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
-.fi
+.PP
+\fBUncMHz\fP uncore MHz, instantaneous sample.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
.PP
When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list.
.PP
-To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+To find out what columns are available for --show and --hide, the "--list" option is available. Usually, the CATEGORY names above are used to refer to groups of counters. Also, for convenience, the special string "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+.PP
.nf
sudo ./turbostat --show sysfs --quiet sleep 10
10.003837 sec
@@ -158,32 +184,29 @@ Without a command to fork, turbostat displays statistics ever 5 seconds.
Periodic output goes to stdout, by default, unless --out is used to specify an output file.
The 5-second interval can be changed with the "-i sec" option.
.nf
-sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
- - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98
- 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00
- 0 4 3897 99.99 3900 3498 0.01
- 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98
- 1 5 0 0.00 3861 3498 0.01
- 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95
- 2 6 0 0.00 3863 3498 0.05
- 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97
- 3 7 0 0.00 3878 3498 0.03
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
- - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99
- 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00
- 0 4 3898 99.99 3900 3498 0.01
- 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99
- 1 5 0 0.00 3898 3498 0.01
- 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98
- 2 6 0 0.00 3889 3498 0.02
- 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99
- 3 7 0 0.00 3897 3498 0.01
+sudo turbostat --quiet --show CPU,frequency
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz
+ - - 524 12.48 4198 3096 74.53 3800
+ 0 0 4 0.09 4081 3096 98.88 3800
+ 0 4 1 0.02 4063 3096
+ 1 1 2 0.06 4063 3096 99.60
+ 1 5 2 0.05 4070 3096
+ 2 2 4178 99.52 4199 3096 0.00
+ 2 6 3 0.08 4159 3096
+ 3 3 1 0.04 4046 3096 99.66
+ 3 7 0 0.01 3989 3096
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz
+ - - 525 12.52 4198 3096 74.54 3800
+ 0 0 4 0.10 4051 3096 99.49 3800
+ 0 4 2 0.04 3993 3096
+ 1 1 3 0.07 4054 3096 99.56
+ 1 5 4 0.10 4018 3096
+ 2 2 4178 99.51 4199 3096 0.00
+ 2 6 4 0.09 4143 3096
+ 3 3 2 0.06 4026 3096 99.10
+ 3 7 7 0.17 4074 3096
.fi
-This example also shows the use of the --hide option to skip columns that are not wanted.
-Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy.
-Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only,
-because its cpu4's activity on shared hardware keeps it from entering a deeper C-state.
+This example also shows the use of the --show option to show only the desired columns.
.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE
@@ -191,61 +214,86 @@ By default, turbostat always dumps system configuration information
before taking measurements. In the example above, "--quiet" is used
to suppress that output. Here is an example of the configuration information:
.nf
-turbostat version 2017.02.15 - Len Brown <lenb@kernel.org>
-CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
-CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM
-CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB
-cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO)
-CPUID(7): No-SGX
-cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
-RAPL: 3121 sec. Joule Counter Range, at 84 Watts
-cpu4: MSR_PLATFORM_INFO: 0x80838f3012300
+turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>
+Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.18.0-rc6-00001-ge6891250e3b5 ...
+CPUID(0): GenuineIntel 0x16 CPUID levels
+CPUID(1): family:model:stepping 0x6:9e:9 (6:158:9) microcode 0xea
+CPUID(0x80000000): max_extended_levels: 0x80000008
+CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM HT TM
+CPUID(6): APERF, TURBO, DTS, PTM, HWP, HWPnotify, HWPwindow, HWPepp, No-HWPpkg, EPB
+cpu7: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT PREFETCH TURBO)
+CPUID(7): SGX
+cpu7: MSR_IA32_FEATURE_CONTROL: 0x00000005 (Locked )
+CPUID(0x15): eax_crystal: 2 ebx_tsc: 258 ecx_crystal_hz: 0
+TSC: 3096 MHz (24000000 Hz * 258 / 2 / 1000000)
+CPUID(0x16): base_mhz: 3100 max_mhz: 4200 bus_mhz: 100
+cpu7: MSR_MISC_PWR_MGMT: 0x00401cc0 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
+RAPL: 5825 sec. Joule Counter Range, at 45 Watts
+cpu7: MSR_PLATFORM_INFO: 0x80839f1011f00
8 * 100.0 = 800.0 MHz max efficiency frequency
-35 * 100.0 = 3500.0 MHz base frequency
-cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
-cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727
-37 * 100.0 = 3700.0 MHz max turbo 4 active cores
-38 * 100.0 = 3800.0 MHz max turbo 3 active cores
-39 * 100.0 = 3900.0 MHz max turbo 2 active cores
-39 * 100.0 = 3900.0 MHz max turbo 1 active cores
-cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35)
-cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
-cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
-cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
-cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
-cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
-cpu4: POLL: CPUIDLE CORE POLL IDLE
-cpu4: C1: MWAIT 0x00
-cpu4: C1E: MWAIT 0x01
-cpu4: C3: MWAIT 0x10
-cpu4: C6: MWAIT 0x20
-cpu4: C7s: MWAIT 0x32
-cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
-cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
-cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, )
-cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
-cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
+31 * 100.0 = 3100.0 MHz base frequency
+cpu7: MSR_IA32_POWER_CTL: 0x002c005d (C1E auto-promotion: DISabled)
+cpu7: MSR_TURBO_RATIO_LIMIT: 0x2728292a
+39 * 100.0 = 3900.0 MHz max turbo 4 active cores
+40 * 100.0 = 4000.0 MHz max turbo 3 active cores
+41 * 100.0 = 4100.0 MHz max turbo 2 active cores
+42 * 100.0 = 4200.0 MHz max turbo 1 active cores
+cpu7: MSR_CONFIG_TDP_NOMINAL: 0x0000001f (base_ratio=31)
+cpu7: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
+cpu7: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
+cpu7: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
+cpu7: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
+cpu7: MSR_PKG_CST_CONFIG_CONTROL: 0x1e008008 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked, pkg-cstate-limit=8 (unlimited))
+Uncore Frequency pkg0 die0: 800 - 3900 MHz (800 - 3900 MHz)
+/dev/cpu_dma_latency: 2000000000 usec (default)
+current_driver: intel_idle
+current_governor: menu
+current_governor_ro: menu
+cpu7: POLL: CPUIDLE CORE POLL IDLE
+cpu7: C1: MWAIT 0x00
+cpu7: C1E: MWAIT 0x01
+cpu7: C3: MWAIT 0x10
+cpu7: C6: MWAIT 0x20
+cpu7: C7s: MWAIT 0x33
+cpu7: C8: MWAIT 0x40
+cpu7: C9: MWAIT 0x50
+cpu7: C10: MWAIT 0x60
+cpu7: cpufreq driver: intel_pstate
+cpu7: cpufreq governor: performance
+cpufreq intel_pstate no_turbo: 0
+cpu7: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
+cpu0: MSR_PM_ENABLE: 0x00000001 (HWP)
+cpu0: MSR_HWP_CAPABILITIES: 0x01101f53 (high 83 guar 31 eff 16 low 1)
+cpu0: MSR_HWP_REQUEST: 0x00005353 (min 83 max 83 des 0 epp 0x0 window 0x0 pkg 0x0)
+cpu0: MSR_HWP_INTERRUPT: 0x00000001 (EN_Guaranteed_Perf_Change, Dis_Excursion_Min)
+cpu0: MSR_HWP_STATUS: 0x00000004 (No-Guaranteed_Perf_Change, No-Excursion_Min)
+cpu0: EPB: 6 (balanced)
cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
-cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
-cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
-cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
-cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
+cpu0: MSR_PKG_POWER_INFO: 0x00000168 (45 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x42820800218208 (UNlocked)
+cpu0: PKG Limit #1: ENabled (65.000 Watts, 64.000000 sec, clamp ENabled)
+cpu0: PKG Limit #2: ENabled (65.000 Watts, 0.002441* sec, clamp DISabled)
+cpu0: MSR_VR_CURRENT_CONFIG: 0x00000000
+cpu0: PKG Limit #4: 0.000000 Watts (UNlocked)
+cpu0: MSR_DRAM_POWER_LIMIT: 0x5400de00000000 (UNlocked)
+cpu0: DRAM Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_PP0_POLICY: 0
cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
-cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: Cores Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_PP1_POLICY: 0
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
-cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
-cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
-cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns)
-cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns)
-cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns)
+cpu0: GFX Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00640000 (100 C) (100 default - 0 offset)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88200800 (68 C)
+cpu0: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x00000003 (100 C, 100 C)
+cpu7: MSR_PKGC3_IRTL: 0x0000884e (valid, 79872 ns)
+cpu7: MSR_PKGC6_IRTL: 0x00008876 (valid, 120832 ns)
+cpu7: MSR_PKGC7_IRTL: 0x00008894 (valid, 151552 ns)
+cpu7: MSR_PKGC8_IRTL: 0x000088fa (valid, 256000 ns)
+cpu7: MSR_PKGC9_IRTL: 0x0000894c (valid, 339968 ns)
+cpu7: MSR_PKGC10_IRTL: 0x00008bf2 (valid, 1034240 ns)
.fi
+.PP
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
available at the minimum package voltage. The \fBTSC frequency\fP is the base
frequency of the processor -- this should match the brand string
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ede31a4287a0..831dc32d45fa 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -126,6 +126,7 @@ struct msr_counter bic[] = {
{ 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -183,10 +184,11 @@ struct msr_counter bic[] = {
#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
+#define BIC_UNCORE_MHZ (1ULL << 54)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
+#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
@@ -228,6 +230,7 @@ unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
+unsigned int is_hybrid;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
@@ -393,6 +396,7 @@ struct pkg_data {
unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
+ unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
@@ -988,6 +992,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
}
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
+
for (mp = sys.pp; mp; mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 64)
@@ -1370,6 +1377,9 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ /* UncMHz */
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
@@ -1471,6 +1481,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
else
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
+ old->uncore_mhz = new->uncore_mhz;
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
@@ -1689,6 +1700,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
+ p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
@@ -1788,6 +1800,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.energy_gfx += p->energy_gfx;
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
+ average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
@@ -1948,6 +1961,16 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
return 0;
}
+unsigned long long get_uncore_mhz(int package, int die)
+{
+ char path[128];
+
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ die);
+
+ return (snapshot_sysfs_counter(path) / 1000);
+}
+
int get_epb(int cpu)
{
char path[128 + PATH_BYTES];
@@ -2035,9 +2058,9 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
if (!fp)
return -1;
ret = fscanf(fp, "%lld", &tmp);
+ fclose(fp);
if (ret != 1)
return -1;
- fclose(fp);
*cnt = tmp;
return 0;
@@ -2297,6 +2320,10 @@ retry:
if (DO_BIC(BIC_GFX_rc6))
p->gfx_rc6_ms = gfx_cur_rc6_ms;
+ /* n.b. assume die0 uncore frequency applies to whole package */
+ if (DO_BIC(BIC_UNCORE_MHZ))
+ p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+
if (DO_BIC(BIC_GFXMHz))
p->gfx_mhz = gfx_cur_mhz;
@@ -2494,6 +2521,7 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
@@ -2502,13 +2530,14 @@ int has_turbo_ratio_group_limits(int family, int model)
}
}
-static void dump_turbo_ratio_limits(int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
{
unsigned long long msr, core_counts;
- unsigned int ratio, group_size;
+ int shift;
- get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+ get_msr(base_cpu, trl_msr_offset, &msr);
+ fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
+ base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr);
if (has_turbo_ratio_group_limits(family, model)) {
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
@@ -2517,53 +2546,16 @@ static void dump_turbo_ratio_limits(int family, int model)
core_counts = 0x0807060504030201;
}
- ratio = (msr >> 56) & 0xFF;
- group_size = (core_counts >> 56) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
+ for (shift = 56; shift >= 0; shift -= 8) {
+ unsigned int ratio, group_size;
- ratio = (msr >> 48) & 0xFF;
- group_size = (core_counts >> 48) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 40) & 0xFF;
- group_size = (core_counts >> 40) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 32) & 0xFF;
- group_size = (core_counts >> 32) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 24) & 0xFF;
- group_size = (core_counts >> 24) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 16) & 0xFF;
- group_size = (core_counts >> 16) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
-
- ratio = (msr >> 8) & 0xFF;
- group_size = (core_counts >> 8) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
+ ratio = (msr >> shift) & 0xFF;
+ group_size = (core_counts >> shift) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
+ }
- ratio = (msr >> 0) & 0xFF;
- group_size = (core_counts >> 0) & 0xFF;
- if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, group_size);
return;
}
@@ -2976,7 +2968,7 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
}
}
}
- } while (!strncmp(&character, ",", 1));
+ } while (character == ',');
fclose(filep);
return CPU_COUNT_S(size, thiscpu->put_ids);
@@ -3742,6 +3734,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
has_misc_feature_control = 1;
break;
case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
pkg_cstate_limits = skx_pkg_cstate_limits;
has_misc_feature_control = 1;
break;
@@ -3871,6 +3864,22 @@ int is_icx(unsigned int family, unsigned int model)
return 0;
}
+int is_spr(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ return 1;
+ }
+ return 0;
+}
+
int is_ehl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
@@ -3988,6 +3997,7 @@ int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
return 1;
default:
return 0;
@@ -4015,7 +4025,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
-
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
return 1;
default:
@@ -4083,8 +4093,12 @@ static void dump_cstate_pstate_config_info(unsigned int family, unsigned int mod
if (has_ivt_turbo_ratio_limit(family, model))
dump_ivt_turbo_ratio_limits();
- if (has_turbo_ratio_limit(family, model))
- dump_turbo_ratio_limits(family, model);
+ if (has_turbo_ratio_limit(family, model)) {
+ dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
+
+ if (is_hybrid)
+ dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
+ }
if (has_atom_turbo_ratio_limit(family, model))
dump_atom_turbo_ratio_limits();
@@ -4098,6 +4112,24 @@ static void dump_cstate_pstate_config_info(unsigned int family, unsigned int mod
dump_nhm_cst_cfg();
}
+static int read_sysfs_int(char *path)
+{
+ FILE *input;
+ int retval = -1;
+
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return (-1);
+ }
+ if (fscanf(input, "%d", &retval) != 1)
+ err(1, "%s: failed to read int from file", path);
+ fclose(input);
+
+ return (retval);
+}
+
static void dump_sysfs_file(char *path)
{
FILE *input;
@@ -4116,6 +4148,48 @@ static void dump_sysfs_file(char *path)
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
+static void intel_uncore_frequency_probe(void)
+{
+ int i, j;
+ char path[128];
+
+ if (!genuine_intel)
+ return;
+
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
+ return;
+
+ if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ BIC_PRESENT(BIC_UNCORE_MHZ);
+
+ if (quiet)
+ return;
+
+ for (i = 0; i < topo.num_packages; ++i) {
+ for (j = 0; j < topo.num_die; ++j) {
+ int k, l;
+
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
+ i, j);
+ k = read_sysfs_int(path);
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
+ i, j);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+
+ sprintf(path,
+ "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
+ i, j);
+ k = read_sysfs_int(path);
+ sprintf(path,
+ "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
+ i, j);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ }
+ }
+}
+
static void dump_sysfs_cstate_config(void)
{
char path[64];
@@ -4486,6 +4560,7 @@ static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
return (rapl_energy_units);
@@ -4575,6 +4650,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
case INTEL_FAM6_BROADWELL_X: /* BDX */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
do_rapl =
RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
@@ -4740,13 +4816,19 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
{
- if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
+ if (family != 6)
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SKYLAKE_X:
has_automatic_cstate_conversion = 1;
+ }
}
void prewake_cstate_probe(unsigned int family, unsigned int model)
{
- if (is_icx(family, model))
+ if (is_icx(family, model) || is_spr(family, model))
dis_cstate_prewake = 1;
}
@@ -4975,6 +5057,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
@@ -5361,13 +5444,15 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_LAKEFIELD:
case INTEL_FAM6_ALDERLAKE:
case INTEL_FAM6_ALDERLAKE_L:
+ case INTEL_FAM6_ALDERLAKE_N:
+ case INTEL_FAM6_RAPTORLAKE:
+ case INTEL_FAM6_RAPTORLAKE_P:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_L:
return INTEL_FAM6_ATOM_TREMONT;
case INTEL_FAM6_ICELAKE_D:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
return INTEL_FAM6_ICELAKE_X;
}
return model;
@@ -5398,7 +5483,7 @@ void print_dev_latency(void)
}
/*
- * Linux-perf manages the the HW instructions-retired counter
+ * Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
@@ -5543,7 +5628,10 @@ void process_cpuid()
__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
has_sgx = ebx & (1 << 2);
- fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
+
+ is_hybrid = edx & (1 << 15);
+
+ fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");
if (has_sgx)
decode_feature_control_msr();
@@ -5654,7 +5742,7 @@ void process_cpuid()
BIC_NOT_PRESENT(BIC_Pkgpc7);
use_c1_residency_msr = 1;
}
- if (is_skx(family, model) || is_icx(family, model)) {
+ if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
BIC_NOT_PRESENT(BIC_CPU_c3);
BIC_NOT_PRESENT(BIC_Pkgpc3);
BIC_NOT_PRESENT(BIC_CPU_c7);
@@ -5699,6 +5787,7 @@ void process_cpuid()
if (!quiet)
dump_cstate_pstate_config_info(family, model);
+ intel_uncore_frequency_probe();
if (!quiet)
print_dev_latency();
@@ -6128,7 +6217,30 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2022.07.28 - Len Brown <lenb@kernel.org>\n");
+}
+
+#define COMMAND_LINE_SIZE 2048
+
+void print_bootcmd(void)
+{
+ char bootcmd[COMMAND_LINE_SIZE];
+ FILE *fp;
+ int ret;
+
+ memset(bootcmd, 0, COMMAND_LINE_SIZE);
+ fp = fopen("/proc/cmdline", "r");
+ if (!fp)
+ return;
+
+ ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
+ if (ret) {
+ bootcmd[ret] = '\0';
+ /* the last character is already '\n' */
+ fprintf(outf, "Kernel command line: %s", bootcmd);
+ }
+
+ fclose(fp);
}
int add_counter(unsigned int msr_num, char *path, char *name,
@@ -6602,8 +6714,10 @@ int main(int argc, char **argv)
outf = stderr;
cmdline(argc, argv);
- if (!quiet)
+ if (!quiet) {
print_version();
+ print_bootcmd();
+ }
probe_sysfs();
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 83844f8b862a..b0ca44c70e83 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -417,6 +417,7 @@ int main(int argc, char *argv[])
{
int ret = 0;
int fd;
+ uint32_t request;
parse_opts(argc, argv);
@@ -430,13 +431,23 @@ int main(int argc, char *argv[])
/*
* spi mode
*/
+ /* WR is make a request to assign 'mode' */
+ request = mode;
ret = ioctl(fd, SPI_IOC_WR_MODE32, &mode);
if (ret == -1)
pabort("can't set spi mode");
+ /* RD is read what mode the device actually is in */
ret = ioctl(fd, SPI_IOC_RD_MODE32, &mode);
if (ret == -1)
pabort("can't get spi mode");
+ /* Drivers can reject some mode bits without returning an error.
+ * Read the current value to identify what mode it is in, and if it
+ * differs from the requested mode, warn the user.
+ */
+ if (request != mode)
+ printf("WARNING device does not support requested mode 0x%x\n",
+ request);
/*
* bits per word
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 71b306602368..616ed4019655 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -3,6 +3,6 @@
TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
-CFLAGS += -O2 -g -Wall -I../../../../usr/include/
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES)
include ../lib.mk
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 4158da0da2bb..2237d1aac801 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -82,8 +82,9 @@ static int next_cpu(int cpu)
return cpu;
}
-static void *migration_worker(void *ign)
+static void *migration_worker(void *__rseq_tid)
{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
cpu_set_t allowed_mask;
int r, i, cpu;
@@ -106,7 +107,7 @@ static void *migration_worker(void *ign)
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
@@ -231,7 +232,8 @@ int main(int argc, char *argv[])
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
- pthread_create(&migration_thread, NULL, migration_worker, 0);
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)gettid());
for (i = 0; !done; i++) {
vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index db05b3764b77..9a4b30bd3a9e 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -59,6 +59,7 @@ TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += io_uring_zerocopy_tx
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
new file mode 100644
index 000000000000..9d64c560a2d6
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/io_uring.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#define NOTIF_TAG 0xfffffffULL
+#define NONZC_TAG 0
+#define ZC_TAG 1
+
+enum {
+ MODE_NONZC = 0,
+ MODE_ZC = 1,
+ MODE_ZC_FIXED = 2,
+ MODE_MIXED = 3,
+};
+
+static bool cfg_flush = false;
+static bool cfg_cork = false;
+static int cfg_mode = MODE_ZC_FIXED;
+static int cfg_nr_reqs = 8;
+static int cfg_family = PF_UNSPEC;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static int cfg_runtime_ms = 4200;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
+
+struct io_sq_ring {
+ unsigned *head;
+ unsigned *tail;
+ unsigned *ring_mask;
+ unsigned *ring_entries;
+ unsigned *flags;
+ unsigned *array;
+};
+
+struct io_cq_ring {
+ unsigned *head;
+ unsigned *tail;
+ unsigned *ring_mask;
+ unsigned *ring_entries;
+ struct io_uring_cqe *cqes;
+};
+
+struct io_uring_sq {
+ unsigned *khead;
+ unsigned *ktail;
+ unsigned *kring_mask;
+ unsigned *kring_entries;
+ unsigned *kflags;
+ unsigned *kdropped;
+ unsigned *array;
+ struct io_uring_sqe *sqes;
+
+ unsigned sqe_head;
+ unsigned sqe_tail;
+
+ size_t ring_sz;
+};
+
+struct io_uring_cq {
+ unsigned *khead;
+ unsigned *ktail;
+ unsigned *kring_mask;
+ unsigned *kring_entries;
+ unsigned *koverflow;
+ struct io_uring_cqe *cqes;
+
+ size_t ring_sz;
+};
+
+struct io_uring {
+ struct io_uring_sq sq;
+ struct io_uring_cq cq;
+ int ring_fd;
+};
+
+#ifdef __alpha__
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 535
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 536
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup 425
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter 426
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register 427
+# endif
+#endif
+
+#if defined(__x86_64) || defined(__i386__)
+#define read_barrier() __asm__ __volatile__("":::"memory")
+#define write_barrier() __asm__ __volatile__("":::"memory")
+#else
+
+#define read_barrier() __sync_synchronize()
+#define write_barrier() __sync_synchronize()
+#endif
+
+static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+ return syscall(__NR_io_uring_setup, entries, p);
+}
+
+static int io_uring_enter(int fd, unsigned int to_submit,
+ unsigned int min_complete,
+ unsigned int flags, sigset_t *sig)
+{
+ return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
+ flags, sig, _NSIG / 8);
+}
+
+static int io_uring_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned nr_iovecs)
+{
+ int ret;
+
+ ret = syscall(__NR_io_uring_register, ring->ring_fd,
+ IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
+ return (ret < 0) ? -errno : ret;
+}
+
+static int io_uring_register_notifications(struct io_uring *ring,
+ unsigned nr,
+ struct io_uring_notification_slot *slots)
+{
+ int ret;
+ struct io_uring_notification_register r = {
+ .nr_slots = nr,
+ .data = (unsigned long)slots,
+ };
+
+ ret = syscall(__NR_io_uring_register, ring->ring_fd,
+ IORING_REGISTER_NOTIFIERS, &r, sizeof(r));
+ return (ret < 0) ? -errno : ret;
+}
+
+static int io_uring_mmap(int fd, struct io_uring_params *p,
+ struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+ size_t size;
+ void *ptr;
+ int ret;
+
+ sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
+ ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
+ if (ptr == MAP_FAILED)
+ return -errno;
+ sq->khead = ptr + p->sq_off.head;
+ sq->ktail = ptr + p->sq_off.tail;
+ sq->kring_mask = ptr + p->sq_off.ring_mask;
+ sq->kring_entries = ptr + p->sq_off.ring_entries;
+ sq->kflags = ptr + p->sq_off.flags;
+ sq->kdropped = ptr + p->sq_off.dropped;
+ sq->array = ptr + p->sq_off.array;
+
+ size = p->sq_entries * sizeof(struct io_uring_sqe);
+ sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
+ if (sq->sqes == MAP_FAILED) {
+ ret = -errno;
+err:
+ munmap(sq->khead, sq->ring_sz);
+ return ret;
+ }
+
+ cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
+ ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
+ if (ptr == MAP_FAILED) {
+ ret = -errno;
+ munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
+ goto err;
+ }
+ cq->khead = ptr + p->cq_off.head;
+ cq->ktail = ptr + p->cq_off.tail;
+ cq->kring_mask = ptr + p->cq_off.ring_mask;
+ cq->kring_entries = ptr + p->cq_off.ring_entries;
+ cq->koverflow = ptr + p->cq_off.overflow;
+ cq->cqes = ptr + p->cq_off.cqes;
+ return 0;
+}
+
+static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags)
+{
+ struct io_uring_params p;
+ int fd, ret;
+
+ memset(ring, 0, sizeof(*ring));
+ memset(&p, 0, sizeof(p));
+ p.flags = flags;
+
+ fd = io_uring_setup(entries, &p);
+ if (fd < 0)
+ return fd;
+ ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
+ if (!ret)
+ ring->ring_fd = fd;
+ else
+ close(fd);
+ return ret;
+}
+
+static int io_uring_submit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ const unsigned mask = *sq->kring_mask;
+ unsigned ktail, submitted, to_submit;
+ int ret;
+
+ read_barrier();
+ if (*sq->khead != *sq->ktail) {
+ submitted = *sq->kring_entries;
+ goto submit;
+ }
+ if (sq->sqe_head == sq->sqe_tail)
+ return 0;
+
+ ktail = *sq->ktail;
+ to_submit = sq->sqe_tail - sq->sqe_head;
+ for (submitted = 0; submitted < to_submit; submitted++) {
+ read_barrier();
+ sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
+ }
+ if (!submitted)
+ return 0;
+
+ if (*sq->ktail != ktail) {
+ write_barrier();
+ *sq->ktail = ktail;
+ write_barrier();
+ }
+submit:
+ ret = io_uring_enter(ring->ring_fd, submitted, 0,
+ IORING_ENTER_GETEVENTS, NULL);
+ return ret < 0 ? -errno : ret;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags)
+{
+ memset(sqe, 0, sizeof(*sqe));
+ sqe->opcode = (__u8) IORING_OP_SEND;
+ sqe->fd = sockfd;
+ sqe->addr = (unsigned long) buf;
+ sqe->len = len;
+ sqe->msg_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags,
+ unsigned slot_idx, unsigned zc_flags)
+{
+ io_uring_prep_send(sqe, sockfd, buf, len, flags);
+ sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF;
+ sqe->notification_idx = slot_idx;
+ sqe->ioprio = zc_flags;
+}
+
+static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+
+ if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
+ return NULL;
+ return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
+}
+
+static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
+{
+ struct io_uring_cq *cq = &ring->cq;
+ const unsigned mask = *cq->kring_mask;
+ unsigned head = *cq->khead;
+ int ret;
+
+ *cqe_ptr = NULL;
+ do {
+ read_barrier();
+ if (head != *cq->ktail) {
+ *cqe_ptr = &cq->cqes[head & mask];
+ break;
+ }
+ ret = io_uring_enter(ring->ring_fd, 0, 1,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0)
+ return -errno;
+ } while (1);
+
+ return 0;
+}
+
+static inline void io_uring_cqe_seen(struct io_uring *ring)
+{
+ *(&ring->cq)->khead += 1;
+ write_barrier();
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+ return fd;
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}};
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ unsigned long packets = 0, bytes = 0;
+ struct io_uring ring;
+ struct iovec iov;
+ uint64_t tstop;
+ int i, fd, ret;
+ int compl_cqes = 0;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ ret = io_uring_queue_init(512, &ring, 0);
+ if (ret)
+ error(1, ret, "io_uring: queue init");
+
+ ret = io_uring_register_notifications(&ring, 1, b);
+ if (ret)
+ error(1, ret, "io_uring: tx ctx registration");
+
+ iov.iov_base = payload;
+ iov.iov_len = cfg_payload_len;
+
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret)
+ error(1, ret, "io_uring: buffer registration");
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ unsigned zc_flags = 0;
+ unsigned buf_idx = 0;
+ unsigned slot_idx = 0;
+ unsigned mode = cfg_mode;
+ unsigned msg_flags = 0;
+
+ if (cfg_mode == MODE_MIXED)
+ mode = rand() % 3;
+
+ sqe = io_uring_get_sqe(&ring);
+
+ if (mode == MODE_NONZC) {
+ io_uring_prep_send(sqe, fd, payload,
+ cfg_payload_len, msg_flags);
+ sqe->user_data = NONZC_TAG;
+ } else {
+ if (cfg_flush) {
+ zc_flags |= IORING_RECVSEND_NOTIF_FLUSH;
+ compl_cqes++;
+ }
+ io_uring_prep_sendzc(sqe, fd, payload,
+ cfg_payload_len,
+ msg_flags, slot_idx, zc_flags);
+ if (mode == MODE_ZC_FIXED) {
+ sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+ sqe->buf_index = buf_idx;
+ }
+ sqe->user_data = ZC_TAG;
+ }
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret != cfg_nr_reqs)
+ error(1, ret, "submit");
+
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+
+ if (cqe->user_data == NOTIF_TAG) {
+ compl_cqes--;
+ i--;
+ } else if (cqe->user_data != NONZC_TAG &&
+ cqe->user_data != ZC_TAG) {
+ error(1, cqe->res, "invalid user_data");
+ } else if (cqe->res <= 0 && cqe->res != -EAGAIN) {
+ error(1, cqe->res, "send failed");
+ } else {
+ if (cqe->res > 0) {
+ packets++;
+ bytes += cqe->res;
+ }
+ /* failed requests don't flush */
+ if (cfg_flush &&
+ cqe->res <= 0 &&
+ cqe->user_data == ZC_TAG)
+ compl_cqes--;
+ }
+ io_uring_cqe_seen(&ring);
+ }
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+ } while (gettimeofday_ms() < tstop);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+ packets, bytes >> 20,
+ packets / (cfg_runtime_ms / 1000),
+ (bytes >> 20) / (cfg_runtime_ms / 1000));
+
+ while (compl_cqes) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+ io_uring_cqe_seen(&ring);
+ compl_cqes--;
+ }
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] "
+ "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
+ struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
+ char *daddr = NULL;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'n':
+ cfg_nr_reqs = strtoul(optarg, NULL, 0);
+ break;
+ case 'f':
+ cfg_flush = 1;
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mode = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ switch (cfg_family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", daddr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", daddr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (cfg_mode == MODE_NONZC && cfg_flush)
+ error(1, 0, "-f: only zerocopy modes support notifications");
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+
+ parse_opts(argc, argv);
+
+ if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
new file mode 100755
index 000000000000..6a65e4437640
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN_TX="./io_uring_zerocopy_tx"
+readonly BIN_RX="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ IPs=( "4" "6" )
+ protocols=( "tcp" "udp" )
+
+ for IP in "${IPs[@]}"; do
+ for proto in "${protocols[@]}"; do
+ for mode in $(seq 1 3); do
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f
+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f
+ done
+ done
+ done
+
+ echo "OK. All tests passed"
+ exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+ local readonly ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
+
+do_test "${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index b86642f90d7f..3a391c9bf468 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -86,7 +86,7 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
+ "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
__rseq_str(label) ":\n"
@@ -103,17 +103,17 @@ do { \
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
- "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
- "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
__rseq_str(label) "\n"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
@@ -127,12 +127,12 @@ do { \
REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
- "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
+ "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
RSEQ_ASM_TMP_REG_1 "\n" \
REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
#define RSEQ_ASM_OP_R_ADD(count) \
- "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
", %[" __rseq_str(count) "]\n"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
@@ -194,8 +194,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -251,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expectnot] "r" (expectnot),
[load] "m" (*load),
@@ -301,8 +301,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[count] "r" (count)
RSEQ_INJECT_INPUT
@@ -352,8 +352,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -411,8 +411,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -472,8 +472,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[v] "m" (*v),
[expect] "r" (expect),
[v2] "m" (*v2),
@@ -532,8 +532,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -593,8 +593,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[expect] "r" (expect),
[v] "m" (*v),
[newv] "r" (newv),
@@ -651,8 +651,8 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [current_cpu_id] "m" (rseq_get_abi()->cpu_id),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[ptr] "r" (ptr),
[off] "er" (off),
[inc] "er" (inc)
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 986b9458efb2..4177f9507bbe 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -111,7 +111,8 @@ void rseq_init(void)
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
- if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
rseq_offset = *libc_rseq_offset_p;
rseq_size = *libc_rseq_size_p;
diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile
index fa02c4d5ec13..e815bbf2d0f4 100644
--- a/tools/testing/selftests/safesetid/Makefile
+++ b/tools/testing/selftests/safesetid/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for mount selftests.
+# Makefile for SafeSetID selftest.
CFLAGS = -Wall -O2
LDLIBS = -lcap
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 4b809c93ba36..eb9bf0aee951 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <errno.h>
#include <pwd.h>
+#include <grp.h>
#include <string.h>
#include <syscall.h>
#include <sys/capability.h>
@@ -16,17 +17,28 @@
#include <stdbool.h>
#include <stdarg.h>
+/*
+ * NOTES about this test:
+ * - requries libcap-dev to be installed on test system
+ * - requires securityfs to me mounted at /sys/kernel/security, e.g.:
+ * mount -n -t securityfs -o nodev,noexec,nosuid securityfs /sys/kernel/security
+ * - needs CONFIG_SECURITYFS and CONFIG_SAFESETID to be enabled
+ */
+
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
-#define ROOT_USER 0
-#define RESTRICTED_PARENT 1
-#define ALLOWED_CHILD1 2
-#define ALLOWED_CHILD2 3
-#define NO_POLICY_USER 4
+#define ROOT_UGID 0
+#define RESTRICTED_PARENT_UGID 1
+#define ALLOWED_CHILD1_UGID 2
+#define ALLOWED_CHILD2_UGID 3
+#define NO_POLICY_UGID 4
+
+#define UGID_POLICY_STRING "1:2\n1:3\n2:2\n3:3\n"
-char* add_whitelist_policy_file = "/sys/kernel/security/safesetid/add_whitelist_policy";
+char* add_uid_whitelist_policy_file = "/sys/kernel/security/safesetid/uid_allowlist_policy";
+char* add_gid_whitelist_policy_file = "/sys/kernel/security/safesetid/gid_allowlist_policy";
static void die(char *fmt, ...)
{
@@ -106,9 +118,10 @@ static void ensure_user_exists(uid_t uid)
die("couldn't open file\n");
if (fseek(fd, 0, SEEK_END))
die("couldn't fseek\n");
- snprintf(name_str, 10, "%d", uid);
+ snprintf(name_str, 10, "user %d", uid);
p.pw_name=name_str;
p.pw_uid=uid;
+ p.pw_gid=uid;
p.pw_gecos="Test account";
p.pw_dir="/dev/null";
p.pw_shell="/bin/false";
@@ -120,9 +133,36 @@ static void ensure_user_exists(uid_t uid)
}
}
+static void ensure_group_exists(gid_t gid)
+{
+ struct group g;
+
+ FILE *fd;
+ char name_str[10];
+
+ if (getgrgid(gid) == NULL) {
+ memset(&g,0x00,sizeof(g));
+ fd=fopen("/etc/group","a");
+ if (fd == NULL)
+ die("couldn't open group file\n");
+ if (fseek(fd, 0, SEEK_END))
+ die("couldn't fseek group file\n");
+ snprintf(name_str, 10, "group %d", gid);
+ g.gr_name=name_str;
+ g.gr_gid=gid;
+ g.gr_passwd=NULL;
+ g.gr_mem=NULL;
+ int value = putgrent(&g,fd);
+ if (value != 0)
+ die("putgrent failed\n");
+ if (fclose(fd))
+ die("fclose failed\n");
+ }
+}
+
static void ensure_securityfs_mounted(void)
{
- int fd = open(add_whitelist_policy_file, O_WRONLY);
+ int fd = open(add_uid_whitelist_policy_file, O_WRONLY);
if (fd < 0) {
if (errno == ENOENT) {
// Need to mount securityfs
@@ -135,39 +175,60 @@ static void ensure_securityfs_mounted(void)
} else {
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
+ }
+}
+
+static void write_uid_policies()
+{
+ static char *policy_str = UGID_POLICY_STRING;
+ ssize_t written;
+ int fd;
+
+ fd = open(add_uid_whitelist_policy_file, O_WRONLY);
+ if (fd < 0)
+ die("can't open add_uid_whitelist_policy file\n");
+ written = write(fd, policy_str, strlen(policy_str));
+ if (written != strlen(policy_str)) {
+ if (written >= 0) {
+ die("short write to %s\n", add_uid_whitelist_policy_file);
+ } else {
+ die("write to %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
}
}
+ if (close(fd) != 0) {
+ die("close of %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
}
-static void write_policies(void)
+static void write_gid_policies()
{
- static char *policy_str =
- "1:2\n"
- "1:3\n"
- "2:2\n"
- "3:3\n";
+ static char *policy_str = UGID_POLICY_STRING;
ssize_t written;
int fd;
- fd = open(add_whitelist_policy_file, O_WRONLY);
+ fd = open(add_gid_whitelist_policy_file, O_WRONLY);
if (fd < 0)
- die("can't open add_whitelist_policy file\n");
+ die("can't open add_gid_whitelist_policy file\n");
written = write(fd, policy_str, strlen(policy_str));
if (written != strlen(policy_str)) {
if (written >= 0) {
- die("short write to %s\n", add_whitelist_policy_file);
+ die("short write to %s\n", add_gid_whitelist_policy_file);
} else {
die("write to %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
+
static bool test_userns(bool expect_success)
{
uid_t uid;
@@ -194,7 +255,7 @@ static bool test_userns(bool expect_success)
printf("preparing file name string failed");
return false;
}
- success = write_file(map_file_name, "0 0 1", uid);
+ success = write_file(map_file_name, "0 %d 1", uid);
return success == expect_success;
}
@@ -258,13 +319,144 @@ static void test_setuid(uid_t child_uid, bool expect_success)
die("should not reach here\n");
}
+static void test_setgid(gid_t child_gid, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgid(child_gid) < 0)
+ exit(EXIT_FAILURE);
+ if (getgid() == child_gid)
+ exit(EXIT_SUCCESS);
+ else
+ exit(EXIT_FAILURE);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+static void test_setgroups(gid_t* child_groups, size_t len, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+ gid_t groupset[len];
+ int i, j;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgroups(len, child_groups) != 0)
+ exit(EXIT_FAILURE);
+ if (getgroups(len, groupset) != len)
+ exit(EXIT_FAILURE);
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < len; j++) {
+ if (child_groups[i] == groupset[j])
+ break;
+ if (j == len - 1)
+ exit(EXIT_FAILURE);
+ }
+ }
+ exit(EXIT_SUCCESS);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+
static void ensure_users_exist(void)
{
- ensure_user_exists(ROOT_USER);
- ensure_user_exists(RESTRICTED_PARENT);
- ensure_user_exists(ALLOWED_CHILD1);
- ensure_user_exists(ALLOWED_CHILD2);
- ensure_user_exists(NO_POLICY_USER);
+ ensure_user_exists(ROOT_UGID);
+ ensure_user_exists(RESTRICTED_PARENT_UGID);
+ ensure_user_exists(ALLOWED_CHILD1_UGID);
+ ensure_user_exists(ALLOWED_CHILD2_UGID);
+ ensure_user_exists(NO_POLICY_UGID);
+}
+
+static void ensure_groups_exist(void)
+{
+ ensure_group_exists(ROOT_UGID);
+ ensure_group_exists(RESTRICTED_PARENT_UGID);
+ ensure_group_exists(ALLOWED_CHILD1_UGID);
+ ensure_group_exists(ALLOWED_CHILD2_UGID);
+ ensure_group_exists(NO_POLICY_UGID);
}
static void drop_caps(bool setid_retained)
@@ -283,41 +475,52 @@ static void drop_caps(bool setid_retained)
int main(int argc, char **argv)
{
+ ensure_groups_exist();
ensure_users_exist();
ensure_securityfs_mounted();
- write_policies();
+ write_uid_policies();
+ write_gid_policies();
if (prctl(PR_SET_KEEPCAPS, 1L))
die("Error with set keepcaps\n");
- // First test to make sure we can write userns mappings from a user
- // that doesn't have any restrictions (as long as it has CAP_SETUID);
- if (setuid(NO_POLICY_USER) < 0)
- die("Error with set uid(%d)\n", NO_POLICY_USER);
- if (setgid(NO_POLICY_USER) < 0)
- die("Error with set gid(%d)\n", NO_POLICY_USER);
-
+ // First test to make sure we can write userns mappings from a non-root
+ // user that doesn't have any restrictions (as long as it has
+ // CAP_SETUID);
+ if (setgid(NO_POLICY_UGID) < 0)
+ die("Error with set gid(%d)\n", NO_POLICY_UGID);
+ if (setuid(NO_POLICY_UGID) < 0)
+ die("Error with set uid(%d)\n", NO_POLICY_UGID);
// Take away all but setid caps
drop_caps(true);
-
// Need PR_SET_DUMPABLE flag set so we can write /proc/[pid]/uid_map
// from non-root parent process.
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
die("Error with set dumpable\n");
-
if (!test_userns(true)) {
die("test_userns failed when it should work\n");
}
- if (setuid(RESTRICTED_PARENT) < 0)
- die("Error with set uid(%d)\n", RESTRICTED_PARENT);
- if (setgid(RESTRICTED_PARENT) < 0)
- die("Error with set gid(%d)\n", RESTRICTED_PARENT);
+ // Now switch to a user/group with restrictions
+ if (setgid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set gid(%d)\n", RESTRICTED_PARENT_UGID);
+ if (setuid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set uid(%d)\n", RESTRICTED_PARENT_UGID);
+
+ test_setuid(ROOT_UGID, false);
+ test_setuid(ALLOWED_CHILD1_UGID, true);
+ test_setuid(ALLOWED_CHILD2_UGID, true);
+ test_setuid(NO_POLICY_UGID, false);
+
+ test_setgid(ROOT_UGID, false);
+ test_setgid(ALLOWED_CHILD1_UGID, true);
+ test_setgid(ALLOWED_CHILD2_UGID, true);
+ test_setgid(NO_POLICY_UGID, false);
- test_setuid(ROOT_USER, false);
- test_setuid(ALLOWED_CHILD1, true);
- test_setuid(ALLOWED_CHILD2, true);
- test_setuid(NO_POLICY_USER, false);
+ gid_t allowed_supp_groups[2] = {ALLOWED_CHILD1_UGID, ALLOWED_CHILD2_UGID};
+ gid_t disallowed_supp_groups[2] = {ROOT_UGID, NO_POLICY_UGID};
+ test_setgroups(allowed_supp_groups, 2, true);
+ test_setgroups(disallowed_supp_groups, 2, false);
if (!test_userns(false)) {
die("test_userns worked when it should fail\n");
@@ -328,8 +531,12 @@ int main(int argc, char **argv)
test_setuid(2, false);
test_setuid(3, false);
test_setuid(4, false);
+ test_setgid(2, false);
+ test_setgid(3, false);
+ test_setgid(4, false);
// NOTE: this test doesn't clean up users that were created in
// /etc/passwd or flush policies that were added to the LSM.
+ printf("test successful!\n");
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 136df5b76319..4ae6c8991307 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -809,7 +809,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
- int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index 3a5936cc10ab..f0d51d4d2c87 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
new file mode 100644
index 000000000000..e6ccd900f30a
--- /dev/null
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define OFFSET (36000)
+
+int main(int argc, char *argv[])
+{
+ struct timespec now, tst;
+ int status, i;
+ pid_t pid;
+
+ if (argc > 1) {
+ if (sscanf(argv[1], "%ld", &now.tv_sec) != 1)
+ return pr_perror("sscanf");
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
+ }
+ return 0;
+ }
+
+ nscheck();
+
+ ksft_set_plan(1);
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, OFFSET))
+ return 1;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec, tst.tv_sec);
+ }
+
+ pid = vfork();
+ if (pid < 0)
+ return pr_perror("fork");
+
+ if (pid == 0) {
+ char now_str[64];
+ char *cargv[] = {"exec", now_str, NULL};
+ char *cenv[] = {NULL};
+
+ // Check that we are still in the source timens.
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now.tv_sec) > 5)
+ return pr_fail("%ld %ld\n",
+ now.tv_sec, tst.tv_sec);
+ }
+
+ /* Check for proper vvar offsets after execve. */
+ snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET);
+ execve("/proc/self/exe", cargv, cenv);
+ return pr_perror("execve");
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("waitpid");
+
+ if (status)
+ ksft_exit_fail();
+
+ ksft_test_result_pass("exec\n");
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/thermal/tmon/pid.c b/tools/thermal/tmon/pid.c
index 296f69c00c57..da20088285bd 100644
--- a/tools/thermal/tmon/pid.c
+++ b/tools/thermal/tmon/pid.c
@@ -27,7 +27,7 @@
/**************************************************************************
* PID (Proportional-Integral-Derivative) controller is commonly used in
- * linear control system, consider the the process.
+ * linear control system, consider the process.
* G(s) = U(s)/E(s)
* kp = proportional gain
* ki = integral gain
diff --git a/tools/thermal/tmon/tmon.h b/tools/thermal/tmon/tmon.h
index c9066ec104dd..44d16d778f04 100644
--- a/tools/thermal/tmon/tmon.h
+++ b/tools/thermal/tmon/tmon.h
@@ -27,6 +27,9 @@
#define NR_LINES_TZDATA 1
#define TMON_LOG_FILE "/var/tmp/tmon.log"
+#include <sys/time.h>
+#include <pthread.h>
+
extern unsigned long ticktime;
extern double time_elapsed;
extern unsigned long target_temp_user;
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 9b68658b6bb8..5b98f3ee58a5 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
return l;
}
+static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name)
+{
+ char x[128];
+ FILE *f;
+ size_t l;
+
+ snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
/*
* Put a size string together
@@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s)
{
printf("\n%s: Kernel object allocation\n", s->name);
printf("-----------------------------------------------------------------------\n");
- if (read_slab_obj(s, "alloc_calls"))
+ if (read_debug_slab_obj(s, "alloc_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "alloc_calls"))
printf("%s", buffer);
else
printf("No Data\n");
printf("\n%s: Kernel object freeing\n", s->name);
printf("------------------------------------------------------------------------\n");
- if (read_slab_obj(s, "free_calls"))
+ if (read_debug_slab_obj(s, "free_traces"))
+ printf("%s", buffer);
+ else if (read_slab_obj(s, "free_calls"))
printf("%s", buffer);
else
printf("No Data\n");