From 8eaf8ec3c09b88e35c1c3c761ac4188ee425aeb6 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 2 Feb 2023 17:56:15 +0530 Subject: perf session: Show branch speculation info in raw dump Show the branch speculation info if provided by the branch recording hardware feature. This can be useful for purposes of code optimization. E.g. $ perf record -j any,u ./test_branch $ perf report --dump-raw-trace Before: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 ..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 ..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] After: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 SPEC_CORRECT_PATH ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 NON_SPEC_CORRECT_PATH ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] With the addition of new branch flags, the "brstacksym" fields in perf script output now shows speculation information after the branch type. Change the regular expressions accordingly for the test to pass. Since branch speculation information may vary across platforms, the test does not look for specific values. E.g. $ perf test -v 110 Before: 110: Check branch stack sampling : --- start --- test child forked, pid 54154 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$ /tmp/__perf_test.program.AfhUI/perf.script + cleanup + rm -rf /tmp/__perf_test.program.AfhUI test child finished with -1 ---- end ---- Check branch stack sampling: FAILED! After: 110: Check branch stack sampling : --- start --- test child forked, pid 43716 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x66/brstack_foo+0x0/P/-/-/0/IND_CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x1b/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x58/brstack_foo+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x5d/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bar+0x31/brstack_foo+0x20/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x36/brstack_bench+0x5d/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x76/brstack_bench+0x7d/P/-/-/0/COND/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack+0x5a/brstack+0x41/P/-/-/0/UNCOND/NON_SPEC_CORRECT_PATH + set +x Testing branch stack filtering permutation (any_call,CALL|IND_CALL|COND_CALL|SYSCALL|IRQ) Testing branch stack filtering permutation (call,CALL|SYSCALL) Testing branch stack filtering permutation (cond,COND) Testing branch stack filtering permutation (any_ret,RET|COND_RET|SYSRET|ERET) Testing branch stack filtering permutation (call,cond,CALL|SYSCALL|COND) Testing branch stack filtering permutation (any_call,cond,CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND) Testing branch stack filtering permutation (cond,any_call,any_ret,COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET) test child finished with 0 ---- end ---- Check branch stack sampling: Ok Signed-off-by: Sandipan Das Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Borislav Petkov Cc: Dave Hansen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Thomas Richter Cc: x86@kernel.org Link: https://lore.kernel.org/r/048d67c9de3cc8e3dbf19aaa7ff718dec91364c5.1675333809.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_brstack.sh | 18 +++++++++--------- tools/perf/util/session.c | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 59195eb80052..1c49d8293003 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -30,14 +30,14 @@ test_user_branches() { # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL set -x - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script - grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script set +x # some branch types are still not being tested: @@ -57,7 +57,7 @@ test_filter() { # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) - if grep -E -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then + if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then return 1 fi } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fdfe772f2699..749d5b5c135b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1180,7 +1180,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *e = &entries[i]; if (!callstack) { - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n", + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n", i, e->from, e->to, (unsigned short)e->flags.cycles, e->flags.mispred ? "M" : " ", @@ -1188,7 +1188,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) e->flags.abort ? "A" : " ", e->flags.in_tx ? "T" : " ", (unsigned)e->flags.reserved, - get_branch_type(e)); + get_branch_type(e), + e->flags.spec ? branch_spec_desc(e->flags.spec) : ""); } else { if (i == 0) { printf("..... %2"PRIu64": %016" PRIx64 "\n" -- cgit v1.2.3