From ea39b8ce32e90cfc75836505c1c8b2536c5200db Mon Sep 17 00:00:00 2001 From: Aaron Merey Date: Tue, 27 Aug 2019 15:45:36 -0400 Subject: [PATCH 1/9] Upstream patches for PR23879, PR24875,PR24904 --- PR23879-PR24875.patch | 42 ++++++++++++ PR24904-changes-wit.patch | 26 ++++++++ PR24904-changes.patch | 130 ++++++++++++++++++++++++++++++++++++++ systemtap.spec | 16 ++++- 4 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 PR23879-PR24875.patch create mode 100644 PR24904-changes-wit.patch create mode 100644 PR24904-changes.patch diff --git a/PR23879-PR24875.patch b/PR23879-PR24875.patch new file mode 100644 index 0000000..c024b7c --- /dev/null +++ b/PR23879-PR24875.patch @@ -0,0 +1,42 @@ +From 4ae4592f1106e941023a5768d34c2381cc869631 Mon Sep 17 00:00:00 2001 +From: "Frank Ch. Eigler" +Date: Wed, 21 Aug 2019 19:29:45 -0400 +Subject: [PATCH] PR23879, PR24875: fix task-finder-vma on f29+ + +It was reported & rediscovered that some vma-dependent runtime +facilities have been broken: @vma() and *ubacktrace(). It turns out +that modern gcc/ld.so links/loads binaries in slightly different ways +than older toolchains. Specifically, the first page of ELF files is +now loaded only r--p instead of r-xp protection flags. The +_stp_vma_mmap_cb() routine now accepts the r--p case too. It now +ignores the flags entirely. +--- + runtime/vma.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/runtime/vma.c b/runtime/vma.c +index 7021725d6..02f9bf849 100644 +--- a/runtime/vma.c ++++ b/runtime/vma.c +@@ -157,10 +157,15 @@ static int _stp_vma_mmap_cb(struct stap_task_finder_target *tgt, + dbug_task_vma(1, + "mmap_cb: tsk %d:%d path %s, addr 0x%08lx, length 0x%08lx, offset 0x%lx, flags 0x%lx\n", + tsk->pid, tsk->tgid, path, addr, length, offset, vm_flags); +- // We are only interested in the first load of the whole module that +- // is executable. 
We register whether or not we know the module, ++ ++ // We used to be only interested in the first load of the whole module that ++ // is executable. But with modern enough gcc/ld.so, executables are mapped ++ // in more small pieces (r--p,r-xp,rw-p, instead of r-xp, rw-p). To establish ++ // the virtual base address, we initially look for an offset=0 mapping. ++ // ++ // We register whether or not we know the module, + // so we can later lookup the name given an address for this task. +- if (path != NULL && offset == 0 && (vm_flags & VM_EXEC) ++ if (path != NULL && offset == 0 + && stap_find_vma_map_info(tsk, addr, NULL, NULL, NULL, NULL) != 0) { + for (i = 0; i < _stp_num_modules; i++) { + // PR20433: papering over possibility of NULL pointers +-- +2.21.0 + diff --git a/PR24904-changes-wit.patch b/PR24904-changes-wit.patch new file mode 100644 index 0000000..e2dc237 --- /dev/null +++ b/PR24904-changes-wit.patch @@ -0,0 +1,26 @@ +From f4f0da6db3be523472b25a219ea245fd6d3489f9 Mon Sep 17 00:00:00 2001 +From: "Frank Ch. Eigler" +Date: Tue, 20 Aug 2019 22:17:02 -0400 +Subject: [PATCH] PR24904: support linux 5.2's stacktrace.c changes with + -DDEBUG_UNWIND too + +--- + runtime/stack.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/runtime/stack.c b/runtime/stack.c +index bf59b2909..6ec7b2602 100644 +--- a/runtime/stack.c ++++ b/runtime/stack.c +@@ -56,7 +56,7 @@ _stp_init_stack(void) + { + stack_trace_save_regs_fn = (void*) kallsyms_lookup_name("stack_trace_save_regs"); + dbug_unwind(1, "stack_trace_saves_regs_fn=%lx for _stp_stack_print_fallback().\n", +- (unsigned long) save_trace_save_regs_fn); ++ (unsigned long) stack_trace_save_regs_fn); + return 0; + } + +-- +2.21.0 + diff --git a/PR24904-changes.patch b/PR24904-changes.patch new file mode 100644 index 0000000..5a9005d --- /dev/null +++ b/PR24904-changes.patch @@ -0,0 +1,130 @@ +From 49fa913a61e7f2941bb59c11d72a1aafa6930162 Mon Sep 17 00:00:00 2001 +From: "Frank Ch. 
Eigler" +Date: Tue, 20 Aug 2019 21:20:40 -0400 +Subject: [PATCH] PR24904: support linux 5.2's stacktrace.c changes + +The following kernel commit disabled the older struct stack_trace APIs +on architectures that support the newer stackwalk APIs. Provide an +adaptation layer to stack_trace_save_regs(). + +commit 214d8ca6ee854f696f75e75511fe66b409e656db +Author: Thomas Gleixner +Date: Thu Apr 25 11:45:21 2019 +0200 + + stacktrace: Provide common infrastructure +--- + buildrun.cxx | 2 + + .../linux/autoconf-stack-trace-save-regs.c | 8 ++++ + runtime/stack.c | 37 +++++++++++++++++-- + 3 files changed, 44 insertions(+), 3 deletions(-) + create mode 100644 runtime/linux/autoconf-stack-trace-save-regs.c + +diff --git a/buildrun.cxx b/buildrun.cxx +index 5e8d3b961..6ed744707 100644 +--- a/buildrun.cxx ++++ b/buildrun.cxx +@@ -485,6 +485,8 @@ compile_pass (systemtap_session& s) + output_autoconf(s, o, "autoconf-bio-bi_opf.c", "STAPCONF_BIO_BI_OPF", NULL); + output_autoconf(s, o, "autoconf-linux-sched_headers.c", + "STAPCONF_LINUX_SCHED_HEADERS", NULL); ++ output_autoconf(s, o, "autoconf-stack-trace-save-regs.c", ++ "STAPCONF_STACK_TRACE_SAVE_REGS", NULL); + + // used by runtime/linux/netfilter.c + output_exportconf(s, o, "nf_register_hook", "STAPCONF_NF_REGISTER_HOOK"); +diff --git a/runtime/linux/autoconf-stack-trace-save-regs.c b/runtime/linux/autoconf-stack-trace-save-regs.c +new file mode 100644 +index 000000000..8bf33391f +--- /dev/null ++++ b/runtime/linux/autoconf-stack-trace-save-regs.c +@@ -0,0 +1,8 @@ ++#include ++ ++unsigned int foo () ++{ ++ unsigned long e[10]; ++ struct pt_regs* r = 0; ++ return stack_trace_save_regs (r, & e[0], 10, 0); ++} +diff --git a/runtime/stack.c b/runtime/stack.c +index 0f649e8da..bf59b2909 100644 +--- a/runtime/stack.c ++++ b/runtime/stack.c +@@ -39,6 +39,7 @@ + #include "linux/uprobes-inc.h" + + #include ++ + #if defined(STAPCONF_KERNEL_STACKTRACE) || defined(STAPCONF_KERNEL_STACKTRACE_NO_BP) + #include + #endif +@@ -47,6 +48,20 @@ + 
#include + #endif + ++#if defined(STAPCONF_STACK_TRACE_SAVE_REGS) /* linux 5.2+ apprx. */ ++static __typeof__(stack_trace_save_regs) (*stack_trace_save_regs_fn); /* not exported */ ++ ++static int ++_stp_init_stack(void) ++{ ++ stack_trace_save_regs_fn = (void*) kallsyms_lookup_name("stack_trace_save_regs"); ++ dbug_unwind(1, "stack_trace_saves_regs_fn=%lx for _stp_stack_print_fallback().\n", ++ (unsigned long) save_trace_save_regs_fn); ++ return 0; ++} ++ ++#else /* ! STAPCONF_STACK_TRACE_SAVE_REGS */ ++ + static void (*(save_stack_trace_regs_fn))(struct pt_regs *regs, + struct stack_trace *trace); + +@@ -60,6 +75,10 @@ _stp_init_stack(void) + return 0; + } + ++#endif /* STAPCONF_STACK_TRACE_SAVE_REGS */ ++ ++ ++ + static void _stp_stack_print_fallback(unsigned long, struct pt_regs*, int, int, int); + + #ifdef STP_USE_DWARF_UNWINDER +@@ -168,9 +187,19 @@ static void _stp_stack_print_fallback(unsigned long sp, struct pt_regs *regs, + int sym_flags, + int levels, int skip) { + unsigned long entries[MAXBACKTRACE]; +- struct stack_trace trace; +- int i; ++ unsigned i; ++ unsigned num_entries; ++ ++#if defined(STAPCONF_STACK_TRACE_SAVE_REGS) /* linux 5.2+ apprx. */ ++ if (!stack_trace_save_regs_fn) { ++ dbug_unwind(1, "no fallback kernel stacktrace (giving up)\n"); ++ _stp_print_addr(0, sym_flags | _STP_SYM_INEXACT, NULL); ++ return; ++ } + ++ num_entries = (*stack_trace_save_regs_fn)(regs, &entries[0], MAXBACKTRACE, skip); ++#else ++ struct stack_trace trace; + /* If don't have save_stack_trace_regs unwinder, just give up. 
*/ + if (!save_stack_trace_regs_fn) { + dbug_unwind(1, "no fallback kernel stacktrace (giving up)\n"); +@@ -189,9 +218,11 @@ static void _stp_stack_print_fallback(unsigned long sp, struct pt_regs *regs, + dbug_unwind(1, "trace.nr_entries: %d\n", trace.nr_entries); + dbug_unwind(1, "trace.max_entries: %d\n", trace.max_entries); + dbug_unwind(1, "trace.skip %d\n", trace.skip); ++ num_entries = trace.nr_entries; ++#endif + + /* save_stack_trace_reg() adds a ULONG_MAX after last valid entry. Ignore it. */ +- for (i=0; i - 4.2-0.20190827 +- PR23879,PR24875 - fix task finder vma on f29 +- PR24904 - support linux 5.2 stacktrace.c changes +- PR24904 - support linux 5.2 stacktrace.c changes with -DDEBUG_UNWIND too + * Tue May 07 2019 Serguei Makarov - 4.1-1 - Upstream release. From 2197f02370918bb6f4dd1011c37265afb5930fe2 Mon Sep 17 00:00:00 2001 From: Aaron Merey Date: Tue, 27 Aug 2019 17:16:01 -0400 Subject: [PATCH 2/9] systemtap.spec: increment version to 4.2, copy sources file from origin/f30 --- sources | 2 +- systemtap.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sources b/sources index 6498a8f..5c7361c 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (systemtap-4.2-0.20190807gitebfc300ec2ad.tar.gz) = 378d8ba68809318c0a962dbbc2f98034f2396b87dd8bef4e5d81e98cba1aa8dc53e7b56ec2e8cc9a87a14c0d16b2921aaca6dfd63fc871fee3bab070015d3482 +SHA512 (systemtap-4.1.tar.gz) = bc31a817fb58dcb95ad0715a9b3524c2206f08c36f6590c112088e813ef12b4398cbba9abc81dde1709e11a6b18556768d4317a52a4fc75c6812397450ea77b7 diff --git a/systemtap.spec b/systemtap.spec index 79ef6a6..eec916d 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -87,7 +87,7 @@ %define __brp_mangle_shebangs_exclude_from .stp$ Name: systemtap -Version: 4.1 +Version: 4.2 Release: 2%{?release_override}%{?dist} # for version, see also configure.ac From b841d816d95fa6e4482db168d6d963cec6f8182d Mon Sep 17 00:00:00 2001 From: Aaron Merey Date: Tue, 27 Aug 2019 17:30:53 -0400 Subject: [PATCH 3/9] 
Update release number --- systemtap.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/systemtap.spec b/systemtap.spec index eec916d..8c6f6ae 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -88,7 +88,7 @@ Name: systemtap Version: 4.2 -Release: 2%{?release_override}%{?dist} +Release: 0%{?release_override}%{?dist} # for version, see also configure.ac From b1c053243f57a1f2c1dbf6374d0a5210ecb32789 Mon Sep 17 00:00:00 2001 From: Aaron Merey Date: Tue, 27 Aug 2019 17:44:01 -0400 Subject: [PATCH 4/9] update sources to 4.2.0 --- sources | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources b/sources index 5c7361c..ce404e1 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (systemtap-4.1.tar.gz) = bc31a817fb58dcb95ad0715a9b3524c2206f08c36f6590c112088e813ef12b4398cbba9abc81dde1709e11a6b18556768d4317a52a4fc75c6812397450ea77b7 +SHA512 (systemtap-4.2-0.20190827git8ffab23ff.tar.gz) = 4b7a25c12ab5ef40718678639c104a6ace208ab3bd4572bfdaffcd65c6a36ee1b543e06a9720914dcb53b876f62f73f65d264b43f03b63dad03a535b8295e301 From 9efb826d9f50e7a855ce068aa603cba64512ec78 Mon Sep 17 00:00:00 2001 From: "Frank Ch. 
Eigler" Date: Tue, 27 Aug 2019 20:21:17 -0400 Subject: [PATCH 5/9] f31 Release/sources agreement --- systemtap.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/systemtap.spec b/systemtap.spec index 8c6f6ae..a2c4b35 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -88,7 +88,7 @@ Name: systemtap Version: 4.2 -Release: 0%{?release_override}%{?dist} +Release: 0.20190827git8ffab23ff.1%{?release_override}%{?dist} # for version, see also configure.ac @@ -122,7 +122,7 @@ Release: 0%{?release_override}%{?dist} Summary: Programmable system-wide instrumentation system License: GPLv2+ URL: http://sourceware.org/systemtap/ -Source: %{name}-%{version}-0.20190807gitebfc300ec2ad.tar.gz +Source: %{name}-%{version}-0.20190827git8ffab23ff.tar.gz Patch10: PR23879-PR24875.patch Patch11: PR24904-changes.patch From 8ea7d6908a3b4b1c909238c0c450c016e0a7ee48 Mon Sep 17 00:00:00 2001 From: Aaron Merey Date: Thu, 29 Aug 2019 11:48:17 -0400 Subject: [PATCH 6/9] Drop patches, copy use rawhide's systemtap.spec --- PR23879-PR24875.patch | 42 ------------ PR24904-changes-wit.patch | 26 -------- PR24904-changes.patch | 130 -------------------------------------- systemtap.spec | 25 ++++---- 4 files changed, 11 insertions(+), 212 deletions(-) delete mode 100644 PR23879-PR24875.patch delete mode 100644 PR24904-changes-wit.patch delete mode 100644 PR24904-changes.patch diff --git a/PR23879-PR24875.patch b/PR23879-PR24875.patch deleted file mode 100644 index c024b7c..0000000 --- a/PR23879-PR24875.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 4ae4592f1106e941023a5768d34c2381cc869631 Mon Sep 17 00:00:00 2001 -From: "Frank Ch. Eigler" -Date: Wed, 21 Aug 2019 19:29:45 -0400 -Subject: [PATCH] PR23879, PR24875: fix task-finder-vma on f29+ - -It was reported & rediscovered that some vma-dependent runtime -facilities have been broken: @vma() and *ubacktrace(). It turns out -that modern gcc/ld.so links/loads binaries in slightly different ways -than older toolchains. 
Specifically, the first page of ELF files is -now loaded only r--p instead of r-xp protection flags. The -_stp_vma_mmap_cb() routine now accepts the r--p case too. It now -ignores the flags entirely. ---- - runtime/vma.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/runtime/vma.c b/runtime/vma.c -index 7021725d6..02f9bf849 100644 ---- a/runtime/vma.c -+++ b/runtime/vma.c -@@ -157,10 +157,15 @@ static int _stp_vma_mmap_cb(struct stap_task_finder_target *tgt, - dbug_task_vma(1, - "mmap_cb: tsk %d:%d path %s, addr 0x%08lx, length 0x%08lx, offset 0x%lx, flags 0x%lx\n", - tsk->pid, tsk->tgid, path, addr, length, offset, vm_flags); -- // We are only interested in the first load of the whole module that -- // is executable. We register whether or not we know the module, -+ -+ // We used to be only interested in the first load of the whole module that -+ // is executable. But with modern enough gcc/ld.so, executables are mapped -+ // in more small pieces (r--p,r-xp,rw-p, instead of r-xp, rw-p). To establish -+ // the virtual base address, we initially look for an offset=0 mapping. -+ // -+ // We register whether or not we know the module, - // so we can later lookup the name given an address for this task. -- if (path != NULL && offset == 0 && (vm_flags & VM_EXEC) -+ if (path != NULL && offset == 0 - && stap_find_vma_map_info(tsk, addr, NULL, NULL, NULL, NULL) != 0) { - for (i = 0; i < _stp_num_modules; i++) { - // PR20433: papering over possibility of NULL pointers --- -2.21.0 - diff --git a/PR24904-changes-wit.patch b/PR24904-changes-wit.patch deleted file mode 100644 index e2dc237..0000000 --- a/PR24904-changes-wit.patch +++ /dev/null @@ -1,26 +0,0 @@ -From f4f0da6db3be523472b25a219ea245fd6d3489f9 Mon Sep 17 00:00:00 2001 -From: "Frank Ch. 
Eigler" -Date: Tue, 20 Aug 2019 22:17:02 -0400 -Subject: [PATCH] PR24904: support linux 5.2's stacktrace.c changes with - -DDEBUG_UNWIND too - ---- - runtime/stack.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/runtime/stack.c b/runtime/stack.c -index bf59b2909..6ec7b2602 100644 ---- a/runtime/stack.c -+++ b/runtime/stack.c -@@ -56,7 +56,7 @@ _stp_init_stack(void) - { - stack_trace_save_regs_fn = (void*) kallsyms_lookup_name("stack_trace_save_regs"); - dbug_unwind(1, "stack_trace_saves_regs_fn=%lx for _stp_stack_print_fallback().\n", -- (unsigned long) save_trace_save_regs_fn); -+ (unsigned long) stack_trace_save_regs_fn); - return 0; - } - --- -2.21.0 - diff --git a/PR24904-changes.patch b/PR24904-changes.patch deleted file mode 100644 index 5a9005d..0000000 --- a/PR24904-changes.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 49fa913a61e7f2941bb59c11d72a1aafa6930162 Mon Sep 17 00:00:00 2001 -From: "Frank Ch. Eigler" -Date: Tue, 20 Aug 2019 21:20:40 -0400 -Subject: [PATCH] PR24904: support linux 5.2's stacktrace.c changes - -The following kernel commit disabled the older struct stack_trace APIs -on architectures that support the newer stackwalk APIs. Provide an -adaptation layer to stack_trace_save_regs(). 
- -commit 214d8ca6ee854f696f75e75511fe66b409e656db -Author: Thomas Gleixner -Date: Thu Apr 25 11:45:21 2019 +0200 - - stacktrace: Provide common infrastructure ---- - buildrun.cxx | 2 + - .../linux/autoconf-stack-trace-save-regs.c | 8 ++++ - runtime/stack.c | 37 +++++++++++++++++-- - 3 files changed, 44 insertions(+), 3 deletions(-) - create mode 100644 runtime/linux/autoconf-stack-trace-save-regs.c - -diff --git a/buildrun.cxx b/buildrun.cxx -index 5e8d3b961..6ed744707 100644 ---- a/buildrun.cxx -+++ b/buildrun.cxx -@@ -485,6 +485,8 @@ compile_pass (systemtap_session& s) - output_autoconf(s, o, "autoconf-bio-bi_opf.c", "STAPCONF_BIO_BI_OPF", NULL); - output_autoconf(s, o, "autoconf-linux-sched_headers.c", - "STAPCONF_LINUX_SCHED_HEADERS", NULL); -+ output_autoconf(s, o, "autoconf-stack-trace-save-regs.c", -+ "STAPCONF_STACK_TRACE_SAVE_REGS", NULL); - - // used by runtime/linux/netfilter.c - output_exportconf(s, o, "nf_register_hook", "STAPCONF_NF_REGISTER_HOOK"); -diff --git a/runtime/linux/autoconf-stack-trace-save-regs.c b/runtime/linux/autoconf-stack-trace-save-regs.c -new file mode 100644 -index 000000000..8bf33391f ---- /dev/null -+++ b/runtime/linux/autoconf-stack-trace-save-regs.c -@@ -0,0 +1,8 @@ -+#include -+ -+unsigned int foo () -+{ -+ unsigned long e[10]; -+ struct pt_regs* r = 0; -+ return stack_trace_save_regs (r, & e[0], 10, 0); -+} -diff --git a/runtime/stack.c b/runtime/stack.c -index 0f649e8da..bf59b2909 100644 ---- a/runtime/stack.c -+++ b/runtime/stack.c -@@ -39,6 +39,7 @@ - #include "linux/uprobes-inc.h" - - #include -+ - #if defined(STAPCONF_KERNEL_STACKTRACE) || defined(STAPCONF_KERNEL_STACKTRACE_NO_BP) - #include - #endif -@@ -47,6 +48,20 @@ - #include - #endif - -+#if defined(STAPCONF_STACK_TRACE_SAVE_REGS) /* linux 5.2+ apprx. 
*/ -+static __typeof__(stack_trace_save_regs) (*stack_trace_save_regs_fn); /* not exported */ -+ -+static int -+_stp_init_stack(void) -+{ -+ stack_trace_save_regs_fn = (void*) kallsyms_lookup_name("stack_trace_save_regs"); -+ dbug_unwind(1, "stack_trace_saves_regs_fn=%lx for _stp_stack_print_fallback().\n", -+ (unsigned long) save_trace_save_regs_fn); -+ return 0; -+} -+ -+#else /* ! STAPCONF_STACK_TRACE_SAVE_REGS */ -+ - static void (*(save_stack_trace_regs_fn))(struct pt_regs *regs, - struct stack_trace *trace); - -@@ -60,6 +75,10 @@ _stp_init_stack(void) - return 0; - } - -+#endif /* STAPCONF_STACK_TRACE_SAVE_REGS */ -+ -+ -+ - static void _stp_stack_print_fallback(unsigned long, struct pt_regs*, int, int, int); - - #ifdef STP_USE_DWARF_UNWINDER -@@ -168,9 +187,19 @@ static void _stp_stack_print_fallback(unsigned long sp, struct pt_regs *regs, - int sym_flags, - int levels, int skip) { - unsigned long entries[MAXBACKTRACE]; -- struct stack_trace trace; -- int i; -+ unsigned i; -+ unsigned num_entries; -+ -+#if defined(STAPCONF_STACK_TRACE_SAVE_REGS) /* linux 5.2+ apprx. */ -+ if (!stack_trace_save_regs_fn) { -+ dbug_unwind(1, "no fallback kernel stacktrace (giving up)\n"); -+ _stp_print_addr(0, sym_flags | _STP_SYM_INEXACT, NULL); -+ return; -+ } - -+ num_entries = (*stack_trace_save_regs_fn)(regs, &entries[0], MAXBACKTRACE, skip); -+#else -+ struct stack_trace trace; - /* If don't have save_stack_trace_regs unwinder, just give up. */ - if (!save_stack_trace_regs_fn) { - dbug_unwind(1, "no fallback kernel stacktrace (giving up)\n"); -@@ -189,9 +218,11 @@ static void _stp_stack_print_fallback(unsigned long sp, struct pt_regs *regs, - dbug_unwind(1, "trace.nr_entries: %d\n", trace.nr_entries); - dbug_unwind(1, "trace.max_entries: %d\n", trace.max_entries); - dbug_unwind(1, "trace.skip %d\n", trace.skip); -+ num_entries = trace.nr_entries; -+#endif - - /* save_stack_trace_reg() adds a ULONG_MAX after last valid entry. Ignore it. 
*/ -- for (i=0; i - 4.2-0.20190827 -- PR23879,PR24875 - fix task finder vma on f29 -- PR24904 - support linux 5.2 stacktrace.c changes -- PR24904 - support linux 5.2 stacktrace.c changes with -DDEBUG_UNWIND too +* Tue Aug 27 2019 Aaron Merey - 4.2-0.20190827git8ffab23ff +- Automated weekly rawhide release +- Applied spec changes from upstream git + +* Tue Aug 27 2019 Aaron Merey - 4.2-0.20190827git8ffab23ff +- Automated weekly rawhide release +- Applied spec changes from upstream git + +* Mon Aug 19 2019 Miro Hrončok - 4.2-0.20190808gitebfc300ec2ad +- Rebuilt for Python 3.8 * Wed Aug 07 2019 Frank Ch. Eigler - 4.2-0.20190807gitebfc300ec2ad - Automated weekly rawhide release From cee6dbeed4b6607114bc6cf7db0e5ee14ff171a0 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Thu, 11 Jun 2020 12:30:34 -0400 Subject: [PATCH 7/9] release 4.3 --- sources | 2 +- systemtap.spec | 53 +++++--------------------------------------------- 2 files changed, 6 insertions(+), 49 deletions(-) diff --git a/sources b/sources index 5c23613..77da3ac 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (systemtap-4.2.tar.gz) = 50d426e8c3d976fd635f6867fb4202832be455d869178a384159d1b2cd2a25642b07da2bd3184081b13bfb95df48a97f430b06713eaee0d0f67e27f43266cfd6 +SHA512 (systemtap-4.3.tar.gz) = db992adaa827601d1e3f28f6d70611b515b5e2e934d4251a07d5798f1d42a59351beb1422ab8df6ee634476b51b2127462e7bf7bf26afaa83cce91ffe59f3696 diff --git a/systemtap.spec b/systemtap.spec index bc90c42..a9492f0 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -10,7 +10,6 @@ %{!?with_crash: %global with_crash 1} %endif %{!?with_rpm: %global with_rpm 1} -%{!?with_bundled_elfutils: %global with_bundled_elfutils 0} %{!?elfutils_version: %global elfutils_version 0.142} %{!?pie_supported: %global pie_supported 1} %{!?with_boost: %global with_boost 0} @@ -87,7 +86,7 @@ %define __brp_mangle_shebangs_exclude_from .stp$ Name: systemtap -Version: 4.2 +Version: 4.3 Release: 1%{?release_override}%{?dist} # for version, see also 
configure.ac @@ -154,14 +153,7 @@ BuildRequires: crash-devel zlib-devel %if %{with_rpm} BuildRequires: rpm-devel %endif -%if %{with_bundled_elfutils} -Source1: elfutils-%{elfutils_version}.tar.gz -Patch1: elfutils-portability.patch -BuildRequires: m4 -%global setup_elfutils -a1 -%else BuildRequires: elfutils-devel >= %{elfutils_version} -%endif %if %{with_docs} BuildRequires: /usr/bin/latex /usr/bin/dvips /usr/bin/ps2pdf %if 0%{?fedora} >= 18 || 0%{?rhel} >= 7 @@ -514,35 +506,10 @@ systemtap-runtime-virthost machine to execute systemtap scripts. # ------------------------------------------------------------------------ %prep -%setup -q %{?setup_elfutils} - -%if %{with_bundled_elfutils} -cd elfutils-%{elfutils_version} -%patch1 -p1 -sleep 1 -find . \( -name Makefile.in -o -name aclocal.m4 \) -print | xargs touch -sleep 1 -find . \( -name configure -o -name config.h.in \) -print | xargs touch -cd .. -%endif +%setup -q %build -%if %{with_bundled_elfutils} -# Build our own copy of elfutils. -%global elfutils_config --with-elfutils=elfutils-%{elfutils_version} - -# We have to prevent the standard dependency generation from identifying -# our private elfutils libraries in our provides and requires. -%global _use_internal_dependency_generator 0 -%global filter_eulibs() /bin/sh -c "%{1} | sed '/libelf/d;/libdw/d;/libebl/d'" -%global __find_provides %{filter_eulibs /usr/lib/rpm/find-provides} -%global __find_requires %{filter_eulibs /usr/lib/rpm/find-requires} - -# This will be needed for running stap when not installed, for the test suite. -%global elfutils_mflags LD_LIBRARY_PATH=`pwd`/lib-elfutils -%endif - # Enable/disable the dyninst pure-userspace backend %if %{with_dyninst} %global dyninst_config --with-dyninst @@ -638,12 +605,9 @@ cd .. 
# We don't ship compileworthy python code, just oddball samples %global py_auto_byte_compile 0 -%configure %{?elfutils_config} %{dyninst_config} %{sqlite_config} %{crash_config} %{docs_config} %{pie_config} %{rpm_config} %{java_config} %{virt_config} %{dracut_config} %{python3_config} %{python2_probes_config} %{python3_probes_config} %{httpd_config} %{bpf_config} --disable-silent-rules --with-extra-version="rpm %{version}-%{release}" +%configure %{dyninst_config} %{sqlite_config} %{crash_config} %{docs_config} %{pie_config} %{rpm_config} %{java_config} %{virt_config} %{dracut_config} %{python3_config} %{python2_probes_config} %{python3_probes_config} %{httpd_config} %{bpf_config} --disable-silent-rules --with-extra-version="rpm %{version}-%{release}" make %{?_smp_mflags} -%if %{with_emacsvim} -%{_emacs_bytecompile} emacs/systemtap-mode.el -%endif %install make DESTDIR=$RPM_BUILD_ROOT install @@ -1090,10 +1054,6 @@ done %dir %{_libexecdir}/systemtap %{_libexecdir}/systemtap/libHelperSDT_*.so %endif -%if %{with_bundled_elfutils} -%dir %{_libdir}/systemtap -%{_libdir}/systemtap/lib*.so* -%endif %if %{with_emacsvim} %{_emacs_sitelispdir}/*.el* %{_emacs_sitestartdir}/systemtap-init.el @@ -1266,13 +1226,10 @@ done # PRERELEASE %changelog -* Mon Nov 18 2019 Sagar Patel - 4.2-1 +* Thu Jun 11 2020 Frank Ch. Eigler - 4.3-1 - Upstream release. -* Thu Oct 03 2019 Miro Hrončok - 4.2-0.20190828git8ffab23ff -- Rebuilt for Python 3.8.0rc1 (#1748018) - -* Tue May 07 2019 Serguei Makarov - 4.1-1 +* Mon Nov 18 2019 Sagar Patel - 4.2-1 - Upstream release. * Tue May 07 2019 Serguei Makarov - 4.1-1 From 6c61a4d01f1100065b4d71bf90df332cf8dd6dbf Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Tue, 28 Jul 2020 13:41:00 -0400 Subject: [PATCH 8/9] Backport two important fixes from upstream. 
--- rhbz1847676,1857749.patch | 1403 +++++++++++++++++++++++++++++++++++++ rhbz1855264.patch | 874 +++++++++++++++++++++++ systemtap.spec | 11 +- 3 files changed, 2287 insertions(+), 1 deletion(-) create mode 100644 rhbz1847676,1857749.patch create mode 100644 rhbz1855264.patch diff --git a/rhbz1847676,1857749.patch b/rhbz1847676,1857749.patch new file mode 100644 index 0000000..80721d9 --- /dev/null +++ b/rhbz1847676,1857749.patch @@ -0,0 +1,1403 @@ +commit 11c39a7375bd2759b53b89236e755c91a4f5aad8 +Author: Frank Ch. Eigler +Date: Tue Jun 16 20:35:53 2020 -0400 + + RHBZ1847676: uprobes-inode tweaks redux + + Added (back) a spinlock to manage the stapiu_consumer -> process_list + structure, since it is occasionally traversed from uprobe pre-handlers, + which are sometimes entered in atomic context (e.g. on rhel7). There, + the normal mutex_t is unsafe. So restoring a spinlock_t just for + those shortlived traversals, rhel7 and rawhide are both happy. + +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index 156360e..922c9f1 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -143,7 +143,8 @@ struct stapiu_consumer { + struct list_head instance_list_head; // the resulting uprobe instances for this consumer + + struct list_head process_list_head; // the processes for this consumer +- ++ spinlock_t process_list_lock; // protect list; used briefly from even atomic contexts ++ + // List of perf counters used by each probe + // This list is an index into struct stap_perf_probe, + long perf_counters_dim; +@@ -174,16 +175,19 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) + + // First find the related process, set by stapiu_change_plus. + // NB: This is a linear search performed for every probe hit! +- // This could be an algorithmic problem if the list gets large, but +- // we'll wait until this is demonstratedly a hotspot before optimizing. 
+- mutex_lock(&c->consumer_lock); ++ // This could be an algorithmic problem if the list gets large, ++ // but we'll wait until this is demonstratedly a hotspot before ++ // optimizing. NB: on rhel7 sometimes we're invoked from atomic ++ // context, so must be careful to use the spinlock, not the ++ // mutex. ++ spin_lock(&c->process_list_lock); + list_for_each_entry(p, &c->process_list_head, process_list) { + if (p->tgid == current->tgid) { + process = p; + break; + } + } +- mutex_unlock(&c->consumer_lock); ++ spin_unlock(&c->process_list_lock); + if (!process) { + #ifdef UPROBE_HANDLER_REMOVE + /* Once we're past the starting phase, we can be sure that any +@@ -344,7 +348,7 @@ static void + stapiu_decrement_semaphores(struct stapiu_consumer *consumers, size_t nconsumers) + { + size_t i; +- /* NB: no stapiu_process_slots_lock needed, as the task_finder engine is ++ /* NB: no process_list_lock use needed as the task_finder engine is + * already stopped by now, so no one else will mess with us. We need + * to be sleepable for access_process_vm. */ + for (i = 0; i < nconsumers; ++i) { +@@ -433,7 +437,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) + INIT_LIST_HEAD(&c->instance_list_head); + INIT_LIST_HEAD(&c->process_list_head); + mutex_init(&c->consumer_lock); +- ++ spin_lock_init(&c->process_list_lock); ++ + dbug_uprobes("registering task-finder for procname:%s buildid:%s\n", + ((char*)c->finder.procname ?: (char*)""), + ((char*)c->finder.build_id ?: (char*)"")); +@@ -560,7 +565,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + * calls us in this case with relocation=offset=0, so + * we don't have to worry about it. 
*/ + p->base = relocation - offset; ++ spin_lock (&c->process_list_lock); + list_add(&p->process_list, &c->process_list_head); ++ spin_unlock (&c->process_list_lock); + + rc = 0; + mutex_unlock(&c->consumer_lock); +@@ -587,28 +594,40 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + { + int rc = 0; + struct stapiu_process *p; ++ int any_found; + + if (! c->sdt_sem_offset) // nothing to do + return 0; + +- /* NB: no lock after this point, as we need to be sleepable for +- * get/put_user semaphore action. The given process should be frozen +- * while we're busy, so it's not an issue. +- */ +- +- mutex_lock(&c->consumer_lock); +- ++ // NB: we mustn't hold a lock while changing the task memory, ++ // but we need a lock to protect the process_list from concurrent ++ // add/delete. So hold a spinlock during iteration until the first ++ // hit, then unlock & process. NB: We could in principle have multiple ++ // instances of the same process in the list (e.g., if the process ++ // somehow maps in the same solib multiple times). We can't easily ++ // both iterate this list (in a spinlock-protected safe way), and ++ // relax the spinlock enough to do a safe stapiu_write_task_semaphore() ++ // call within the loop. So we will hit only the copy in our list. ++ any_found = 0; ++ spin_lock(&c->process_list_lock); + /* Look through all the consumer's processes and increment semaphores. */ + list_for_each_entry(p, &c->process_list_head, process_list) { + unsigned long addr = p->base + c->sdt_sem_offset; + if (addr >= relocation && addr < relocation + length) { +- int rc2 = stapiu_write_task_semaphore(task, addr, +1); ++ int rc2; ++ // unlock list and process write for this entry ++ spin_unlock(&c->process_list_lock); ++ any_found=1; ++ rc2 = stapiu_write_task_semaphore(task, addr, +1); + if (!rc) +- rc = rc2; ++ rc = rc2; ++ break; // exit list_for_each loop + } + } +- +- mutex_unlock(&c->consumer_lock); ++ if (! 
any_found) ++ spin_unlock(&c->process_list_lock); ++ else ++ ; // already unlocked + + return rc; + } +@@ -635,8 +654,9 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, + // process is dying anyway + // - the stapiu_consumer's process_list linked list will have a record + // of the dead process: well, not great, it'll be cleaned up eventually, +- // and cleaning it up NOW is tricky - need some spin lock to protect the list, +- // but not out sleepy mutex: ++ // and cleaning it up NOW is tricky - we could use the process_list_lock ++ // to protect the list (as done in stapiu_change_semaphore_plus), ++ // but not our sleepy mutex: + // + // [ 1955.410237] ? stapiu_change_minus+0x38/0xf0 [stap_54a723c01c50d972590a5c901516849_15522] + // [ 1955.411583] __mutex_lock+0x35/0x820 + +commit 4ccdfe4536d702612912e96d7b6278b169917eaa +Author: Frank Ch. Eigler +Date: Mon Jul 6 13:27:46 2020 -0400 + + RHBZ1847676 cont'd: more uprobes-inode/onthefly concurrency controls + + The systemtap.onthefly/*.exp tests had recently become hang-prone on + some kernels, for reasons still not completely understood. This set + of patches adds: + + - irq*-block spinlocks into uprobes-invoked paths, in case there is + peculiar reentrancy (from irq-related tracepoints) + + - a mutex lock/unlock into the stapiu_exit() path, in case there is + a concurrent stapiu_refresh() invoked by onthefly machinery around + exit time + + - restrictions into the onthefly module_refresh() translator code to + preclude STAP_SESSION_STOPPING as a time to do any sort of refresh + operation. Now probes that were disarmed will stay disarmed during + probe-end/error/etc. processing, which is always valid with the + spec, and avoids a class of late module-refresh ops + + Testing on rhel7 and rawhide indicates the reproducible hang is gone. 
+ Our testsuite already tortures this code; invoke by hand via: + + % sudo make installcheck RUNTESTFLAGS="-v affection.exp hrtimer_onthefly.exp kprobes_onthefly.exp tracepoint_onthefly.exp uprobes_onthefly.exp" + +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index 922c9f1..3de7281 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -172,6 +172,7 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) + if (_stp_target) // need we filter by pid at all? + { + struct stapiu_process *p, *process = NULL; ++ unsigned long flags; + + // First find the related process, set by stapiu_change_plus. + // NB: This is a linear search performed for every probe hit! +@@ -180,14 +181,14 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) + // optimizing. NB: on rhel7 sometimes we're invoked from atomic + // context, so must be careful to use the spinlock, not the + // mutex. +- spin_lock(&c->process_list_lock); ++ spin_lock_irqsave(&c->process_list_lock, flags); + list_for_each_entry(p, &c->process_list_head, process_list) { + if (p->tgid == current->tgid) { + process = p; + break; + } + } +- spin_unlock(&c->process_list_lock); ++ spin_unlock_irqrestore(&c->process_list_lock, flags); + if (!process) { + #ifdef UPROBE_HANDLER_REMOVE + /* Once we're past the starting phase, we can be sure that any +@@ -398,7 +399,7 @@ static void + stapiu_consumer_refresh(struct stapiu_consumer *c) + { + struct stapiu_instance *inst; +- ++ + mutex_lock(& c->consumer_lock); + + list_for_each_entry(inst, &c->instance_list_head, instance_list) { +@@ -420,7 +421,10 @@ stapiu_exit(struct stapiu_consumer *consumers, size_t nconsumers) + stapiu_decrement_semaphores(consumers, nconsumers); + for (i = 0; i < nconsumers; ++i) { + struct stapiu_consumer *c = &consumers[i]; ++ // protect against conceivable stapiu_refresh() at same time ++ mutex_lock(& c->consumer_lock); + 
stapiu_consumer_unreg(c); ++ mutex_unlock(& c->consumer_lock); + /* NB: task_finder needs no unregister. */ + } + } +@@ -480,6 +484,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + struct stapiu_instance *inst = NULL; + struct stapiu_process *p; + int j; ++ unsigned long flags; + + if (! inode) { + rc = -EINVAL; +@@ -565,9 +570,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + * calls us in this case with relocation=offset=0, so + * we don't have to worry about it. */ + p->base = relocation - offset; +- spin_lock (&c->process_list_lock); ++ spin_lock_irqsave (&c->process_list_lock, flags); + list_add(&p->process_list, &c->process_list_head); +- spin_unlock (&c->process_list_lock); ++ spin_unlock_irqrestore (&c->process_list_lock, flags); + + rc = 0; + mutex_unlock(&c->consumer_lock); +@@ -595,6 +600,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + int rc = 0; + struct stapiu_process *p; + int any_found; ++ unsigned long flags; + + if (! c->sdt_sem_offset) // nothing to do + return 0; +@@ -609,14 +615,14 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + // relax the spinlock enough to do a safe stapiu_write_task_semaphore() + // call within the loop. So we will hit only the copy in our list. + any_found = 0; +- spin_lock(&c->process_list_lock); ++ spin_lock_irqsave(&c->process_list_lock, flags); + /* Look through all the consumer's processes and increment semaphores. 
*/ + list_for_each_entry(p, &c->process_list_head, process_list) { + unsigned long addr = p->base + c->sdt_sem_offset; + if (addr >= relocation && addr < relocation + length) { + int rc2; + // unlock list and process write for this entry +- spin_unlock(&c->process_list_lock); ++ spin_unlock_irqrestore(&c->process_list_lock, flags); + any_found=1; + rc2 = stapiu_write_task_semaphore(task, addr, +1); + if (!rc) +@@ -625,7 +631,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + } + } + if (! any_found) +- spin_unlock(&c->process_list_lock); ++ spin_unlock_irqrestore(&c->process_list_lock, flags); + else + ; // already unlocked + +diff --git a/translate.cxx b/translate.cxx +index 10b3d32..b10af5a 100644 +--- a/translate.cxx ++++ b/translate.cxx +@@ -2144,19 +2144,13 @@ c_unparser::emit_module_refresh () + o->newline() << "mutex_lock(&module_refresh_mutex);"; + + /* If we're not in STARTING/RUNNING state, don't try doing any work. +- PR16766 */ ++ PR16766. We don't want to run refresh ops during e.g. STOPPING, ++ so as to possibly activate uprobes near shutdown. */ + o->newline() << "state = atomic_read (session_state());"; +- o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING && state != STAP_SESSION_ERROR) {"; +- // cannot _stp_warn etc. since we're not in probe context +- o->newline(1) << "#if defined(__KERNEL__)"; +- o->newline() << "if (state != STAP_SESSION_STOPPING)"; +- o->newline(1) << "printk (KERN_ERR \"stap module notifier triggered in unexpected state %d\\n\", state);"; +- o->indent(-1); +- o->newline() << "#endif"; +- ++ o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING) {"; ++ o->newline(1); + if (!session->runtime_usermode_p()) + o->newline() << "mutex_unlock(&module_refresh_mutex);"; +- + o->newline() << "return;"; + o->newline(-1) << "}"; + + +commit 046fa017d2ab7fea1a4ba2295c31f768c072855e +Author: Frank Ch. 
Eigler +Date: Sun Jul 12 09:57:15 2020 -0400 + + RHBZ1847676 cont'd: one more uprobes-inode/onthefly concurrency control + + In uprobes-inode.c (stapiu_change_plus), the runtime can react to + arrivals of new mappings of a solib or executable by registering new + uprobes. Due to an assumption that this could not happen at + inconvenient times (such as a stapiu_refresh or near shutdown times), + the actual uprobes registration operation was done outside the + consumer_lock mutex being held. But it appears this can happen at bad + times, so the mutex needs to be held, just like within + stapiu_consumer_refresh(). + + The onthefly tests now survive iterating testing on rawhide+lockdep + and rhel7+lockdep. + +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index 3de7281..01c8a07 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -575,12 +575,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + spin_unlock_irqrestore (&c->process_list_lock, flags); + + rc = 0; +- mutex_unlock(&c->consumer_lock); +- + // Register actual uprobe if cond_enabled right now + if (c->probe->cond_enabled) + (void) stapiu_register(inst, c); +- goto out; ++ goto out1; + + out2: + _stp_kfree(inst); + +commit a9a0131eb59e8abc197d3d2a553a86bcdec3dd70 +Author: Frank Ch. Eigler +Date: Fri Jul 17 22:33:04 2020 -0400 + + rhbz1857749: uprobes-inode regression in sdt semaphore setting + + Previous code neglected to set sdt.h semaphores for more than the + first process systemtap happened to encounter. This was from a + mistaken understanding of what it meant for stapiu_change_plus() to be + called with the same inode/consumer combination. Even though uprobes + are automatically shared, each new process still needs its perfctr and + sdt-semaphores individually set, so we do that now (as before the + rework of this code). Mechanized testing incoming shortly. 
+ +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index 01c8a07..de81839 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -190,6 +190,10 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) + } + spin_unlock_irqrestore(&c->process_list_lock, flags); + if (!process) { ++ /* We know that we're in -c/-x mode, but this process is not ++ in the process hierarchy, so the uprobe should be ignored ++ and future hits prevented. PR15278 ++ */ + #ifdef UPROBE_HANDLER_REMOVE + /* Once we're past the starting phase, we can be sure that any + * processes which are executing code in a mapping have already +@@ -242,8 +246,8 @@ stapiu_register (struct stapiu_instance* inst, struct stapiu_consumer* c) + (unsigned long) inst->inode->i_ino, + (void*) (uintptr_t) c->offset, + c->probe->index, +- ((char*)c->finder.procname ?: (char*)""), +- ((char*)c->finder.build_id ?: (char*)"")); ++ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), ++ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); + + if (!c->return_p) { + inst->kconsumer.handler = stapiu_probe_prehandler; +@@ -444,8 +448,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) + spin_lock_init(&c->process_list_lock); + + dbug_uprobes("registering task-finder for procname:%s buildid:%s\n", +- ((char*)c->finder.procname ?: (char*)""), +- ((char*)c->finder.build_id ?: (char*)"")); ++ ((char*)c->finder.procname ?: ""), ++ ((char*)c->finder.build_id ?: "")); + + ret = stap_register_task_finder_target(&c->finder); + if (ret != 0) { +@@ -499,22 +503,22 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + if (rc) + goto out; + +- dbug_uprobes("notified for inode-offset u%sprobe " ++ dbug_uprobes("notified for inode-offset arrival u%sprobe " + "%lu:%p pidx %zu target procname:%s buildid:%s\n", + c->return_p ? 
"ret" : "", + (unsigned long) inode->i_ino, + (void*) (uintptr_t) c->offset, + c->probe->index, +- ((char*)c->finder.procname ?: (char*)""), +- ((char*)c->finder.build_id ?: (char*)"")); ++ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), ++ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); + + /* Check the buildid of the target (if we haven't already). We + * lock the target so we don't have concurrency issues. */ + mutex_lock(&c->consumer_lock); + +- // Check if we already have an instance for this inode, as though we +- // were called twice by task-finder mishap, or (hypothetically) the +- // shlib was mmapped twice. ++ // Check if we already have an instance for this inode. This is normal: ++ // if a different process maps the same solib, or forks into the same ++ // executable. In this case, we must not re-register the same uprobe. + list_for_each_entry(i, &c->instance_list_head, instance_list) { + if (i->inode == inode) { + inst = i; +@@ -522,28 +526,33 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + } + } + +- if (inst) { // wouldn't expect a re-notification +- if (inst->registered_p != c->probe->cond_enabled) +- // ... this should not happen +- ; +- goto out1; +- } +- +- // Normal case: need a new one. +- inst = _stp_kzalloc(sizeof(struct stapiu_instance)); +- if (! inst) { +- rc = -ENOMEM; +- goto out1; +- } ++ if (!inst) { // new instance; need new uprobe etc. ++ // Normal case: need a new one. ++ inst = _stp_kzalloc(sizeof(struct stapiu_instance)); ++ if (! inst) { ++ rc = -ENOMEM; ++ goto out1; ++ } + +- inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback ++ inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback ++ ++ /* Grab the inode first (to prevent TOCTTOU problems). 
*/ ++ inst->inode = igrab(inode); ++ if (!inst->inode) { ++ rc = -EINVAL; ++ goto out2; ++ } ++ ++ // Add the inode/instance to the list ++ list_add(&inst->instance_list, &c->instance_list_head); + +- /* Grab the inode first (to prevent TOCTTOU problems). */ +- inst->inode = igrab(inode); +- if (!inst->inode) { +- rc = -EINVAL; +- goto out2; ++ // Register the actual uprobe if cond_enabled already ++ if (c->probe->cond_enabled) ++ (void) stapiu_register(inst, c); + } ++ ++ // ... but we may have to do per-process work anyway: perfctr ++ // initialization and sdt.h semaphore manipulation! + + // Perform perfctr registration if required + for (j=0; j < c->perf_counters_dim; j++) { +@@ -551,12 +560,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + (void) _stp_perf_read_init ((c->perf_counters)[j], task); + } + +- // Add the inode/instance to the list +- list_add(&inst->instance_list, &c->instance_list_head); +- + // Associate this consumer with this process. If we encounter + // resource problems here, we don't really have to undo the uprobe +- // registrations etc. already in effect. ++ // registrations etc. already in effect. It may break correct ++ // tracking of process hierarchy in -c/-x operation, but too bad. + p = _stp_kzalloc(sizeof(struct stapiu_process)); + if (! p) { + rc = -ENOMEM; +@@ -573,11 +580,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + spin_lock_irqsave (&c->process_list_lock, flags); + list_add(&p->process_list, &c->process_list_head); + spin_unlock_irqrestore (&c->process_list_lock, flags); +- ++ // NB: actual semaphore value bumping is done later ++ + rc = 0; + // Register actual uprobe if cond_enabled right now +- if (c->probe->cond_enabled) +- (void) stapiu_register(inst, c); + goto out1; + + out2: +@@ -617,11 +623,21 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + /* Look through all the consumer's processes and increment semaphores. 
*/ + list_for_each_entry(p, &c->process_list_head, process_list) { + unsigned long addr = p->base + c->sdt_sem_offset; ++ if (p->tgid != task->tgid) // skip other processes in the list ++ continue; + if (addr >= relocation && addr < relocation + length) { + int rc2; + // unlock list and process write for this entry + spin_unlock_irqrestore(&c->process_list_lock, flags); + any_found=1; ++ ++ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " ++ "pidx %zu address %lx\n", ++ c->return_p ? "ret" : "", ++ (long) task->tgid, ++ c->probe->index, ++ (unsigned long) addr); ++ + rc2 = stapiu_write_task_semaphore(task, addr, +1); + if (!rc) + rc = rc2; +@@ -641,15 +657,8 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + * about the semaphores, so we can just release the process slot. */ + static int + stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, +- unsigned long relocation, unsigned long length) ++ unsigned long addr, unsigned long length) + { +- dbug_uprobes("notified for inode-offset departure u%sprobe " +- "pidx %zu target procname:%s buildid:%s\n", +- c->return_p ? "ret" : "", +- c->probe->index, +- ((char*)c->finder.procname ?: (char*)""), +- ((char*)c->finder.build_id ?: (char*)"")); +- + // We don't need do anything really. + // A process going away means: + // - its uprobes will no longer fire: no problem, the uprobe inode +@@ -674,6 +683,36 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, + // [ 1955.436334] ? __x64_sys_execve+0x27/0x30 + // [ 1955.437700] ? do_syscall_64+0x5c/0xa0 + ++ // But as an optimization - to avoid having them build up indefinitely, ++ // and make semaphore operations go slowly, we will nuke matching entries anyway. 
++ unsigned long flags; ++ struct stapiu_process *p, *tmp; ++ unsigned nmatch=0; ++ ++ spin_lock_irqsave(&c->process_list_lock, flags); ++ list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) { ++ // we nuke by matching semaphore address (where ..._semaphore_plus wrote) ++ // against the address range being unmapped ++ unsigned long semaddr = p->base + c->sdt_sem_offset; ++ if (p->tgid != task->tgid) // skip other processes in the list ++ continue; ++ if (semaddr >= addr && semaddr < addr + length) { ++ list_del(&p->process_list); ++ _stp_kfree (p); ++ nmatch ++; ++ } ++ } ++ spin_unlock_irqrestore(&c->process_list_lock, flags); ++ ++ if (nmatch > 0) ++ dbug_uprobes("notified for inode-offset departure u%sprobe " ++ "pidx %zu matches:%u procname:%s buildid:%s\n", ++ c->return_p ? "ret" : "", ++ c->probe->index, ++ nmatch, ++ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), ++ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); ++ + return 0; + } + + +commit e90530877ee21cffa2a9d53567ba5b5de1dd9b32 +Author: Frank Ch. Eigler +Date: Mon Jul 27 07:58:30 2020 -0400 + + PR25568 / RHBZ1857749: buildid/uprobes/inode rework, task_finder etc. side + + During work on a new stress tests for build-id based probes (coming in + next commit), it was found that the task_finder2 logic for buildid + verification didn't, well, work, because it was never run (due to an + erroneous pathlen conditional), and couldn't be safely run where it + was (because it was under spinlock but would have done + access_process_vm). Reworked the relevant bits of task_finder2 to + perform build-id verification for processes later - during the quiesce + callback periods. (Buildid verification for solibs is already done + in the task_finder2 consumer uprobes-inode.c.) + + Testing with sdt_misc indicated a case where a preexisting process + (with solib sdt.h semaphores) was being attached to by a new stap + binary. 
task_finder2's enumeration of the preexisting processes' + memory map segments violated assumptions by recent code related to + tracking in stapiu_process[] lists. (It did not mirror the temporal + ld.so mmap sequence.) Changed this tracking to use the inode* as the + key, and stop trying to track mapping lengths, to make positive + matches and eliminate duplicate stapiu_process[] entries for the same + (process,solib) permutation. Reworked stapiu_process[] accumulation + generally to move to the two immediate task_finder callbacks, out of + stapiu_change_plus(). + + Added lots of commentary and diagnostics throughout. stap + -DDEBUG_UPROBES gives meaningful info about uprobes & sdt semaphores; + with -DDEBUG_TASK_FINDER, more but not overwhelming relevant info + appears. + +diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c +index 9777efb..8b8057a 100644 +--- a/runtime/linux/task_finder2.c ++++ b/runtime/linux/task_finder2.c +@@ -652,8 +652,8 @@ __verify_build_id(struct task_struct *tsk, unsigned long addr, + tsk_build_id[build_id_len] = '\0'; + + if (strcmp(build_id, tsk_build_id)) { +- dbug_task(2, "target build-id not matched: [%s] != [%s]\n", +- build_id, tsk_build_id); ++ dbug_task(2, "target build-id not matched: [%s] @ 0x%lx != [%s]\n", ++ build_id, addr, tsk_build_id); + return false; + } + +@@ -884,16 +884,9 @@ __stp_utrace_attach_match_filename(struct task_struct *tsk, + // procname/build-id and match an "all thread" probe. + if (tgt == NULL) + continue; +- /* buildid-based target */ +- else if (tgt->build_id_len > 0 && tgt->procname > 0 +- && !__verify_build_id(tsk, +- tgt->build_id_vaddr, +- tgt->build_id, +- tgt->build_id_len)) +- { +- continue; +- } +- else if (tgt->build_id_len == 0 && tgt->pathlen > 0 ++ /* buildid-based target ... 
gets checked in __stp_tf_quiesce_worker */ ++ /* procname-based target */ ++ else if (tgt->pathlen > 0 + && (tgt->pathlen != filelen + || strcmp(tgt->procname, filename) != 0)) + { +@@ -1341,6 +1334,34 @@ __stp_tf_quiesce_worker(struct task_work *work) + return; + } + ++ /* If we had a build-id based executable probe (so we have a ++ * tgt->build_id) set, we could not check it back in ++ * __stp_utrace_attach_* because we can't do sleepy ++ * access_process_vm() calls from there. But now that we're ++ * in process context, quiesced, finally we can check. If we ++ * were build-id based, and the build-id does not match, then ++ * we UTRACE_DETACH from this process and skip the callbacks. ++ * ++ * XXX: For processes that do match, we redo this check every ++ * time this callback is encountered somehow. That's ++ * probably unnecessary. ++ */ ++ if (tgt->build_id_len > 0) { ++ int ok = __verify_build_id(current, ++ tgt->build_id_vaddr, ++ tgt->build_id, ++ tgt->build_id_len); ++ ++ dbug_task(2, "verified buildid-target process pid=%ld ok=%d\n", ++ (long) current->tgid, ok); ++ if (!ok) { ++ // stap_utrace_detach (current, & tgt->ops); ++ /* Remember that this task_work_func is finished. */ ++ stp_task_work_func_done(); ++ return; ++ } ++ } ++ + __stp_tf_handler_start(); + + /* NB make sure we run mmap callbacks before other callbacks +@@ -1434,6 +1455,21 @@ __stp_utrace_task_finder_target_quiesce(u32 action, + } + } + else { ++ /* Like in __stp_tf_quiesce_worker(), verify build-id now if belated. 
*/ ++ if (tgt->build_id_len > 0) { ++ int ok = __verify_build_id(current, ++ tgt->build_id_vaddr, ++ tgt->build_id, ++ tgt->build_id_len); ++ ++ dbug_task(2, "verified2 buildid-target process pid=%ld ok=%d\n", ++ (long) current->tgid, ok); ++ if (!ok) { ++ __stp_tf_handler_end(); ++ return UTRACE_RESUME; // NB: not _DETACH; that interferes with other engines ++ } ++ } ++ + /* NB make sure we run mmap callbacks before other callbacks + * like 'probe process.begin' handlers so that the vma tracker + * is already initialized in the latter contexts */ +@@ -1797,15 +1833,7 @@ stap_start_task_finder(void) + struct stap_task_finder_target, list); + if (tgt == NULL) + continue; +- /* buildid-based target */ +- else if (tgt->build_id_len > 0 && tgt->procname > 0 +- && !__verify_build_id(tsk, +- tgt->build_id_vaddr, +- tgt->build_id, +- tgt->build_id_len)) +- { +- continue; +- } ++ /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */ + /* procname-based target */ + else if (tgt->build_id == 0 && tgt->pathlen > 0 + && (tgt->pathlen != mmpathlen +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index de81839..757da30 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -76,7 +76,7 @@ struct stapiu_instance { + struct list_head instance_list; // to find other instances e.g. during shutdown + + struct uprobe_consumer kconsumer; // the kernel-side struct for uprobe callbacks etc. +- struct inode *inode; // XXX: refcount? ++ struct inode *inode; // refcounted + unsigned registered_p:1; // whether the this kconsumer is registered (= armed, live) + + struct stapiu_consumer *sconsumer; // whose instance are we +@@ -86,10 +86,14 @@ struct stapiu_instance { + /* A snippet to record the per-process vm where a particular + executable/solib was mapped. Used for sdt semaphore setting, and + for identifying processes of our interest (vs. disinterest) for +- uprobe hits. This object is owned by a stapiu_consumer. 
*/ ++ uprobe hits. This object is owned by a stapiu_consumer. We use ++ the same inode* as the stapiu_instance, and have the same lifespan, ++ so don't bother separately refcount it. ++*/ + struct stapiu_process { + struct list_head process_list; // to find other processes + ++ struct inode *inode; // the inode* for solib or executable + unsigned long relocation; // the mmap'ed .text address + unsigned long base; // the address to apply sdt offsets against + pid_t tgid; // pid +@@ -392,6 +396,7 @@ stapiu_consumer_unreg(struct stapiu_consumer *c) + // multiple times in the list. Don't break after the first. + list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) { + list_del(&p->process_list); ++ // no refcount used for the inode field + _stp_kfree (p); + } + } +@@ -498,6 +503,8 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + /* Do the buildid check. NB: on F29+, offset may not equal + 0 for LOADable "R E" segments, because the read-only .note.* + stuff may have been loaded earlier, separately. PR23890. */ ++ // NB: this is not really necessary for buildid-based probes, ++ // which had this verified already. + rc = _stp_usermodule_check(task, c->module_name, + relocation - offset); + if (rc) +@@ -527,7 +534,6 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + } + + if (!inst) { // new instance; need new uprobe etc. +- // Normal case: need a new one. + inst = _stp_kzalloc(sizeof(struct stapiu_instance)); + if (! inst) { + rc = -ENOMEM; +@@ -560,30 +566,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + (void) _stp_perf_read_init ((c->perf_counters)[j], task); + } + +- // Associate this consumer with this process. If we encounter +- // resource problems here, we don't really have to undo the uprobe +- // registrations etc. already in effect. It may break correct +- // tracking of process hierarchy in -c/-x operation, but too bad. 
+- p = _stp_kzalloc(sizeof(struct stapiu_process)); +- if (! p) { +- rc = -ENOMEM; +- goto out1; +- } +- p->tgid = task->tgid; +- p->relocation = relocation; +- /* The base is used for relocating semaphores. If the +- * probe is in an ET_EXEC binary, then that offset +- * already is a real address. But stapiu_process_found +- * calls us in this case with relocation=offset=0, so +- * we don't have to worry about it. */ +- p->base = relocation - offset; +- spin_lock_irqsave (&c->process_list_lock, flags); +- list_add(&p->process_list, &c->process_list_head); +- spin_unlock_irqrestore (&c->process_list_lock, flags); +- // NB: actual semaphore value bumping is done later ++ // NB: process_list[] already extended up in stapiu_mmap_found(). + + rc = 0; +- // Register actual uprobe if cond_enabled right now + goto out1; + + out2: +@@ -599,7 +584,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, + * Increment the semaphore now. */ + static int + stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task, +- unsigned long relocation, unsigned long length) ++ unsigned long relocation, struct inode* inode) + { + int rc = 0; + struct stapiu_process *p; +@@ -609,6 +594,13 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + if (! c->sdt_sem_offset) // nothing to do + return 0; + ++ dbug_uprobes("considering semaphore (u%sprobe) pid %ld inode 0x%lx" ++ "pidx %zu\n", ++ c->return_p ? "ret" : "", ++ (long) task->tgid, ++ (unsigned long) inode, ++ c->probe->index); ++ + // NB: we mustn't hold a lock while changing the task memory, + // but we need a lock to protect the process_list from concurrent + // add/delete. So hold a spinlock during iteration until the first +@@ -617,32 +609,31 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task + // somehow maps in the same solib multiple times). 
We can't easily + // both iterate this list (in a spinlock-protected safe way), and + // relax the spinlock enough to do a safe stapiu_write_task_semaphore() +- // call within the loop. So we will hit only the copy in our list. ++ // call within the loop. So we will hit only the first copy in our list. + any_found = 0; + spin_lock_irqsave(&c->process_list_lock, flags); + /* Look through all the consumer's processes and increment semaphores. */ + list_for_each_entry(p, &c->process_list_head, process_list) { + unsigned long addr = p->base + c->sdt_sem_offset; +- if (p->tgid != task->tgid) // skip other processes in the list +- continue; +- if (addr >= relocation && addr < relocation + length) { +- int rc2; +- // unlock list and process write for this entry +- spin_unlock_irqrestore(&c->process_list_lock, flags); +- any_found=1; +- +- dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " +- "pidx %zu address %lx\n", +- c->return_p ? "ret" : "", +- (long) task->tgid, +- c->probe->index, +- (unsigned long) addr); +- +- rc2 = stapiu_write_task_semaphore(task, addr, +1); +- if (!rc) +- rc = rc2; +- break; // exit list_for_each loop +- } ++ int rc2; ++ if (p->tgid != task->tgid) continue; // skip other processes in the list ++ if (p->inode != inode) continue; // skip other inodes ++ ++ // unlock list and process write for this entry ++ spin_unlock_irqrestore(&c->process_list_lock, flags); ++ any_found=1; ++ ++ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " ++ "pidx %zu address 0x%lx\n", ++ c->return_p ? "ret" : "", ++ (long) task->tgid, ++ c->probe->index, ++ (unsigned long) addr); ++ ++ rc2 = stapiu_write_task_semaphore(task, addr, +1); ++ if (!rc) ++ rc = rc2; ++ break; // exit list_for_each loop + } + if (! 
any_found) + spin_unlock_irqrestore(&c->process_list_lock, flags); +@@ -755,17 +746,41 @@ stapiu_process_found(struct stap_task_finder_target *tf_target, + + if (!process_p) + return 0; /* ignore threads */ +- ++ ++ dbug_uprobes("process_found pid=%ld f.p=%s f.b=%s c.p=%s c.b=%s\n", ++ (long)task->tgid, ++ ((char*)c->finder.procname ?: ""), ++ ((char*)c->finder.build_id ?: ""), ++ ((char*)c->solib_pathname ?: ""), ++ ((char*)c->solib_build_id ?: "")); ++ + /* ET_EXEC events are like shlib events, but with 0 relocation bases */ + if (register_p) { + int rc = -EINVAL; + struct inode *inode = stapiu_get_task_inode(task); + + if (inode) { +- rc = stapiu_change_plus(c, task, 0, TASK_SIZE, +- 0, 0, inode); +- stapiu_change_semaphore_plus(c, task, 0, +- TASK_SIZE); ++ // Add a stapiu_process record to the consumer, so that ++ // the semaphore increment logic will accept this task. ++ struct stapiu_process* p; ++ unsigned long flags; ++ p = _stp_kzalloc(sizeof(struct stapiu_process)); ++ if (p) { ++ p->tgid = task->tgid; ++ p->relocation = 0; ++ p->inode = inode; ++ p->base = 0; ++ spin_lock_irqsave (&c->process_list_lock, flags); ++ list_add(&p->process_list, &c->process_list_head); ++ spin_unlock_irqrestore (&c->process_list_lock, flags); ++ } else { ++ _stp_warn("out of memory tracking executable in process %ld\n", ++ (long) task->tgid); ++ } ++ ++ rc = stapiu_change_plus(c, task, 0, TASK_SIZE, 0, 0, inode); ++ ++ stapiu_change_semaphore_plus(c, task, 0, inode); + } + return rc; + } else +@@ -776,6 +791,8 @@ stapiu_process_found(struct stap_task_finder_target *tf_target, + bool + __verify_build_id (struct task_struct *tsk, unsigned long addr, + unsigned const char *build_id, int build_id_len); ++// defined in task_finder2.c ++ + + + /* The task_finder_mmap_callback. 
These callbacks are NOT +@@ -791,28 +808,119 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target, + struct stapiu_consumer *c = + container_of(tf_target, struct stapiu_consumer, finder); + int rc = 0; ++ struct stapiu_process* p; ++ int known_mapping_p; ++ unsigned long flags; + +- /* The file path or build-id must match. The build-id address +- * is calculated using start address of this vma, the file +- * offset of the vma start address and the file offset of +- * the build-id. */ +- if (c->solib_pathname && path && strcmp (path, c->solib_pathname)) +- return 0; +- if (c->solib_build_id_len > 0 && !__verify_build_id(task, +- addr - offset + c->solib_build_id_vaddr, +- c->solib_build_id, +- c->solib_build_id_len)) +- return 0; ++ /* ++ We need to verify that this file/mmap corresponds to the given stapiu_consumer. ++ One could compare (inode) file name, but that won't work with buildid-based ++ uprobes. For those, one cannot just ++ ++ __verify_build_id(... addr - offset + c->solib_build_id_vaddr ...) ++ ++ because dlopen()ing a shared library involves multiple mmaps, including ++ some at repeating/offset addresses. See glibc _dl_map_segments() in various ++ versions. So by the fourth call (!) on modern glibc's, we get a VM_WRITE-able ++ data segment mapped, but that's at a load/mapping address that is offset by a ++ page from the base (file offset=0) mapping. ++ ++ e.g. 
on Fedora 32 / glibc 2.31, with testsuite/libsdt_buildid.so: ++ ++ Program Headers: ++ Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align ++ LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0004b8 0x0004b8 R 0x1000 ++ LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000161 0x000161 R E 0x1000 ++ LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x0000cc 0x0000cc R 0x1000 ++ LOAD 0x002df8 0x0000000000003df8 0x0000000000003df8 0x000232 0x000238 RW 0x1000 ++ DYNAMIC 0x002e10 0x0000000000003e10 0x0000000000003e10 0x0001d0 0x0001d0 RW 0x8 ++ ++ strace: ++ openat(AT_FDCWD, ".../libsdt_buildid.so", O_RDONLY|O_CLOEXEC) = 3 ++ mmap(NULL, 16432, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x148c764ac000 ++ mmap(0x148c764ad000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x148c764ad000 ++ mmap(0x148c764ae000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764ae000 ++ mmap(0x148c764af000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764af000 ++ ++ Note how the virtual mapping for the fourth mmap (also) maps file-offset 0x2000 at ++ vm offset 0x3000. ++ ++ So what we do is rely on the name/buildid validation tests being run ++ -earlier- in the dlopen/mmap sequence to validate near-future ++ mmap()s. We search the c->process_list[] for a mapping that already ++ overlaps the new range, and if so, consider it validated ... whether ++ for the solib_pathname or the solib_build_id case. ++ ++ This is complicated for startup-time traversal of processes/mmaps, ++ where it seems sometimes we get notifications out of temporal sequence. ++ */ + +- /* 1 - shared libraries' executable segments load from offset 0 +- * - ld.so convention offset != 0 is now allowed +- * so stap_uprobe_change_plus can set a semaphore, +- * i.e. 
a static extern, in a shared object +- * 2 - the shared library we're interested in +- * 3 - mapping should be executable or writeable (for +- * semaphore in .so) +- * NB: or both, on kernels that lack noexec mapping +- */ ++ known_mapping_p = 0; ++ spin_lock_irqsave(&c->process_list_lock, flags); ++ list_for_each_entry(p, &c->process_list_head, process_list) { ++ if (p->tgid != task->tgid) continue; ++ if (p->inode != dentry->d_inode) continue; ++ known_mapping_p = 1; ++ break; ++ } ++ spin_unlock_irqrestore(&c->process_list_lock, flags); ++ ++ ++ // Check if this mapping (solib) is of interest: whether we expect ++ // it by buildid or name. ++ ++ if (! known_mapping_p) { ++ /* The file path or build-id must match. The build-id address ++ * is calculated using start address of this vma, the file ++ * offset of the vma start address and the file offset of ++ * the build-id. */ ++ if (c->solib_pathname && path && strcmp (path, c->solib_pathname)) ++ return 0; ++ if (c->solib_build_id_len > 0 && !__verify_build_id(task, ++ addr - offset + c->solib_build_id_vaddr, ++ c->solib_build_id, ++ c->solib_build_id_len)) ++ return 0; ++ } ++ ++ // If we made it this far, we have an interesting solib. ++ ++ dbug_uprobes("mmap_found pid=%ld path=%s addr=0x%lx length=%lu offset=%lu flags=0x%lx known=%d\n", ++ (long) task->tgid, path, addr, length, offset, vm_flags, known_mapping_p); ++ ++ if (! known_mapping_p) { ++ // OK, let's add it. The first mapping should be a VM_READ mapping ++ // of the entire solib file, which will also serve as the apprx. ++ // outer bounds of the repeatedly-mapped segments. ++ ++#if 0 ++ // Consider an assumption about the dlopen/mmap sequence ++ // If it comes out of sequence, we could get length/base wrong in the stored ++ // stapiu_process, which could lead us to miscalculate semaphore addresses. 
++ // ++ // However, this has been observed on task-finder initial-enumeration case, ++ // (sdt_misc.exp, where a solib test is already running when stap starts). ++ if (offset != 0) ++ return 0; ++#endif ++ ++ // Associate this consumer with this process. If we encounter ++ // resource problems here, we don't really have to undo the uprobe ++ // registrations etc. already in effect. It may break correct ++ // tracking of process hierarchy in -c/-x operation, but too bad. ++ p = _stp_kzalloc(sizeof(struct stapiu_process)); ++ if (p) { ++ p->tgid = task->tgid; ++ p->relocation = addr; ++ p->inode = dentry->d_inode; ++ p->base = addr-offset; // ... in case caught this during the second mmap ++ spin_lock_irqsave (&c->process_list_lock, flags); ++ list_add(&p->process_list, &c->process_list_head); ++ spin_unlock_irqrestore (&c->process_list_lock, flags); ++ } else ++ _stp_warn("out of memory tracking solib %s in process %ld\n", ++ path, (long) task->tgid); ++ } + + /* Check non-writable, executable sections for probes. */ + if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) +@@ -827,7 +935,7 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target, + */ + + if ((rc == 0) && (vm_flags & VM_WRITE)) +- rc = stapiu_change_semaphore_plus(c, task, addr, length); ++ rc = stapiu_change_semaphore_plus(c, task, addr, dentry->d_inode); + + return rc; + } +diff --git a/runtime/sym.c b/runtime/sym.c +index be09ec8..21d820a 100644 +--- a/runtime/sym.c ++++ b/runtime/sym.c +@@ -713,9 +713,10 @@ static int _stp_build_id_check (struct _stp_module *m, + // NB: It is normal for different binaries with the same file path + // coexist in the same system via chroot or namespaces, therefore + // we make sure below is really a warning. 
+- _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" address " ++ _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address " + "%#lx, expected %s actual %s\n", +- m->path, notes_addr, hexstring_theory, hexstring_practice); ++ m->path, (long) tsk->tgid, ++ notes_addr, hexstring_theory, hexstring_practice); + return 1; + } + + +commit 5e1ef9d7f2a5ea6e5511ef5228cf05dda1c570b3 +Author: Frank Ch. Eigler +Date: Mon Jul 27 07:58:30 2020 -0400 + + PR25568 / RHBZ1857749: sdt_buildid.exp test case + + Add new test that checks for combinations of buildid and pathname + based uprobes for executables and shared libraries. + +diff --git a/testsuite/systemtap.base/sdt_buildid.c b/testsuite/systemtap.base/sdt_buildid.c +new file mode 100644 +index 0000000..ccbb2f2 +--- /dev/null ++++ b/testsuite/systemtap.base/sdt_buildid.c +@@ -0,0 +1,26 @@ ++#include ++#include ++#include ++ ++void bar (); ++ ++#ifndef ONLY_MAIN ++#include "sdt_buildid_.h" ++ ++void ++bar () ++{ ++ printf("%s=%ld\n", "test_probe_0_semaphore", SDT_BUILDID_TEST_PROBE_0_ENABLED()); ++ if (SDT_BUILDID_TEST_PROBE_0_ENABLED()) ++ SDT_BUILDID_TEST_PROBE_0(); ++} ++#endif ++ ++#ifndef NO_MAIN ++int ++main () ++{ ++ bar(); ++ return 0; ++} ++#endif +diff --git a/testsuite/systemtap.base/sdt_buildid.exp b/testsuite/systemtap.base/sdt_buildid.exp +new file mode 100644 +index 0000000..3141fd6 +--- /dev/null ++++ b/testsuite/systemtap.base/sdt_buildid.exp +@@ -0,0 +1,214 @@ ++set test "sdt_buildid" ++ ++set pbtype_flags {{additional_flags=-g} {} {}} ++set fail_count 0 ++ ++# Compile a C program to use as the user-space probing target ++set stap_path $env(SYSTEMTAP_PATH)/stap ++set sup_dpath "[pwd]/sdt_buildid_.d" ++set sup_hpath "[pwd]/sdt_buildid_.h" ++set sup_opath "[pwd]/sdt_buildid_.o" ++ ++# Run dtrace ++if {[installtest_p]} { ++ set dtrace $env(SYSTEMTAP_PATH)/dtrace ++} else { ++ set dtrace ../dtrace ++} ++ ++verbose -log "$dtrace --types -h -s $srcdir/$subdir/sdt_buildid_.d" ++if 
{[catch {exec $dtrace --types -h -s \ ++ $srcdir/$subdir/sdt_buildid_.d} res]} { ++ verbose -log "unable to run $dtrace: $res" ++} ++verbose -log "$dtrace --types -G -s $srcdir/$subdir/sdt_buildid_.d" ++if {[catch {exec $dtrace --types -G -s \ ++ $srcdir/$subdir/sdt_buildid_.d} res]} { ++ verbose -log "unable to run $dtrace: $res" ++} ++if {[file exists $sup_hpath] && [file exists $sup_opath]} then { ++ pass "$test dtrace" ++} else { ++ incr fail_count ++ fail "$test dtrace" ++ return ++} ++ ++set sup_flags [sdt_includes] ++set sup_flags "$sup_flags additional_flags=-Wall" ++set sup_flags "$sup_flags additional_flags=-Werror" ++set sup_flags "$sup_flags additional_flags=$sup_opath" ++set sup_flags "$sup_flags additional_flags=-I." ++set sup_exepath "[pwd]/sdt_buildid.x" ++ ++set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exepath \ ++ executable $sup_flags] ++if { $res != "" } { ++ incr fail_count ++ verbose "target_compile failed: $res" 2 ++ fail "$test compiling" ++ return ++} else { ++ pass "$test compiling" ++} ++ ++ ++set sup41_flags "$sup_flags additional_flags=-shared" ++set sup41_flags "$sup41_flags additional_flags=-fPIC" ++set sup41_flags "$sup41_flags additional_flags=-DNO_MAIN" ++set sup_sopath "[pwd]/libsdt_buildid.so" ++set sup_exe2path "[pwd]/sdt_buildid_shared.x" ++set res0 [target_compile $srcdir/$subdir/sdt_buildid.c $sup_sopath \ ++ executable $sup41_flags ] ++set sup42_flags "additional_flags=-Wl,-rpath,[pwd]" ++set sup42_flags "$sup42_flags additional_flags=-L[pwd] additional_flags=-lsdt_buildid" ++set sup42_flags "$sup42_flags additional_flags=-DONLY_MAIN" ++set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exe2path \ ++ executable $sup42_flags ] ++if { $res0 != "" || $res != "" } { ++ incr fail_count ++ verbose "target_compile failed: $res0 $res" 2 ++ fail "$test compiling -shared" ++ return ++} else { ++ pass "$test compiling -shared" ++} ++ ++catch { exec eu-readelf -n $sup_exepath | grep Build.ID | awk "{print \$NF}" } 
bid1 ++catch { exec eu-readelf -n $sup_sopath | grep Build.ID | awk "{print \$NF}" } bidso ++catch { exec eu-readelf -n $sup_exe2path | grep Build.ID | awk "{print \$NF}" } bid2 ++verbose -log "buildid: $sup_exepath $bid1" ++verbose -log "buildid: $sup_sopath $bidso" ++verbose -log "buildid: $sup_exe2path $bid2" ++# though we won't use the $bid2 ++ ++if {![installtest_p]} { ++ untested $test ++ return ++} ++ ++# To test via build-id, we need a debuginfod server to scan the testsuite build ++# directory. ++ ++ ++if [catch {exec /usr/bin/which debuginfod} debuginfod] then { ++ untested "$test debuginfod" ++} else { ++ set port [expr {10000 + int(rand()*10000)}] ++ spawn $debuginfod -p $port -d :memory: -F . ++ set debuginfod_pid [exp_pid $spawn_id] ++ # give it time to scan the build directory ++ sleep 10 ++ # XXX: we could expect some verbose traffic ++ set env(DEBUGINFOD_URLS) "http://localhost:$port $env(DEBUGINFOD_URLS)" ++ verbose -log "started debuginfod on port $port" ++ ++ set subtest "$test debuginfod buildid-exe buildid-solib" ++ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $bidso ++ set ok 0 ++ expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++ } ++ catch {close}; catch {wait} ++ if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++ set subtest "$test debuginfod buildid-exe path-solib" ++ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $sup_sopath ++ set ok 0 ++ expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++ } ++ catch {close}; catch {wait} ++ if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++ set subtest "$test debuginfod path-exe buildid-solib" ++ spawn 
$stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $bidso ++ set ok 0 ++ expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++ } ++ catch {close}; catch {wait} ++ if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++ set subtest "$test debuginfod buildid-solib" ++ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bidso ++ set ok 0 ++ expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++ } ++ catch {close}; catch {wait} ++ if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++ set subtest "$test debuginfod buildid-exe" ++ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 ++ set ok 0 ++ expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ eof { } ++ timeout { } ++ } ++ catch {close}; catch {wait} ++ if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++ kill -INT $debuginfod_pid ++} ++ ++ ++set subtest "$test non-buildid both" ++spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $sup_sopath ++set ok 0 ++expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++} ++catch {close}; catch {wait} ++if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++set subtest "$test non-buildid exe" ++spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath ++set ok 0 ++expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec 
$sup_exepath; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } ++ eof { } ++ timeout { } ++} ++catch {close}; catch {wait} ++if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++set subtest "$test non-buildid solib" ++spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_sopath ++set ok 0 ++expect { ++ -timeout 240 ++ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } ++ eof { } ++ timeout { } ++} ++catch {close}; catch {wait} ++if {$ok > 6} then { pass $subtest } else { fail $subtest } ++ ++return +diff --git a/testsuite/systemtap.base/sdt_buildid.stp b/testsuite/systemtap.base/sdt_buildid.stp +new file mode 100644 +index 0000000..a26d183 +--- /dev/null ++++ b/testsuite/systemtap.base/sdt_buildid.stp +@@ -0,0 +1,19 @@ ++global count ++ ++function trace () { ++ printf ("Count %d [%d] %s %s\n", count++, pid(), $$name, pp()) ++} ++ ++probe process(@1).mark("test_probe_0") { trace() } ++%( $# > 1 %? probe process(@2).mark("test_probe_0") { trace() } %) ++ ++probe begin ++{ ++ printf ("Count %d\n", count++) ++} ++ ++probe timer.s(1) // exit quickly after enough marks fire ++{ ++ if (count > 10) exit() ++} ++ +diff --git a/testsuite/systemtap.base/sdt_buildid_.d b/testsuite/systemtap.base/sdt_buildid_.d +new file mode 100644 +index 0000000..ebfca55 +--- /dev/null ++++ b/testsuite/systemtap.base/sdt_buildid_.d +@@ -0,0 +1,4 @@ ++provider sdt_buildid { ++ probe test_probe_0 (); ++}; ++ diff --git a/rhbz1855264.patch b/rhbz1855264.patch new file mode 100644 index 0000000..c361090 --- /dev/null +++ b/rhbz1855264.patch @@ -0,0 +1,874 @@ +commit 0a281a96ddf7cae9a0f0cc0eb505a752ffdd932e +Author: William Cohen +Date: Tue Jun 16 16:02:11 2020 -0400 + + Make sizeof.stp runnable with the bpf backend. 
+ +diff --git a/testsuite/systemtap.examples/general/sizeof.meta b/testsuite/systemtap.examples/general/sizeof.meta +index 29713e4..b30078d 100644 +--- a/testsuite/systemtap.examples/general/sizeof.meta ++++ b/testsuite/systemtap.examples/general/sizeof.meta +@@ -2,7 +2,7 @@ title: Print the Size of a C Type + name: sizeof.stp + version: 1.0 + author: anonymous +-keywords: statistics memory ++keywords: statistics memory bpf + subsystem: any + status: proposed + exit: event-ended +@@ -11,3 +11,5 @@ scope: system-wide + description: This script prints the size of a type, based on dwarf debuginfo for any kernel or userspace module, or trial-compilation of a given header file name. + test_check: stap -p4 sizeof.stp task_struct 'kernel' + test_installcheck: stap sizeof.stp FILE '' ++test_check_bpf: stap -p4 --bpf sizeof.stp task_struct 'kernel' ++test_installcheck_bpf: stap --bpf sizeof.stp FILE '' +diff --git a/testsuite/systemtap.examples/general/sizeof.stp b/testsuite/systemtap.examples/general/sizeof.stp +index 0c77dce..5aec674 100755 +--- a/testsuite/systemtap.examples/general/sizeof.stp ++++ b/testsuite/systemtap.examples/general/sizeof.stp +@@ -7,9 +7,11 @@ + # sizeof.stp TYPENAME + + probe oneshot { +- println("type ", @1, +- %( $# > 1 %? " in ", @2, %) /* module or header file name */ +- " byte-size: ", +- %( $# > 1 %? @cast_module_sizeof(@2, @1) %: @cast_sizeof(@1) %) +- ) ++ %( $# > 1 %? ++ printf("type %s in %s byte-size: %d\n", @1, @2, ++ @cast_module_sizeof(@2, @1)) ++ %: ++ printf("type %s byte-size: %d\n", @1, ++ @cast_sizeof(@1)) ++ %) + } +commit 2b2b6a622dc1d434c60d0ea159b260f660068ad1 +Author: William Cohen +Date: Wed Jun 17 11:57:18 2020 -0400 + + Fix sizeof.stp to explicitly use kernel debuginfo if one not specified + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. 
Need to use the @cast_module_sizeof() + instead of @cast_size() to use the kernel debuginfo. + +diff --git a/testsuite/systemtap.examples/general/sizeof.stp b/testsuite/systemtap.examples/general/sizeof.stp +index 5aec674..b45f593 100755 +--- a/testsuite/systemtap.examples/general/sizeof.stp ++++ b/testsuite/systemtap.examples/general/sizeof.stp +@@ -12,6 +12,6 @@ probe oneshot { + @cast_module_sizeof(@2, @1)) + %: + printf("type %s byte-size: %d\n", @1, +- @cast_sizeof(@1)) ++ @cast_module_sizeof("kernel", @1)) + %) + } + +commit 717b7dddd08b66b3caa5585221472d84e40be658 +Author: William Cohen +Date: Wed Jun 17 13:08:30 2020 -0400 + + Use explicit @cast() operators to fslatency-nd.stp and fsslower-nd.stp + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. Need to include kernel as location for + this information for the @cast() rather than just assuming a default. + +diff --git a/testsuite/systemtap.examples/lwtools/fslatency-nd.stp b/testsuite/systemtap.examples/lwtools/fslatency-nd.stp +index 6008399..0bee34f 100755 +--- a/testsuite/systemtap.examples/lwtools/fslatency-nd.stp ++++ b/testsuite/systemtap.examples/lwtools/fslatency-nd.stp +@@ -63,8 +63,8 @@ probe __vfs_read = kprobe.function("__vfs_read") + { + # Skip the call if new_sync_read() wouldn't be called. + file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->read +- || !@cast(file, "file")->f_op->read_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->read ++ || !@cast(file, "file", "kernel")->f_op->read_iter) + next + } + +@@ -75,8 +75,8 @@ probe __vfs_write = kprobe.function("__vfs_write") + { + # Skip the call if new_sync_write() wouldn't be called. 
+ file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->write +- || !@cast(file, "file")->f_op->write_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->write ++ || !@cast(file, "file", "kernel")->f_op->write_iter) + next + } + +@@ -102,8 +102,8 @@ probe __vfs_read.return = kprobe.function("__vfs_read").return + { + # Skip the call if new_sync_read() wouldn't be called. + file = @entry(pointer_arg(1)) +- if (!file || @cast(file, "file")->f_op->read +- || !@cast(file, "file")->f_op->read_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->read ++ || !@cast(file, "file", "kernel")->f_op->read_iter) + next + } + +@@ -115,8 +115,8 @@ probe __vfs_write.return = kprobe.function("__vfs_write") + { + # Skip the call if new_sync_write() wouldn't be called. + file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->write +- || !@cast(file, "file")->f_op->write_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->write ++ || !@cast(file, "file", "kernel")->f_op->write_iter) + next + } + +diff --git a/testsuite/systemtap.examples/lwtools/fsslower-nd.stp b/testsuite/systemtap.examples/lwtools/fsslower-nd.stp +index 64abe41..90fa9b5 100755 +--- a/testsuite/systemtap.examples/lwtools/fsslower-nd.stp ++++ b/testsuite/systemtap.examples/lwtools/fsslower-nd.stp +@@ -65,8 +65,8 @@ probe __vfs_read = kprobe.function("__vfs_read") + { + # Skip the call if new_sync_read() wouldn't be called. + file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->read +- || !@cast(file, "file")->f_op->read_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->read ++ || !@cast(file, "file", "kernel")->f_op->read_iter) + next + } + +@@ -77,8 +77,8 @@ probe __vfs_write = kprobe.function("__vfs_write") + { + # Skip the call if new_sync_write() wouldn't be called. 
+ file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->write +- || !@cast(file, "file")->f_op->write_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->write ++ || !@cast(file, "file", "kernel")->f_op->write_iter) + next + } + +@@ -110,8 +110,8 @@ probe __vfs_read.return = kprobe.function("__vfs_read").return + { + # Skip the call if new_sync_read() wouldn't be called. + file = @entry(pointer_arg(1)) +- if (!file || @cast(file, "file")->f_op->read +- || !@cast(file, "file")->f_op->read_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->read ++ || !@cast(file, "file", "kernel")->f_op->read_iter) + next + } + +@@ -123,7 +123,7 @@ probe __vfs_write.return = kprobe.function("__vfs_write") + { + # Skip the call if new_sync_write() wouldn't be called. + file = pointer_arg(1) +- if (!file || @cast(file, "file")->f_op->write +- || !@cast(file, "file")->f_op->write_iter) ++ if (!file || @cast(file, "file", "kernel")->f_op->write ++ || !@cast(file, "file", "kernel")->f_op->write_iter) + next + } + +commit 9eb37102d48b814821b7f474986a7bfe86784192 +Author: William Cohen +Date: Wed Jun 17 13:39:20 2020 -0400 + + Use explicit @cast() operators for pfiles.stp and ioctl_handler.stp + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. Need to include kernel as location for + this information for the @cast() rather than just assuming a default. + +diff --git a/testsuite/systemtap.examples/process/pfiles.stp b/testsuite/systemtap.examples/process/pfiles.stp +index d52a154..6344a4c 100755 +--- a/testsuite/systemtap.examples/process/pfiles.stp ++++ b/testsuite/systemtap.examples/process/pfiles.stp +@@ -787,9 +787,9 @@ function print_unix_socket(sock) { + strlen(peername) > 0 ? peername . "\n" : "") + try { # skip line in case of null pointers + printf(" peercred pid: %d\n", +- @defined(@cast(sock, "socket")->sk->sk_peer_pid) ? 
+- @cast(sock, "socket")->sk->sk_peer_pid->numbers[0]->nr : +- @cast(sock, "socket")->sk->sk_peercred->pid ); } catch { } ++ @defined(@cast(sock, "socket", "kernel")->sk->sk_peer_pid) ? ++ @cast(sock, "socket", "kernel")->sk->sk_peer_pid->numbers[0]->nr : ++ @cast(sock, "socket", "kernel")->sk->sk_peercred->pid ); } catch { } + } + + function print_ipv4_socket(sock) { +diff --git a/testsuite/systemtap.examples/profiling/ioctl_handler.stp b/testsuite/systemtap.examples/profiling/ioctl_handler.stp +index 7044185..6f1e52c 100755 +--- a/testsuite/systemtap.examples/profiling/ioctl_handler.stp ++++ b/testsuite/systemtap.examples/profiling/ioctl_handler.stp +@@ -9,7 +9,7 @@ probe syscall.ioctl { + ioctl_requests[execname()] <<< 1 + try { + # Dig down through the task struct to find the actual function handling ioctl. +- ioctl_func_address = @cast(task_current(), "struct task_struct")->files->fdt->fd[fd]->f_op->unlocked_ioctl ++ ioctl_func_address = @cast(task_current(), "struct task_struct", "kernel")->files->fdt->fd[fd]->f_op->unlocked_ioctl + if (ioctl_func_address) + ioctl_func[execname(), ioctl_func_address] <<< 1 + } catch { + +commit 3040d4e8ddb6a9b1d1a57a0185206498670c3f1a +Author: William Cohen +Date: Wed Jun 17 13:53:58 2020 -0400 + + Use explicit @cast() operators for stapgames/pingpong.stp tapset. 
+ +diff --git a/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp b/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp +index 026e4a9..f6ad2db 100644 +--- a/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp ++++ b/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp +@@ -10,12 +10,12 @@ global GM_tty_ws_row, GM_tty_ws_col, GM_tty_name + # Initialize current TTY -- must be called from begin + function game_tty_init:long () + { +- tty = @cast(task_current(), "task_struct")->signal->tty ++ tty = @cast(task_current(), "task_struct", "kernel")->signal->tty + if (tty) { +- namep = @cast(tty,"tty_struct")->name ++ namep = @cast(tty,"tty_struct", "kernel")->name + GM_tty_name = kernel_string(namep) +- GM_tty_ws_col = @cast(tty, "tty_struct")->winsize->ws_col +- GM_tty_ws_row = @cast(tty, "tty_struct")->winsize->ws_row ++ GM_tty_ws_col = @cast(tty, "tty_struct", "kernel")->winsize->ws_col ++ GM_tty_ws_row = @cast(tty, "tty_struct", "kernel")->winsize->ws_row + } + return tty + } + +commit 3d922919dbe5657becf48917f1c661bf6711e956 +Author: William Cohen +Date: Thu Jun 18 13:32:50 2020 -0400 + + Use explicit @cast() operators for periodic.stp + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. Need to include kernel as location for + this information for the @cast() rather than just assuming a default. + +diff --git a/testsuite/systemtap.examples/profiling/periodic.stp b/testsuite/systemtap.examples/profiling/periodic.stp +index f18f183..b9052e5 100755 +--- a/testsuite/systemtap.examples/profiling/periodic.stp ++++ b/testsuite/systemtap.examples/profiling/periodic.stp +@@ -18,8 +18,8 @@ probe kernel.trace("timer_expire_entry") + period[$timer] <<< elapsed + funct[$timer] = $timer->function + data[$timer] = @defined($timer->data) ? $timer->data : 0 +- proc_info[$timer] = @defined($timer->data) ? 
0 : @container_of($timer, "struct process_timer", timer)->task +- delayed_work_info[$timer] = @defined($timer->data) ? 0 : & @container_of($timer, "struct delayed_work", timer) ++ proc_info[$timer] = @defined($timer->data) ? 0 : @module_container_of($timer, "kernel", "struct process_timer", timer)->task ++ delayed_work_info[$timer] = @defined($timer->data) ? 0 : & @module_container_of($timer, "kernel", "struct delayed_work", timer) + } + last_expire[$timer] = new_expire + } + +commit b2d18cb3afca76536506fe4992fdd6ef091ce82f +Author: William Cohen +Date: Thu Jun 18 15:01:40 2020 -0400 + + Use explicit @cast() operators for semop-watch.stp example. + +diff --git a/testsuite/systemtap.examples/process/semop-watch.stp b/testsuite/systemtap.examples/process/semop-watch.stp +index ca2bf0a..bf1d632 100755 +--- a/testsuite/systemtap.examples/process/semop-watch.stp ++++ b/testsuite/systemtap.examples/process/semop-watch.stp +@@ -3,7 +3,7 @@ global times; + + probe syscall.{semop,semtimedop} + { +- sembuf_sz = @cast_sizeof("struct sembuf"); ++ sembuf_sz = @cast_module_sizeof("kernel", "struct sembuf"); + res = sprintf("set %d sems", semid) + + %( systemtap_v < "2.3" %? +@@ -14,7 +14,7 @@ probe syscall.{semop,semtimedop} + for(i = 0; i < nsops; i++) { + offset = i * sembuf_sz; + pointer = sops_uaddr + offset; +- num_addr = & @cast(pointer, "struct sembuf")->sem_num; ++ num_addr = & @cast(pointer, "struct sembuf", "kernel")->sem_num; + num = user_short(num_addr); + + res = sprintf("%s %d", res, num); + +commit a948c291c9cd7320d3c9b18b5037908cbbdf70b7 +Author: William Cohen +Date: Mon Jun 22 11:28:32 2020 -0400 + + Use explicit @cast() operators pointing to kernel for tapsets + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. Need to include kernel as location for + this information for the @cast() rather than just assuming a default. 
+ +diff --git a/tapset/linux/dentry.stp b/tapset/linux/dentry.stp +index 4e73532..d148c57 100644 +--- a/tapset/linux/dentry.stp ++++ b/tapset/linux/dentry.stp +@@ -28,7 +28,7 @@ + + @__private30 function __dentry_IS_ROOT:long(dentry:long) + { +- return (@cast(dentry, "dentry")->d_parent == dentry) ++ return (@cast(dentry, "dentry", "kernel")->d_parent == dentry) + } + + +@@ -61,7 +61,7 @@ + */ + function d_name:string(dentry:long) + { +- s = & @cast(dentry, "dentry")->d_name; ++ s = & @cast(dentry, "dentry", "kernel")->d_name; + return kernel_string_n(s->name, s->len); + } + +@@ -70,8 +70,8 @@ function d_name:string(dentry:long) + { + /* i_dentry is an hlist_head on 3.6+, or a list_head before that. */ + d_alias = @choose_defined( +- @cast(inode, "struct inode")->i_dentry->first, +- @cast(inode, "struct inode")->i_dentry->next) ++ @cast(inode, "struct inode", "kernel")->i_dentry->first, ++ @cast(inode, "struct inode", "kernel")->i_dentry->next) + + if (@type_member_defined("struct dentry", d_alias)) { + return & @container_of(d_alias, "struct dentry", d_alias) +@@ -86,8 +86,8 @@ function d_name:string(dentry:long) + { + /* s_mounts was added in kernel 3.6, commit b3d9b7a3c. */ + if (@type_member_defined("struct super_block", s_mounts)) { +- mnt_ns = @cast(task_current(), "struct task_struct")->nsproxy->mnt_ns +- sb = @cast(inode, "struct inode")->i_sb ++ mnt_ns = @cast(task_current(), "struct task_struct", "kernel")->nsproxy->mnt_ns ++ sb = @cast(inode, "struct inode", "kernel")->i_sb + + /* Look for the mount which matches the current namespace */ + head = &sb->s_mounts +@@ -141,7 +141,7 @@ function reverse_path_walk:string(dentry:long) + { + while(1) { + name = __dentry_prepend(dentry, name); +- dentry = @cast(dentry, "dentry")->d_parent; ++ dentry = @cast(dentry, "dentry", "kernel")->d_parent; + if (__dentry_IS_ROOT(dentry)) + return name; + } +@@ -209,8 +209,8 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) + * dentry == vfsmnt->mnt_root. 
In that case, we'll just go + * ahead and handle them normally. + */ +- dentry = & @cast(dentry, "dentry") +- vfsmnt = & @cast(vfsmnt, "vfsmount") ++ dentry = & @cast(dentry, "dentry", "kernel") ++ vfsmnt = & @cast(vfsmnt, "vfsmount", "kernel") + + if (@type_member_defined("dentry", d_op->d_dname) + && dentry->d_op && dentry->d_op->d_dname +@@ -230,7 +230,7 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) + return sprintf("ANON_INODE:%s", d_name(dentry)) + } + else if (vfsmnt->mnt_sb->s_magic == @const("NSFS_MAGIC")) { +- ns_ops = &@cast(dentry->d_fsdata, "proc_ns_operations") ++ ns_ops = &@cast(dentry->d_fsdata, "proc_ns_operations", "kernel") + return sprintf("%s:[%lu]", kernel_string(ns_ops->name), + dentry->d_inode->i_ino) + } +@@ -239,16 +239,16 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) + + # Handle old-school vs. new-school fs_structs. + if (@type_member_defined("fs_struct", rootmnt)) { +- root_dentry = & @cast(task, "task_struct")->fs->root +- root_vfsmnt = & @cast(task, "task_struct")->fs->rootmnt ++ root_dentry = & @cast(task, "task_struct", "kernel")->fs->root ++ root_vfsmnt = & @cast(task, "task_struct", "kernel")->fs->rootmnt + } + else { +- root_dentry = @cast(task, "task_struct")->fs->root->dentry +- root_vfsmnt = @cast(task, "task_struct")->fs->root->mnt ++ root_dentry = @cast(task, "task_struct", "kernel")->fs->root->dentry ++ root_vfsmnt = @cast(task, "task_struct", "kernel")->fs->root->mnt + } + + if (@type_member_defined("mount", mnt_parent)) { +- mnt = &@cast(real_mount(vfsmnt), "mount") ++ mnt = &@cast(real_mount(vfsmnt), "mount", "kernel") + if (mnt == 0) + return "" + } +@@ -305,10 +305,10 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) + */ + function d_path:string(nd:long) + { +- dentry = @choose_defined(@cast(nd,"nameidata")->path->dentry, +- @cast(nd,"nameidata")->dentry) +- vfsmnt = @choose_defined(@cast(nd,"nameidata")->path->mnt, +- @cast(nd,"nameidata")->mnt) ++ 
dentry = @choose_defined(@cast(nd,"nameidata", "kernel")->path->dentry, ++ @cast(nd,"nameidata", "kernel")->dentry) ++ vfsmnt = @choose_defined(@cast(nd,"nameidata", "kernel")->path->mnt, ++ @cast(nd,"nameidata", "kernel")->mnt) + + return sprintf("%s/", task_dentry_path(task_current(), dentry, vfsmnt)) + } +@@ -353,8 +353,8 @@ function fullpath_struct_path:string(path:long) + function fullpath_struct_file:string(task:long, file:long) + { + return task_dentry_path(task, +- @choose_defined(@cast(file, "file")->f_path->dentry, +- @cast(file, "file")->f_dentry), +- @choose_defined(@cast(file, "file")->f_path->mnt, +- @cast(file, "file")->f_vfsmnt)) ++ @choose_defined(@cast(file, "file", "kernel")->f_path->dentry, ++ @cast(file, "file", "kernel")->f_dentry), ++ @choose_defined(@cast(file, "file", "kernel")->f_path->mnt, ++ @cast(file, "file", "kernel")->f_vfsmnt)) + } +diff --git a/tapset/linux/dev.stp b/tapset/linux/dev.stp +index 0232fc9..079ce1c 100644 +--- a/tapset/linux/dev.stp ++++ b/tapset/linux/dev.stp +@@ -56,8 +56,8 @@ function usrdev2kerndev:long(dev:long) + function disk_name:string(hd:long, partno:long) + { + if (!partno) +- return kernel_string(@cast(hd, "gendisk")->disk_name) +- disk_name = kernel_string(@cast(hd, "gendisk")->disk_name) ++ return kernel_string(@cast(hd, "gendisk", "kernel")->disk_name) ++ disk_name = kernel_string(@cast(hd, "gendisk", "kernel")->disk_name) + if (isdigit(substr(disk_name, strlen(disk_name)-1, 1))) + return sprintf("%sp%d", disk_name, partno) + else +@@ -66,7 +66,7 @@ function disk_name:string(hd:long, partno:long) + + function bdevname:string(bdev:long) + { +- bdev = & @cast(bdev, "block_device") ++ bdev = & @cast(bdev, "block_device", "kernel") + if (bdev == 0) + return "N/A" + +diff --git a/tapset/linux/ioblock.stp b/tapset/linux/ioblock.stp +index ad3603c..9d8f57b 100644 +--- a/tapset/linux/ioblock.stp ++++ b/tapset/linux/ioblock.stp +@@ -107,12 +107,12 @@ function bio_rw_str(rw:long) + @__private30 function 
__bio_start_sect:long(bio:long) + { + try { +- if (@defined(@cast(bio, "bio")->bi_dev)) { +- return @cast(bio, "bio")->bi_bdev->bd_part->start_sect ++ if (@defined(@cast(bio, "bio", "kernel")->bi_dev)) { ++ return @cast(bio, "bio", "kernel")->bi_bdev->bd_part->start_sect + } +- else if (@defined(@cast(bio, "bio")->bi_disk)) { +- return disk_get_part_start_sect(@cast(bio, "bio")->bi_disk, +- @cast(bio, "bio")->bi_partno) ++ else if (@defined(@cast(bio, "bio", "kernel")->bi_disk)) { ++ return disk_get_part_start_sect(@cast(bio, "bio", "kernel")->bi_disk, ++ @cast(bio, "bio", "kernel")->bi_partno) + } + } catch { + return -1 +@@ -122,12 +122,12 @@ function bio_rw_str(rw:long) + /* returns the block device name */ + @__private30 function __bio_devname:string(bio:long) + { +- if (@defined(@cast(bio, "bio")->bi_bdev)) { +- return bdevname(@cast(bio, "bio")->bi_bdev) ++ if (@defined(@cast(bio, "bio", "kernel")->bi_bdev)) { ++ return bdevname(@cast(bio, "bio", "kernel")->bi_bdev) + } + else { +- return disk_name(@cast(bio, "bio")->bi_disk, +- @cast(bio, "bio")->bi_partno) ++ return disk_name(@cast(bio, "bio", "kernel")->bi_disk, ++ @cast(bio, "bio", "kernel")->bi_partno) + } + } + +diff --git a/tapset/linux/task.stp b/tapset/linux/task.stp +index 4afc458..b542b61 100644 +--- a/tapset/linux/task.stp ++++ b/tapset/linux/task.stp +@@ -40,7 +40,7 @@ function task_current:long () { + return -1; + } + sig = @task(task)->signal; +- return @cast(sig, "signal_struct")->rlim[nd_limit]->rlim_cur; ++ return @cast(sig, "signal_struct", "kernel")->rlim[nd_limit]->rlim_cur; + } + + /* sfunction task_rlimit - The current resource limit of the task + +commit 403e927796c3008ad5d5fed9bd97dc7cbad424bb +Author: Martin Cermak +Date: Mon Jun 29 16:30:34 2020 +0200 + + PR26181: Use explicit @cast() within get_ip_from_client() + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. 
Need to include kernel as location for + this information for the @cast() rather than just assuming a default. + + Also, fix the type of server_ip, which historically had been a long, + but since systemtap_v >= "4.3", it is a string. + +diff --git a/tapset/linux/nfs_proc.stp b/tapset/linux/nfs_proc.stp +index 8da3f6b..2579074 100644 +--- a/tapset/linux/nfs_proc.stp ++++ b/tapset/linux/nfs_proc.stp +@@ -77,11 +77,11 @@ function get_ip_from_client:string(clnt:long) + * inside that buffer. */ + if (@cast(addr, "sockaddr")->sa_family + == @const("AF_INET")) { +- return format_ipaddr(&@cast(addr, "sockaddr_in")->sin_addr->s_addr, @const("AF_INET")) ++ return format_ipaddr(&@cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_addr->s_addr, @const("AF_INET")) + } + else if (@cast(addr, "sockaddr")->sa_family + == @const("AF_INET6")) { +- return format_ipaddr(&@cast(addr, "sockaddr_in6")->sin6_addr, @const("AF_INET6")) ++ return format_ipaddr(&@cast(addr, "sockaddr_in6", "kernel:sunrpc")->sin6_addr, @const("AF_INET6")) + } + return "" + } +@@ -90,12 +90,12 @@ function get_ip_from_client:long(clnt:long) + { + cl_xprt = @cast(clnt, "rpc_clnt", "kernel:sunrpc")->cl_xprt + addr = &@cast(cl_xprt, "rpc_xprt", "kernel:sunrpc")->addr +- if (@cast(addr, "sockaddr_in")->sin_family ++ if (@cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_family + != @const("AF_INET")) { + /* Now consider ipv4 only */ + return 0 + } +- return @cast(addr, "sockaddr_in")->sin_addr->s_addr ++ return @cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_addr->s_addr + } + %) + +@@ -758,7 +758,11 @@ probe _nfs.proc2.missing_read_setup = never + { + inode = 0 + client = 0 ++%( systemtap_v >= "4.3" %? 
++ server_ip = "0" ++%: + server_ip = 0 ++%) + prot = 0 + + count = 0 + +commit f1a9bb064d11319a7eca4f4233c9edcc4a03af7e +Author: Martin Cermak +Date: Thu Jul 9 09:19:01 2020 +0200 + + Tapset and testsuite updates against @cast() change 00ee19ff03 + + Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() + operations work and they no longer default to using the kernel + debuginfo for type information. Need to include kernel as location for + this information for the @cast() rather than just assuming a default. + + These are respective tapset and testsuite minor updates. + +diff --git a/tapset/linux/networking.stp b/tapset/linux/networking.stp +index 69843a7..0b52cbc 100644 +--- a/tapset/linux/networking.stp ++++ b/tapset/linux/networking.stp +@@ -69,7 +69,7 @@ + + /* A function that returns the device name given the net_device struct */ + function get_netdev_name:string (addr:long) { +- return kernel_string(@cast(addr, "net_device")->name) ++ return kernel_string(@cast(addr, "net_device", "kernel")->name) + } + + /** +diff --git a/tapset/linux/scsi.stp b/tapset/linux/scsi.stp +index 3577942..5359fe8 100644 +--- a/tapset/linux/scsi.stp ++++ b/tapset/linux/scsi.stp +@@ -179,8 +179,8 @@ probe scsi.iocompleted + + function timer_pending:long(timer:long) + { +- return (@choose_defined(@cast(timer, "timer_list")->entry->next, +- @cast(timer, "timer_list")->base) != 0) ++ return (@choose_defined(@cast(timer, "timer_list", "kernel")->entry->next, ++ @cast(timer, "timer_list", "kernel")->base) != 0) + } + + function scsi_timer_pending:long(cmd:long) +diff --git a/testsuite/buildok/pretty.stp b/testsuite/buildok/pretty.stp +index 85c9cd9..a2fc781 100755 +--- a/testsuite/buildok/pretty.stp ++++ b/testsuite/buildok/pretty.stp +@@ -6,14 +6,14 @@ global i = 1 + # pretty-printing with @cast + probe begin { + t = task_current() +- log(@cast(t, "task_struct")->fs$) +- log(@cast(t, "task_struct")->fs$$) +- log(@cast(t, "task_struct")->comm$) +- log(@cast(t, 
"task_struct")->comm$$) +- log(@cast(t, "task_struct")->comm[0]$) +- log(@cast(t, "task_struct")->comm[0]$$) +- log(@cast(t, "task_struct")->comm[i]$) +- log(@cast(t, "task_struct")->comm[i]$$) ++ log(@cast(t, "task_struct", "kernel")->fs$) ++ log(@cast(t, "task_struct", "kernel")->fs$$) ++ log(@cast(t, "task_struct", "kernel")->comm$) ++ log(@cast(t, "task_struct", "kernel")->comm$$) ++ log(@cast(t, "task_struct", "kernel")->comm[0]$) ++ log(@cast(t, "task_struct", "kernel")->comm[0]$$) ++ log(@cast(t, "task_struct", "kernel")->comm[i]$) ++ log(@cast(t, "task_struct", "kernel")->comm[i]$$) + } + + # pretty-printing in dwarf kernel context +diff --git a/testsuite/semok/cast.stp b/testsuite/semok/cast.stp +index d72763c..fe78e36 100755 +--- a/testsuite/semok/cast.stp ++++ b/testsuite/semok/cast.stp +@@ -2,7 +2,7 @@ + + probe begin { + // basic @cast test, with and without specifying kernel +- println(@cast(0, "task_struct")->tgid) ++ println(@cast(0, "task_struct", "kernel")->tgid) + println(@cast(0, "task_struct", "kernel")->tgid) + + // check module-search paths +@@ -25,5 +25,5 @@ probe begin { + @cast(0, "task_struct", "no_such_module")->tgid + + // PR11556: we should be able to treat the initial pointer like an array too +- println(@cast(0, "task_struct")[42]->tgid) ++ println(@cast(0, "task_struct", "kernel")[42]->tgid) + } +diff --git a/testsuite/semok/pretty.stp b/testsuite/semok/pretty.stp +index 0211d86..25490e7 100755 +--- a/testsuite/semok/pretty.stp ++++ b/testsuite/semok/pretty.stp +@@ -12,16 +12,16 @@ global i = 1 + # pretty-printing with @cast + probe begin { + t = task_current() +- log(@cast(t, "task_struct")$) +- log(@cast(t, "task_struct")$$) +- log(@cast(t, "task_struct")->fs$) +- log(@cast(t, "task_struct")->fs$$) +- log(@cast(t, "task_struct")->comm$) +- log(@cast(t, "task_struct")->comm$$) +- log(@cast(t, "task_struct")->comm[0]$) +- log(@cast(t, "task_struct")->comm[0]$$) +- log(@cast(t, "task_struct")->comm[i]$) +- log(@cast(t, 
"task_struct")->comm[i]$$) ++ log(@cast(t, "task_struct", "kernel")$) ++ log(@cast(t, "task_struct", "kernel")$$) ++ log(@cast(t, "task_struct", "kernel")->fs$) ++ log(@cast(t, "task_struct", "kernel")->fs$$) ++ log(@cast(t, "task_struct", "kernel")->comm$) ++ log(@cast(t, "task_struct", "kernel")->comm$$) ++ log(@cast(t, "task_struct", "kernel")->comm[0]$) ++ log(@cast(t, "task_struct", "kernel")->comm[0]$$) ++ log(@cast(t, "task_struct", "kernel")->comm[i]$) ++ log(@cast(t, "task_struct", "kernel")->comm[i]$$) + } + + # pretty-printing in dwarf kernel context +diff --git a/testsuite/semok/sizeof.stp b/testsuite/semok/sizeof.stp +index 8e35e29..a5a6bbb 100755 +--- a/testsuite/semok/sizeof.stp ++++ b/testsuite/semok/sizeof.stp +@@ -1,7 +1,7 @@ + #! stap -p2 + + probe begin { +- println("task_struct: ", @cast_sizeof("task_struct")) ++ # println("task_struct: ", @cast_sizeof("task_struct")) + println("task_struct: ", @cast_module_sizeof("kernel", "task_struct")) + println("task_struct: ", @cast_module_sizeof("kernel", "task_struct")) + println("FILE: ", @cast_module_sizeof("", "FILE")) +diff --git a/testsuite/semok/thirtyeight.stp b/testsuite/semok/thirtyeight.stp +index 15189b7..5018795 100755 +--- a/testsuite/semok/thirtyeight.stp ++++ b/testsuite/semok/thirtyeight.stp +@@ -7,4 +7,4 @@ + probe kernel.function("do_sys_open") { println(@defined($mode) ? 1 : $nosuchvar) } + probe kernel.trace("sched_switch")? { println(@defined($next->pid) ? 1 : $nosuchvar) } + probe procfs.write { println(@defined($value) ? 1 : $nosuchvar) } +-probe begin { println(@defined(@cast(0, "task_struct")->pid) ? 1 : $nosuchvar) } ++probe begin { println(@defined(@cast(0, "task_struct", "kernel")->pid) ? 
1 : $nosuchvar) } +diff --git a/testsuite/semok/thirtysix.stp b/testsuite/semok/thirtysix.stp +index 14f10c1..0adae14 100755 +--- a/testsuite/semok/thirtysix.stp ++++ b/testsuite/semok/thirtysix.stp +@@ -17,17 +17,17 @@ probe begin,end,error,never { println(@defined($nosuchvar)?$nosuchvar:0) } # inv + probe timer.s(1),timer.jiffies(1) { println(@defined($nosuchvar)?$nosuchvar:0) } # invalid + probe timer.profile { println(@defined($nosuchvar)?$nosuchvar:0) } # invalid + +-probe begin { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid +-probe begin { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid +-probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid +-probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid ++probe begin { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid ++probe begin { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid ++probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid ++probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid + +-function foo1() { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid +-function foo2() { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid ++function foo1() { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid ++function foo2() { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid + probe begin { foo1(); foo2(); } + + # PR11598: support @defined(&...) 
+-probe begin { println(@defined(@cast(0, "task_struct")->rcu)?$nosuchvar:0) } # invalid +-probe begin { println(@defined(&@cast(0, "task_struct")->rcu)?1:$nosuchvar) } # valid ++probe begin { println(@defined(@cast(0, "task_struct", "kernel")->rcu)?$nosuchvar:0) } # invalid ++probe begin { println(@defined(&@cast(0, "task_struct", "kernel")->rcu)?1:$nosuchvar) } # valid + probe kernel.function("release_task") { println(@defined($p->rcu)?$nosuchvar:0) } # invalid + probe kernel.function("release_task") { println(@defined(&$p->rcu)?1:$nosuchvar) } # valid +diff --git a/testsuite/systemtap.base/bitfield.stp b/testsuite/systemtap.base/bitfield.stp +index 0208108..b5f7b89 100644 +--- a/testsuite/systemtap.base/bitfield.stp ++++ b/testsuite/systemtap.base/bitfield.stp +@@ -11,8 +11,8 @@ function check:long(ack:long, urg:long) { + ptr = get_ptr() + + /* set the bits with cast */ +- @cast(ptr, "tcphdr")->ack = ack +- @cast(ptr, "tcphdr")->urg = urg ++ @cast(ptr, "tcphdr", "kernel")->ack = ack ++ @cast(ptr, "tcphdr", "kernel")->urg = urg + + /* check that reading with embedded-C is ok */ + real_ack = get_ack() +@@ -20,8 +20,8 @@ function check:long(ack:long, urg:long) { + errors = (ack != real_ack) + (urg != real_urg) + + /* check that reading with a cast is ok */ +- cast_ack = @cast(ptr, "tcphdr")->ack +- cast_urg = @cast(ptr, "tcphdr")->urg ++ cast_ack = @cast(ptr, "tcphdr", "kernel")->ack ++ cast_urg = @cast(ptr, "tcphdr", "kernel")->urg + errors += (ack != cast_ack) + (urg != cast_urg) + + if (errors) +diff --git a/testsuite/systemtap.base/target_set.stp b/testsuite/systemtap.base/target_set.stp +index ad4dca6..7c458cb 100644 +--- a/testsuite/systemtap.base/target_set.stp ++++ b/testsuite/systemtap.base/target_set.stp +@@ -19,13 +19,13 @@ probe begin + probe syscall.nanosleep + { + if (target_set_pid(pid()) +- && user_long(&@cast(req_uaddr, "struct timespec")->tv_sec) == $1) ++ && user_long(&@cast(req_uaddr, "struct timespec", "kernel")->tv_sec) == $1) + 
target_set_report() + } + probe syscall.compat_nanosleep ? + { + if (target_set_pid(pid()) +- && user_long(&@cast(req_uaddr, "struct compat_timespec")->tv_sec) == $1) ++ && user_long(&@cast(req_uaddr, "struct compat_timespec", "kernel")->tv_sec) == $1) + target_set_report() + } + +diff --git a/testsuite/systemtap.context/usymbols.exp b/testsuite/systemtap.context/usymbols.exp +index f53c1cd..e12f067 100644 +--- a/testsuite/systemtap.context/usymbols.exp ++++ b/testsuite/systemtap.context/usymbols.exp +@@ -20,7 +20,7 @@ set testscript { + probe syscall.rt_sigaction { + if (pid() == target() && execname() == "%s") { + // Note user address. +- handler = user_long(&@cast(act_uaddr, "struct sigaction")->sa_handler); ++ handler = user_long(&@cast(act_uaddr, "struct sigaction", "kernel")->sa_handler); + try { + printf("handler: %%s (%%s)\n", usymname(handler), umodname(handler)); + } catch { +@@ -31,9 +31,9 @@ set testscript { + probe syscall.rt_sigaction32 ? { + if (pid() == target() && execname() == "%s") { + // Note user address. +- handler = user_long(@defined(@cast(0, "compat_sigaction")->sa_handler) +- ? &@cast(act_uaddr, "compat_sigaction")->sa_handler +- : &@cast(act_uaddr, "sigaction32")->sa_handler); ++ handler = user_long(@defined(@cast(0, "compat_sigaction", "kernel")->sa_handler) ++ ? &@cast(act_uaddr, "compat_sigaction", "kernel")->sa_handler ++ : &@cast(act_uaddr, "sigaction32", "kernel")->sa_handler); + try { + printf("handler: %%s (%%s)\n", usymname(handler), umodname(handler)); + } catch { + +commit c6831f14e043f88096b2219828c0124cf2549b77 +Author: Frank Ch. Eigler +Date: Thu Jul 9 21:41:51 2020 -0400 + + testuite: More @cast() fallout + + Adjust another test case that uses the deprecated + + probe begin { @cast(PTR, "type") } + + construct. Now "kernel" is formally required to specify context. 
+ +diff --git a/testsuite/systemtap.base/cast.stp b/testsuite/systemtap.base/cast.stp +index cc44a36..0e191eb 100644 +--- a/testsuite/systemtap.base/cast.stp ++++ b/testsuite/systemtap.base/cast.stp +@@ -4,7 +4,7 @@ probe begin + + // Compare PIDs + pid = pid() +- cast_pid = @cast(curr, "task_struct")->tgid ++ cast_pid = @cast(curr, "task_struct", "kernel")->tgid + if (pid == cast_pid) + println("PID OK") + else +@@ -18,7 +18,7 @@ probe begin + printf("PID2 %d != %d\n", pid, cast_pid) + + // Compare PIDs with an array access (PR11556) +- cast_pid = @cast(curr, "task_struct")[0]->tgid ++ cast_pid = @cast(curr, "task_struct", "kernel")[0]->tgid + if (pid == cast_pid) + println("PID3 OK") + else +@@ -26,16 +26,16 @@ probe begin + + // Compare execnames + name = execname() +- cast_name = kernel_string(@cast(curr, "task_struct")->comm) ++ cast_name = kernel_string(@cast(curr, "task_struct", "kernel")->comm) + if (name == cast_name) + println("execname OK") + else + printf("execname \"%s\" != \"%s\"\n", name, cast_name) + + // Compare usage counter values through a struct address +- usage = @cast(curr, "task_struct")->usage->counter +- pusage = & @cast(curr, "task_struct")->usage +- cast_usage = @cast(pusage, "atomic_t")->counter ++ usage = @cast(curr, "task_struct", "kernel")->usage->counter ++ pusage = & @cast(curr, "task_struct", "kernel")->usage ++ cast_usage = @cast(pusage, "atomic_t", "kernel")->counter + if (usage == cast_usage) + println("usage OK") + else diff --git a/systemtap.spec b/systemtap.spec index a9492f0..2a44465 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -87,7 +87,7 @@ Name: systemtap Version: 4.3 -Release: 1%{?release_override}%{?dist} +Release: 2%{?release_override}%{?dist} # for version, see also configure.ac @@ -123,6 +123,9 @@ License: GPLv2+ URL: http://sourceware.org/systemtap/ Source: ftp://sourceware.org/pub/systemtap/releases/systemtap-%{version}.tar.gz +Patch10: rhbz1847676,1857749.patch +Patch11: rhbz1855264.patch + # Build* 
BuildRequires: gcc-c++ BuildRequires: cpio @@ -508,6 +511,9 @@ systemtap-runtime-virthost machine to execute systemtap scripts. %prep %setup -q +%patch10 -p1 +%patch11 -p1 + %build # Enable/disable the dyninst pure-userspace backend @@ -1226,6 +1232,9 @@ done # PRERELEASE %changelog +* Tue Jul 28 2020 Frank Ch. Eigler - 4.3-2 +- Backport important uprobes-inode and @cast() related changes. + * Thu Jun 11 2020 Frank Ch. Eigler - 4.3-1 - Upstream release. From 9eb659ff86190b43dc169aae0014fc9a02adcba0 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Mon, 9 Nov 2020 22:29:25 -0500 Subject: [PATCH 9/9] upstream release --- rhbz1847676,1857749.patch | 1403 ------------------------------------- rhbz1855264.patch | 874 ----------------------- sources | 2 +- systemtap.spec | 21 +- 4 files changed, 10 insertions(+), 2290 deletions(-) delete mode 100644 rhbz1847676,1857749.patch delete mode 100644 rhbz1855264.patch diff --git a/rhbz1847676,1857749.patch b/rhbz1847676,1857749.patch deleted file mode 100644 index 80721d9..0000000 --- a/rhbz1847676,1857749.patch +++ /dev/null @@ -1,1403 +0,0 @@ -commit 11c39a7375bd2759b53b89236e755c91a4f5aad8 -Author: Frank Ch. Eigler -Date: Tue Jun 16 20:35:53 2020 -0400 - - RHBZ1847676: uprobes-inode tweaks redux - - Added (back) a spinlock to manage the stapiu_consumer -> process_list - structure, since it is occasionally travered from uprobe pre-handlers, - which are sometimes entered in atomic context (e.g. on rhel7). There, - the normal mutex_t is unsafe. So restoring a spinlock_t just for - those shortlived traversals, rhel7 and rawhide are both happy. 
- -diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c -index 156360e..922c9f1 100644 ---- a/runtime/linux/uprobes-inode.c -+++ b/runtime/linux/uprobes-inode.c -@@ -143,7 +143,8 @@ struct stapiu_consumer { - struct list_head instance_list_head; // the resulting uprobe instances for this consumer - - struct list_head process_list_head; // the processes for this consumer -- -+ spinlock_t process_list_lock; // protect list; used briefly from even atomic contexts -+ - // List of perf counters used by each probe - // This list is an index into struct stap_perf_probe, - long perf_counters_dim; -@@ -174,16 +175,19 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) - - // First find the related process, set by stapiu_change_plus. - // NB: This is a linear search performed for every probe hit! -- // This could be an algorithmic problem if the list gets large, but -- // we'll wait until this is demonstratedly a hotspot before optimizing. -- mutex_lock(&c->consumer_lock); -+ // This could be an algorithmic problem if the list gets large, -+ // but we'll wait until this is demonstratedly a hotspot before -+ // optimizing. NB: on rhel7 sometimes we're invoked from atomic -+ // context, so must be careful to use the spinlock, not the -+ // mutex. 
-+ spin_lock(&c->process_list_lock); - list_for_each_entry(p, &c->process_list_head, process_list) { - if (p->tgid == current->tgid) { - process = p; - break; - } - } -- mutex_unlock(&c->consumer_lock); -+ spin_unlock(&c->process_list_lock); - if (!process) { - #ifdef UPROBE_HANDLER_REMOVE - /* Once we're past the starting phase, we can be sure that any -@@ -344,7 +348,7 @@ static void - stapiu_decrement_semaphores(struct stapiu_consumer *consumers, size_t nconsumers) - { - size_t i; -- /* NB: no stapiu_process_slots_lock needed, as the task_finder engine is -+ /* NB: no process_list_lock use needed as the task_finder engine is - * already stopped by now, so no one else will mess with us. We need - * to be sleepable for access_process_vm. */ - for (i = 0; i < nconsumers; ++i) { -@@ -433,7 +437,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) - INIT_LIST_HEAD(&c->instance_list_head); - INIT_LIST_HEAD(&c->process_list_head); - mutex_init(&c->consumer_lock); -- -+ spin_lock_init(&c->process_list_lock); -+ - dbug_uprobes("registering task-finder for procname:%s buildid:%s\n", - ((char*)c->finder.procname ?: (char*)""), - ((char*)c->finder.build_id ?: (char*)"")); -@@ -560,7 +565,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - * calls us in this case with relocation=offset=0, so - * we don't have to worry about it. */ - p->base = relocation - offset; -+ spin_lock (&c->process_list_lock); - list_add(&p->process_list, &c->process_list_head); -+ spin_unlock (&c->process_list_lock); - - rc = 0; - mutex_unlock(&c->consumer_lock); -@@ -587,28 +594,40 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - { - int rc = 0; - struct stapiu_process *p; -+ int any_found; - - if (! c->sdt_sem_offset) // nothing to do - return 0; - -- /* NB: no lock after this point, as we need to be sleepable for -- * get/put_user semaphore action. 
The given process should be frozen -- * while we're busy, so it's not an issue. -- */ -- -- mutex_lock(&c->consumer_lock); -- -+ // NB: we mustn't hold a lock while changing the task memory, -+ // but we need a lock to protect the process_list from concurrent -+ // add/delete. So hold a spinlock during iteration until the first -+ // hit, then unlock & process. NB: We could in principle have multiple -+ // instances of the same process in the list (e.g., if the process -+ // somehow maps in the same solib multiple times). We can't easily -+ // both iterate this list (in a spinlock-protected safe way), and -+ // relax the spinlock enough to do a safe stapiu_write_task_semaphore() -+ // call within the loop. So we will hit only the copy in our list. -+ any_found = 0; -+ spin_lock(&c->process_list_lock); - /* Look through all the consumer's processes and increment semaphores. */ - list_for_each_entry(p, &c->process_list_head, process_list) { - unsigned long addr = p->base + c->sdt_sem_offset; - if (addr >= relocation && addr < relocation + length) { -- int rc2 = stapiu_write_task_semaphore(task, addr, +1); -+ int rc2; -+ // unlock list and process write for this entry -+ spin_unlock(&c->process_list_lock); -+ any_found=1; -+ rc2 = stapiu_write_task_semaphore(task, addr, +1); - if (!rc) -- rc = rc2; -+ rc = rc2; -+ break; // exit list_for_each loop - } - } -- -- mutex_unlock(&c->consumer_lock); -+ if (! 
any_found) -+ spin_unlock(&c->process_list_lock); -+ else -+ ; // already unlocked - - return rc; - } -@@ -635,8 +654,9 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, - // process is dying anyway - // - the stapiu_consumer's process_list linked list will have a record - // of the dead process: well, not great, it'll be cleaned up eventually, -- // and cleaning it up NOW is tricky - need some spin lock to protect the list, -- // but not out sleepy mutex: -+ // and cleaning it up NOW is tricky - we could use the process_list_lock -+ // to protect the list (as done in stapiu_change_semaphore_plus), -+ // but not our sleepy mutex: - // - // [ 1955.410237] ? stapiu_change_minus+0x38/0xf0 [stap_54a723c01c50d972590a5c901516849_15522] - // [ 1955.411583] __mutex_lock+0x35/0x820 - -commit 4ccdfe4536d702612912e96d7b6278b169917eaa -Author: Frank Ch. Eigler -Date: Mon Jul 6 13:27:46 2020 -0400 - - RHBZ1847676 cont'd: more uprobes-inode/onthefly concurrency controls - - The systemtap.onthefly/*.exp tests had recently become hang-prone on - some kernels, for reasons still not completely understood. This set - of patches adds: - - - irq*-block spinlocks into uprobes-invoked paths, in case there is - peculiar reentrancy (from irq-related tracepoints) - - - a mutex lock/unlock into the stapiu_exit() path, in case there is - a concurrent stapiu_refresh() invoked by onthefly machinery around - exit time - - - restrictions into the onthefly module_refresh() translator code to - preclude STAP_SESSION_STOPPING as a time to do any sort of refresh - operation. Now probes that were disarmed will stay disarmed during - probe-end/error/etc. processing, which is always valid with the - spec, and avoids a class of late module-refresh ops - - Testing on rhel7 and rawhide indicates the reproducible hang is gone. 
- Our testsuite already tortures this code; invoke by hand via: - - % sudo make installcheck RUNTESTFLAGS="-v affection.exp hrtimer_onthefly.exp kprobes_onthefly.exp tracepoint_onthefly.exp uprobes_onthefly.exp" - -diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c -index 922c9f1..3de7281 100644 ---- a/runtime/linux/uprobes-inode.c -+++ b/runtime/linux/uprobes-inode.c -@@ -172,6 +172,7 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) - if (_stp_target) // need we filter by pid at all? - { - struct stapiu_process *p, *process = NULL; -+ unsigned long flags; - - // First find the related process, set by stapiu_change_plus. - // NB: This is a linear search performed for every probe hit! -@@ -180,14 +181,14 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) - // optimizing. NB: on rhel7 sometimes we're invoked from atomic - // context, so must be careful to use the spinlock, not the - // mutex. -- spin_lock(&c->process_list_lock); -+ spin_lock_irqsave(&c->process_list_lock, flags); - list_for_each_entry(p, &c->process_list_head, process_list) { - if (p->tgid == current->tgid) { - process = p; - break; - } - } -- spin_unlock(&c->process_list_lock); -+ spin_unlock_irqrestore(&c->process_list_lock, flags); - if (!process) { - #ifdef UPROBE_HANDLER_REMOVE - /* Once we're past the starting phase, we can be sure that any -@@ -398,7 +399,7 @@ static void - stapiu_consumer_refresh(struct stapiu_consumer *c) - { - struct stapiu_instance *inst; -- -+ - mutex_lock(& c->consumer_lock); - - list_for_each_entry(inst, &c->instance_list_head, instance_list) { -@@ -420,7 +421,10 @@ stapiu_exit(struct stapiu_consumer *consumers, size_t nconsumers) - stapiu_decrement_semaphores(consumers, nconsumers); - for (i = 0; i < nconsumers; ++i) { - struct stapiu_consumer *c = &consumers[i]; -+ // protect against conceivable stapiu_refresh() at same time -+ mutex_lock(& c->consumer_lock); - 
stapiu_consumer_unreg(c); -+ mutex_unlock(& c->consumer_lock); - /* NB: task_finder needs no unregister. */ - } - } -@@ -480,6 +484,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - struct stapiu_instance *inst = NULL; - struct stapiu_process *p; - int j; -+ unsigned long flags; - - if (! inode) { - rc = -EINVAL; -@@ -565,9 +570,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - * calls us in this case with relocation=offset=0, so - * we don't have to worry about it. */ - p->base = relocation - offset; -- spin_lock (&c->process_list_lock); -+ spin_lock_irqsave (&c->process_list_lock, flags); - list_add(&p->process_list, &c->process_list_head); -- spin_unlock (&c->process_list_lock); -+ spin_unlock_irqrestore (&c->process_list_lock, flags); - - rc = 0; - mutex_unlock(&c->consumer_lock); -@@ -595,6 +600,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - int rc = 0; - struct stapiu_process *p; - int any_found; -+ unsigned long flags; - - if (! c->sdt_sem_offset) // nothing to do - return 0; -@@ -609,14 +615,14 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - // relax the spinlock enough to do a safe stapiu_write_task_semaphore() - // call within the loop. So we will hit only the copy in our list. - any_found = 0; -- spin_lock(&c->process_list_lock); -+ spin_lock_irqsave(&c->process_list_lock, flags); - /* Look through all the consumer's processes and increment semaphores. 
*/ - list_for_each_entry(p, &c->process_list_head, process_list) { - unsigned long addr = p->base + c->sdt_sem_offset; - if (addr >= relocation && addr < relocation + length) { - int rc2; - // unlock list and process write for this entry -- spin_unlock(&c->process_list_lock); -+ spin_unlock_irqrestore(&c->process_list_lock, flags); - any_found=1; - rc2 = stapiu_write_task_semaphore(task, addr, +1); - if (!rc) -@@ -625,7 +631,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - } - } - if (! any_found) -- spin_unlock(&c->process_list_lock); -+ spin_unlock_irqrestore(&c->process_list_lock, flags); - else - ; // already unlocked - -diff --git a/translate.cxx b/translate.cxx -index 10b3d32..b10af5a 100644 ---- a/translate.cxx -+++ b/translate.cxx -@@ -2144,19 +2144,13 @@ c_unparser::emit_module_refresh () - o->newline() << "mutex_lock(&module_refresh_mutex);"; - - /* If we're not in STARTING/RUNNING state, don't try doing any work. -- PR16766 */ -+ PR16766. We don't want to run refresh ops during e.g. STOPPING, -+ so as to possibly activate uprobes near shutdown. */ - o->newline() << "state = atomic_read (session_state());"; -- o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING && state != STAP_SESSION_ERROR) {"; -- // cannot _stp_warn etc. since we're not in probe context -- o->newline(1) << "#if defined(__KERNEL__)"; -- o->newline() << "if (state != STAP_SESSION_STOPPING)"; -- o->newline(1) << "printk (KERN_ERR \"stap module notifier triggered in unexpected state %d\\n\", state);"; -- o->indent(-1); -- o->newline() << "#endif"; -- -+ o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING) {"; -+ o->newline(1); - if (!session->runtime_usermode_p()) - o->newline() << "mutex_unlock(&module_refresh_mutex);"; -- - o->newline() << "return;"; - o->newline(-1) << "}"; - - -commit 046fa017d2ab7fea1a4ba2295c31f768c072855e -Author: Frank Ch. 
Eigler -Date: Sun Jul 12 09:57:15 2020 -0400 - - RHBZ1847676 cont'd: one more uprobes-inode/onthefly concurrency control - - In uprobes-inode.c (stapiu_change_plus), the runtime can react to - arrivals of new mappings of a solib or executable by registering new - uprobes. Due to an assumption that this could not happen at - inconvenient times (such as a stapiu_refresh or near shutdown times), - the actual uprobes registration operation was done outside the - consumer_lock mutex being held. But it appears this can happen at bad - times, so the mutex needs to be held, just like within - stapiu_consumer_refresh(). - - The onthefly tests now survive iterating testing on rawhide+lockdep - and rhel7+lockdep. - -diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c -index 3de7281..01c8a07 100644 ---- a/runtime/linux/uprobes-inode.c -+++ b/runtime/linux/uprobes-inode.c -@@ -575,12 +575,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - spin_unlock_irqrestore (&c->process_list_lock, flags); - - rc = 0; -- mutex_unlock(&c->consumer_lock); -- - // Register actual uprobe if cond_enabled right now - if (c->probe->cond_enabled) - (void) stapiu_register(inst, c); -- goto out; -+ goto out1; - - out2: - _stp_kfree(inst); - -commit a9a0131eb59e8abc197d3d2a553a86bcdec3dd70 -Author: Frank Ch. Eigler -Date: Fri Jul 17 22:33:04 2020 -0400 - - rhbz1857749: uprobes-inode regression in sdt semaphore setting - - Previous code neglected to set sdt.h semaphores for more than the - first process systemtap happened to encounter. This was from a - mistaken understanding of what it meant for stapiu_change_plus() to be - called with the same inode/consumer combination. Even though uprobes - are automatically shared, each new process still needs its perfctr and - sdt-semaphores individually set, so we do that now (as before the - rework of this code). Mechanized testing incoming shortly. 
- -diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c -index 01c8a07..de81839 100644 ---- a/runtime/linux/uprobes-inode.c -+++ b/runtime/linux/uprobes-inode.c -@@ -190,6 +190,10 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs) - } - spin_unlock_irqrestore(&c->process_list_lock, flags); - if (!process) { -+ /* We know that we're in -c/-x mode, but this process is not -+ in the process hierarchy, so the uprobe should be ignored -+ and future hits prevented. PR15278 -+ */ - #ifdef UPROBE_HANDLER_REMOVE - /* Once we're past the starting phase, we can be sure that any - * processes which are executing code in a mapping have already -@@ -242,8 +246,8 @@ stapiu_register (struct stapiu_instance* inst, struct stapiu_consumer* c) - (unsigned long) inst->inode->i_ino, - (void*) (uintptr_t) c->offset, - c->probe->index, -- ((char*)c->finder.procname ?: (char*)""), -- ((char*)c->finder.build_id ?: (char*)"")); -+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), -+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); - - if (!c->return_p) { - inst->kconsumer.handler = stapiu_probe_prehandler; -@@ -444,8 +448,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) - spin_lock_init(&c->process_list_lock); - - dbug_uprobes("registering task-finder for procname:%s buildid:%s\n", -- ((char*)c->finder.procname ?: (char*)""), -- ((char*)c->finder.build_id ?: (char*)"")); -+ ((char*)c->finder.procname ?: ""), -+ ((char*)c->finder.build_id ?: "")); - - ret = stap_register_task_finder_target(&c->finder); - if (ret != 0) { -@@ -499,22 +503,22 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - if (rc) - goto out; - -- dbug_uprobes("notified for inode-offset u%sprobe " -+ dbug_uprobes("notified for inode-offset arrival u%sprobe " - "%lu:%p pidx %zu target procname:%s buildid:%s\n", - c->return_p ? 
"ret" : "", - (unsigned long) inode->i_ino, - (void*) (uintptr_t) c->offset, - c->probe->index, -- ((char*)c->finder.procname ?: (char*)""), -- ((char*)c->finder.build_id ?: (char*)"")); -+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), -+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); - - /* Check the buildid of the target (if we haven't already). We - * lock the target so we don't have concurrency issues. */ - mutex_lock(&c->consumer_lock); - -- // Check if we already have an instance for this inode, as though we -- // were called twice by task-finder mishap, or (hypothetically) the -- // shlib was mmapped twice. -+ // Check if we already have an instance for this inode. This is normal: -+ // if a different process maps the same solib, or forks into the same -+ // executable. In this case, we must not re-register the same uprobe. - list_for_each_entry(i, &c->instance_list_head, instance_list) { - if (i->inode == inode) { - inst = i; -@@ -522,28 +526,33 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - } - } - -- if (inst) { // wouldn't expect a re-notification -- if (inst->registered_p != c->probe->cond_enabled) -- // ... this should not happen -- ; -- goto out1; -- } -- -- // Normal case: need a new one. -- inst = _stp_kzalloc(sizeof(struct stapiu_instance)); -- if (! inst) { -- rc = -ENOMEM; -- goto out1; -- } -+ if (!inst) { // new instance; need new uprobe etc. -+ // Normal case: need a new one. -+ inst = _stp_kzalloc(sizeof(struct stapiu_instance)); -+ if (! inst) { -+ rc = -ENOMEM; -+ goto out1; -+ } - -- inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback -+ inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback -+ -+ /* Grab the inode first (to prevent TOCTTOU problems). 
*/ -+ inst->inode = igrab(inode); -+ if (!inst->inode) { -+ rc = -EINVAL; -+ goto out2; -+ } -+ -+ // Add the inode/instance to the list -+ list_add(&inst->instance_list, &c->instance_list_head); - -- /* Grab the inode first (to prevent TOCTTOU problems). */ -- inst->inode = igrab(inode); -- if (!inst->inode) { -- rc = -EINVAL; -- goto out2; -+ // Register the actual uprobe if cond_enabled already -+ if (c->probe->cond_enabled) -+ (void) stapiu_register(inst, c); - } -+ -+ // ... but we may have to do per-process work anyway: perfctr -+ // initialization and sdt.h semaphore manipulation! - - // Perform perfctr registration if required - for (j=0; j < c->perf_counters_dim; j++) { -@@ -551,12 +560,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - (void) _stp_perf_read_init ((c->perf_counters)[j], task); - } - -- // Add the inode/instance to the list -- list_add(&inst->instance_list, &c->instance_list_head); -- - // Associate this consumer with this process. If we encounter - // resource problems here, we don't really have to undo the uprobe -- // registrations etc. already in effect. -+ // registrations etc. already in effect. It may break correct -+ // tracking of process hierarchy in -c/-x operation, but too bad. - p = _stp_kzalloc(sizeof(struct stapiu_process)); - if (! p) { - rc = -ENOMEM; -@@ -573,11 +580,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - spin_lock_irqsave (&c->process_list_lock, flags); - list_add(&p->process_list, &c->process_list_head); - spin_unlock_irqrestore (&c->process_list_lock, flags); -- -+ // NB: actual semaphore value bumping is done later -+ - rc = 0; - // Register actual uprobe if cond_enabled right now -- if (c->probe->cond_enabled) -- (void) stapiu_register(inst, c); - goto out1; - - out2: -@@ -617,11 +623,21 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - /* Look through all the consumer's processes and increment semaphores. 
*/ - list_for_each_entry(p, &c->process_list_head, process_list) { - unsigned long addr = p->base + c->sdt_sem_offset; -+ if (p->tgid != task->tgid) // skip other processes in the list -+ continue; - if (addr >= relocation && addr < relocation + length) { - int rc2; - // unlock list and process write for this entry - spin_unlock_irqrestore(&c->process_list_lock, flags); - any_found=1; -+ -+ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " -+ "pidx %zu address %lx\n", -+ c->return_p ? "ret" : "", -+ (long) task->tgid, -+ c->probe->index, -+ (unsigned long) addr); -+ - rc2 = stapiu_write_task_semaphore(task, addr, +1); - if (!rc) - rc = rc2; -@@ -641,15 +657,8 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - * about the semaphores, so we can just release the process slot. */ - static int - stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, -- unsigned long relocation, unsigned long length) -+ unsigned long addr, unsigned long length) - { -- dbug_uprobes("notified for inode-offset departure u%sprobe " -- "pidx %zu target procname:%s buildid:%s\n", -- c->return_p ? "ret" : "", -- c->probe->index, -- ((char*)c->finder.procname ?: (char*)""), -- ((char*)c->finder.build_id ?: (char*)"")); -- - // We don't need do anything really. - // A process going away means: - // - its uprobes will no longer fire: no problem, the uprobe inode -@@ -674,6 +683,36 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task, - // [ 1955.436334] ? __x64_sys_execve+0x27/0x30 - // [ 1955.437700] ? do_syscall_64+0x5c/0xa0 - -+ // But as an optimization - to avoid having them build up indefinitely, -+ // and make semaphore operations go slowly, we will nuke matching entries anyway. 
-+ unsigned long flags; -+ struct stapiu_process *p, *tmp; -+ unsigned nmatch=0; -+ -+ spin_lock_irqsave(&c->process_list_lock, flags); -+ list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) { -+ // we nuke by matching semaphore address (where ..._semaphore_plus wrote) -+ // against the address range being unmapped -+ unsigned long semaddr = p->base + c->sdt_sem_offset; -+ if (p->tgid != task->tgid) // skip other processes in the list -+ continue; -+ if (semaddr >= addr && semaddr < addr + length) { -+ list_del(&p->process_list); -+ _stp_kfree (p); -+ nmatch ++; -+ } -+ } -+ spin_unlock_irqrestore(&c->process_list_lock, flags); -+ -+ if (nmatch > 0) -+ dbug_uprobes("notified for inode-offset departure u%sprobe " -+ "pidx %zu matches:%u procname:%s buildid:%s\n", -+ c->return_p ? "ret" : "", -+ c->probe->index, -+ nmatch, -+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")), -+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: ""))); -+ - return 0; - } - - -commit e90530877ee21cffa2a9d53567ba5b5de1dd9b32 -Author: Frank Ch. Eigler -Date: Mon Jul 27 07:58:30 2020 -0400 - - PR25568 / RHBZ1857749: buildid/uprobes/inode rework, task_finder etc. side - - During work on a new stress tests for build-id based probes (coming in - next commit), it was found that the task_finder2 logic for buildid - verification didn't, well, work, because it was never run (due to an - erroneous pathlen conditional), and couldn't be safely run where it - was (because it was under spinlock but would have done - access_process_vm). Reworked the relevant bits of task_finder2 to - perform build-id verification for processes later - during the quiesce - callback periods. (Buildid verification for solibs is already done - in the task_finder2 consumer uprobes-inode.c.) - - Testing with sdt_misc indicated a case where a preexisting process - (with solib sdt.h semaphores) was being attached to by a new stap - binary. 
task_finder2's enumeration of the preexising processes' - memory map segments violated assumptions by recent code related to - tracking in stapiu_process[] lists. (It did not mirror the temporal - ld.so mmap sequence.) Changed this tracking to use the inode* as the - key, and stop trying to track mapping lengths, to make positive - matches and eliminate duplicate stapiu_process[] entries for the same - (process,solib) permutation. Reworked stapiu_process[] accumulation - generally to move to the two immediate task_finder callbacks, out of - stapiu_change_plus(). - - Added lots of commentary and diagnostics throughout. stap - -DDEBUG_UPROBES give meaningful info about uprobes & sdt semaphores; - with -DDEBUG_TASK_FINDER, more but not overwhelming relevant info - appears. - -diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c -index 9777efb..8b8057a 100644 ---- a/runtime/linux/task_finder2.c -+++ b/runtime/linux/task_finder2.c -@@ -652,8 +652,8 @@ __verify_build_id(struct task_struct *tsk, unsigned long addr, - tsk_build_id[build_id_len] = '\0'; - - if (strcmp(build_id, tsk_build_id)) { -- dbug_task(2, "target build-id not matched: [%s] != [%s]\n", -- build_id, tsk_build_id); -+ dbug_task(2, "target build-id not matched: [%s] @ 0x%lx != [%s]\n", -+ build_id, addr, tsk_build_id); - return false; - } - -@@ -884,16 +884,9 @@ __stp_utrace_attach_match_filename(struct task_struct *tsk, - // procname/build-id and match an "all thread" probe. - if (tgt == NULL) - continue; -- /* buildid-based target */ -- else if (tgt->build_id_len > 0 && tgt->procname > 0 -- && !__verify_build_id(tsk, -- tgt->build_id_vaddr, -- tgt->build_id, -- tgt->build_id_len)) -- { -- continue; -- } -- else if (tgt->build_id_len == 0 && tgt->pathlen > 0 -+ /* buildid-based target ... 
gets checked in __stp_tf_quiesce_worker */ -+ /* procname-based target */ -+ else if (tgt->pathlen > 0 - && (tgt->pathlen != filelen - || strcmp(tgt->procname, filename) != 0)) - { -@@ -1341,6 +1334,34 @@ __stp_tf_quiesce_worker(struct task_work *work) - return; - } - -+ /* If we had a build-id based executable probe (so we have a -+ * tgt->build_id) set, we could not check it back in -+ * __stp_utrace_attach_* because we can't do sleepy -+ * access_process_vm() calls from there. BUt now that we're -+ * in process context, quiesced, finally we can check. If we -+ * were build-id based, and the build-id does not match, then -+ * we UTRACE_DETACH from this process and skip the callbacks. -+ * -+ * XXX: For processes that do match, we redo this check every -+ * time this callbacks is encountered somehow. That's -+ * probably unnecessary. -+ */ -+ if (tgt->build_id_len > 0) { -+ int ok = __verify_build_id(current, -+ tgt->build_id_vaddr, -+ tgt->build_id, -+ tgt->build_id_len); -+ -+ dbug_task(2, "verified buildid-target process pid=%ld ok=%d\n", -+ (long) current->tgid, ok); -+ if (!ok) { -+ // stap_utrace_detach (current, & tgt->ops); -+ /* Remember that this task_work_func is finished. */ -+ stp_task_work_func_done(); -+ return; -+ } -+ } -+ - __stp_tf_handler_start(); - - /* NB make sure we run mmap callbacks before other callbacks -@@ -1434,6 +1455,21 @@ __stp_utrace_task_finder_target_quiesce(u32 action, - } - } - else { -+ /* Like in __stp_tf_quiesce_worker(), verify build-id now if belated. 
*/ -+ if (tgt->build_id_len > 0) { -+ int ok = __verify_build_id(current, -+ tgt->build_id_vaddr, -+ tgt->build_id, -+ tgt->build_id_len); -+ -+ dbug_task(2, "verified2 buildid-target process pid=%ld ok=%d\n", -+ (long) current->tgid, ok); -+ if (!ok) { -+ __stp_tf_handler_end(); -+ return UTRACE_RESUME; // NB: not _DETACH; that interferes with other engines -+ } -+ } -+ - /* NB make sure we run mmap callbacks before other callbacks - * like 'probe process.begin' handlers so that the vma tracker - * is already initialized in the latter contexts */ -@@ -1797,15 +1833,7 @@ stap_start_task_finder(void) - struct stap_task_finder_target, list); - if (tgt == NULL) - continue; -- /* buildid-based target */ -- else if (tgt->build_id_len > 0 && tgt->procname > 0 -- && !__verify_build_id(tsk, -- tgt->build_id_vaddr, -- tgt->build_id, -- tgt->build_id_len)) -- { -- continue; -- } -+ /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */ - /* procname-based target */ - else if (tgt->build_id == 0 && tgt->pathlen > 0 - && (tgt->pathlen != mmpathlen -diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c -index de81839..757da30 100644 ---- a/runtime/linux/uprobes-inode.c -+++ b/runtime/linux/uprobes-inode.c -@@ -76,7 +76,7 @@ struct stapiu_instance { - struct list_head instance_list; // to find other instances e.g. during shutdown - - struct uprobe_consumer kconsumer; // the kernel-side struct for uprobe callbacks etc. -- struct inode *inode; // XXX: refcount? -+ struct inode *inode; // refcounted - unsigned registered_p:1; // whether the this kconsumer is registered (= armed, live) - - struct stapiu_consumer *sconsumer; // whose instance are we -@@ -86,10 +86,14 @@ struct stapiu_instance { - /* A snippet to record the per-process vm where a particular - executable/solib was mapped. Used for sdt semaphore setting, and - for identifying processes of our interest (vs. disinterest) for -- uprobe hits. This object is owned by a stapiu_consumer. 
*/ -+ uprobe hits. This object is owned by a stapiu_consumer. We use -+ the same inode* as the stapiu_instance, and have the same lifespan, -+ so don't bother separately refcount it. -+*/ - struct stapiu_process { - struct list_head process_list; // to find other processes - -+ struct inode *inode; // the inode* for solib or executable - unsigned long relocation; // the mmap'ed .text address - unsigned long base; // the address to apply sdt offsets against - pid_t tgid; // pid -@@ -392,6 +396,7 @@ stapiu_consumer_unreg(struct stapiu_consumer *c) - // multiple times in the list. Don't break after the first. - list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) { - list_del(&p->process_list); -+ // no refcount used for the inode field - _stp_kfree (p); - } - } -@@ -498,6 +503,8 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - /* Do the buildid check. NB: on F29+, offset may not equal - 0 for LOADable "R E" segments, because the read-only .note.* - stuff may have been loaded earlier, separately. PR23890. */ -+ // NB: this is not really necessary for buildid-based probes, -+ // which had this verified already. - rc = _stp_usermodule_check(task, c->module_name, - relocation - offset); - if (rc) -@@ -527,7 +534,6 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - } - - if (!inst) { // new instance; need new uprobe etc. -- // Normal case: need a new one. - inst = _stp_kzalloc(sizeof(struct stapiu_instance)); - if (! inst) { - rc = -ENOMEM; -@@ -560,30 +566,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - (void) _stp_perf_read_init ((c->perf_counters)[j], task); - } - -- // Associate this consumer with this process. If we encounter -- // resource problems here, we don't really have to undo the uprobe -- // registrations etc. already in effect. It may break correct -- // tracking of process hierarchy in -c/-x operation, but too bad. 
-- p = _stp_kzalloc(sizeof(struct stapiu_process)); -- if (! p) { -- rc = -ENOMEM; -- goto out1; -- } -- p->tgid = task->tgid; -- p->relocation = relocation; -- /* The base is used for relocating semaphores. If the -- * probe is in an ET_EXEC binary, then that offset -- * already is a real address. But stapiu_process_found -- * calls us in this case with relocation=offset=0, so -- * we don't have to worry about it. */ -- p->base = relocation - offset; -- spin_lock_irqsave (&c->process_list_lock, flags); -- list_add(&p->process_list, &c->process_list_head); -- spin_unlock_irqrestore (&c->process_list_lock, flags); -- // NB: actual semaphore value bumping is done later -+ // NB: process_list[] already extended up in stapiu_mmap_found(). - - rc = 0; -- // Register actual uprobe if cond_enabled right now - goto out1; - - out2: -@@ -599,7 +584,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task, - * Increment the semaphore now. */ - static int - stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task, -- unsigned long relocation, unsigned long length) -+ unsigned long relocation, struct inode* inode) - { - int rc = 0; - struct stapiu_process *p; -@@ -609,6 +594,13 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - if (! c->sdt_sem_offset) // nothing to do - return 0; - -+ dbug_uprobes("considering semaphore (u%sprobe) pid %ld inode 0x%lx" -+ "pidx %zu\n", -+ c->return_p ? "ret" : "", -+ (long) task->tgid, -+ (unsigned long) inode, -+ c->probe->index); -+ - // NB: we mustn't hold a lock while changing the task memory, - // but we need a lock to protect the process_list from concurrent - // add/delete. So hold a spinlock during iteration until the first -@@ -617,32 +609,31 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task - // somehow maps in the same solib multiple times). 
We can't easily - // both iterate this list (in a spinlock-protected safe way), and - // relax the spinlock enough to do a safe stapiu_write_task_semaphore() -- // call within the loop. So we will hit only the copy in our list. -+ // call within the loop. So we will hit only the first copy in our list. - any_found = 0; - spin_lock_irqsave(&c->process_list_lock, flags); - /* Look through all the consumer's processes and increment semaphores. */ - list_for_each_entry(p, &c->process_list_head, process_list) { - unsigned long addr = p->base + c->sdt_sem_offset; -- if (p->tgid != task->tgid) // skip other processes in the list -- continue; -- if (addr >= relocation && addr < relocation + length) { -- int rc2; -- // unlock list and process write for this entry -- spin_unlock_irqrestore(&c->process_list_lock, flags); -- any_found=1; -- -- dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " -- "pidx %zu address %lx\n", -- c->return_p ? "ret" : "", -- (long) task->tgid, -- c->probe->index, -- (unsigned long) addr); -- -- rc2 = stapiu_write_task_semaphore(task, addr, +1); -- if (!rc) -- rc = rc2; -- break; // exit list_for_each loop -- } -+ int rc2; -+ if (p->tgid != task->tgid) continue; // skip other processes in the list -+ if (p->inode != inode) continue; // skip other inodes -+ -+ // unlock list and process write for this entry -+ spin_unlock_irqrestore(&c->process_list_lock, flags); -+ any_found=1; -+ -+ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld " -+ "pidx %zu address 0x%lx\n", -+ c->return_p ? "ret" : "", -+ (long) task->tgid, -+ c->probe->index, -+ (unsigned long) addr); -+ -+ rc2 = stapiu_write_task_semaphore(task, addr, +1); -+ if (!rc) -+ rc = rc2; -+ break; // exit list_for_each loop - } - if (! 
any_found) - spin_unlock_irqrestore(&c->process_list_lock, flags); -@@ -755,17 +746,41 @@ stapiu_process_found(struct stap_task_finder_target *tf_target, - - if (!process_p) - return 0; /* ignore threads */ -- -+ -+ dbug_uprobes("process_found pid=%ld f.p=%s f.b=%s c.p=%s c.b=%s\n", -+ (long)task->tgid, -+ ((char*)c->finder.procname ?: ""), -+ ((char*)c->finder.build_id ?: ""), -+ ((char*)c->solib_pathname ?: ""), -+ ((char*)c->solib_build_id ?: "")); -+ - /* ET_EXEC events are like shlib events, but with 0 relocation bases */ - if (register_p) { - int rc = -EINVAL; - struct inode *inode = stapiu_get_task_inode(task); - - if (inode) { -- rc = stapiu_change_plus(c, task, 0, TASK_SIZE, -- 0, 0, inode); -- stapiu_change_semaphore_plus(c, task, 0, -- TASK_SIZE); -+ // Add a stapiu_process record to the consumer, so that -+ // the semaphore increment logic will accept this task. -+ struct stapiu_process* p; -+ unsigned long flags; -+ p = _stp_kzalloc(sizeof(struct stapiu_process)); -+ if (p) { -+ p->tgid = task->tgid; -+ p->relocation = 0; -+ p->inode = inode; -+ p->base = 0; -+ spin_lock_irqsave (&c->process_list_lock, flags); -+ list_add(&p->process_list, &c->process_list_head); -+ spin_unlock_irqrestore (&c->process_list_lock, flags); -+ } else { -+ _stp_warn("out of memory tracking executable in process %ld\n", -+ (long) task->tgid); -+ } -+ -+ rc = stapiu_change_plus(c, task, 0, TASK_SIZE, 0, 0, inode); -+ -+ stapiu_change_semaphore_plus(c, task, 0, inode); - } - return rc; - } else -@@ -776,6 +791,8 @@ stapiu_process_found(struct stap_task_finder_target *tf_target, - bool - __verify_build_id (struct task_struct *tsk, unsigned long addr, - unsigned const char *build_id, int build_id_len); -+// defined in task_finder2.c -+ - - - /* The task_finder_mmap_callback. 
These callbacks are NOT -@@ -791,28 +808,119 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target, - struct stapiu_consumer *c = - container_of(tf_target, struct stapiu_consumer, finder); - int rc = 0; -+ struct stapiu_process* p; -+ int known_mapping_p; -+ unsigned long flags; - -- /* The file path or build-id must match. The build-id address -- * is calculated using start address of this vma, the file -- * offset of the vma start address and the file offset of -- * the build-id. */ -- if (c->solib_pathname && path && strcmp (path, c->solib_pathname)) -- return 0; -- if (c->solib_build_id_len > 0 && !__verify_build_id(task, -- addr - offset + c->solib_build_id_vaddr, -- c->solib_build_id, -- c->solib_build_id_len)) -- return 0; -+ /* -+ We need to verify that this file/mmap corresponds to the given stapiu_consumer. -+ One could compare (inode) file name, but that won't work with buildid-based -+ uprobes. For those, one cannot just -+ -+ __verify_build_id(... addr - offset + c->solib_build_id_vaddr ...) -+ -+ because dlopen()ing a shared library involves multiple mmaps, including -+ some at repeating/offset addresses. See glibc _dl_map_segments() in various -+ versions. So by the fourth call (!) on modern glibc's, we get a VM_WRITE-able -+ data segment mapped, but that's at a load/mapping address that is offset by a -+ page from the base (file offset=0) mapping. -+ -+ e.g. 
on Fedora 32 / glibc 2.31, with testsuite/libsdt_buildid.so: -+ -+ Program Headers: -+ Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -+ LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0004b8 0x0004b8 R 0x1000 -+ LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000161 0x000161 R E 0x1000 -+ LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x0000cc 0x0000cc R 0x1000 -+ LOAD 0x002df8 0x0000000000003df8 0x0000000000003df8 0x000232 0x000238 RW 0x1000 -+ DYNAMIC 0x002e10 0x0000000000003e10 0x0000000000003e10 0x0001d0 0x0001d0 RW 0x8 -+ -+ strace: -+ openat(AT_FDCWD, ".../libsdt_buildid.so", O_RDONLY|O_CLOEXEC) = 3 -+ mmap(NULL, 16432, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x148c764ac000 -+ mmap(0x148c764ad000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x148c764ad000 -+ mmap(0x148c764ae000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764ae000 -+ mmap(0x148c764af000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764af000 -+ -+ Note how the virtual mapping for the fourth mmap (also) maps file-offset 0x2000 at -+ vm offset 0x3000. -+ -+ So what we do is rely on the name/buildid validation tests being run -+ -earlier- in the dlopen/mmap sequence to validate near-future -+ mmap()s. We search the c->process_list[] for a mapping that already -+ overlaps the new range, and if so, consider it validated ... whether -+ for the solib_pathname or the solib_build_id case. -+ -+ This is complicated for startup-time traversal of processes/mmaps, -+ where it seems sometimes we get notifications out of temporal sequence. -+ */ - -- /* 1 - shared libraries' executable segments load from offset 0 -- * - ld.so convention offset != 0 is now allowed -- * so stap_uprobe_change_plus can set a semaphore, -- * i.e. 
a static extern, in a shared object -- * 2 - the shared library we're interested in -- * 3 - mapping should be executable or writeable (for -- * semaphore in .so) -- * NB: or both, on kernels that lack noexec mapping -- */ -+ known_mapping_p = 0; -+ spin_lock_irqsave(&c->process_list_lock, flags); -+ list_for_each_entry(p, &c->process_list_head, process_list) { -+ if (p->tgid != task->tgid) continue; -+ if (p->inode != dentry->d_inode) continue; -+ known_mapping_p = 1; -+ break; -+ } -+ spin_unlock_irqrestore(&c->process_list_lock, flags); -+ -+ -+ // Check if this mapping (solib) is of interest: whether we expect -+ // it by buildid or name. -+ -+ if (! known_mapping_p) { -+ /* The file path or build-id must match. The build-id address -+ * is calculated using start address of this vma, the file -+ * offset of the vma start address and the file offset of -+ * the build-id. */ -+ if (c->solib_pathname && path && strcmp (path, c->solib_pathname)) -+ return 0; -+ if (c->solib_build_id_len > 0 && !__verify_build_id(task, -+ addr - offset + c->solib_build_id_vaddr, -+ c->solib_build_id, -+ c->solib_build_id_len)) -+ return 0; -+ } -+ -+ // If we made it this far, we have an interesting solib. -+ -+ dbug_uprobes("mmap_found pid=%ld path=%s addr=0x%lx length=%lu offset=%lu flags=0x%lx known=%d\n", -+ (long) task->tgid, path, addr, length, offset, vm_flags, known_mapping_p); -+ -+ if (! known_mapping_p) { -+ // OK, let's add it. The first mapping should be a VM_READ mapping -+ // of the entire solib file, which will also serve as the apprx. -+ // outer bounds of the repeatedly-mapped segments. -+ -+#if 0 -+ // Consider an assumption about the dlopen/mmap sequence -+ // If it comes out of sequence, we could get length/base wrong in the stored -+ // stapiu_process, which could lead us to miscalculate semaphore addresses. 
-+ // -+ // However, this has been observed on task-finder initial-enumeration case, -+ // (sdt_misc.exp, where a solib test is already running when stap starts). -+ if (offset != 0) -+ return 0; -+#endif -+ -+ // Associate this consumer with this process. If we encounter -+ // resource problems here, we don't really have to undo the uprobe -+ // registrations etc. already in effect. It may break correct -+ // tracking of process hierarchy in -c/-x operation, but too bad. -+ p = _stp_kzalloc(sizeof(struct stapiu_process)); -+ if (p) { -+ p->tgid = task->tgid; -+ p->relocation = addr; -+ p->inode = dentry->d_inode; -+ p->base = addr-offset; // ... in case caught this during the second mmap -+ spin_lock_irqsave (&c->process_list_lock, flags); -+ list_add(&p->process_list, &c->process_list_head); -+ spin_unlock_irqrestore (&c->process_list_lock, flags); -+ } else -+ _stp_warn("out of memory tracking solib %s in process %ld\n", -+ path, (long) task->tgid); -+ } - - /* Check non-writable, executable sections for probes. */ - if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE)) -@@ -827,7 +935,7 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target, - */ - - if ((rc == 0) && (vm_flags & VM_WRITE)) -- rc = stapiu_change_semaphore_plus(c, task, addr, length); -+ rc = stapiu_change_semaphore_plus(c, task, addr, dentry->d_inode); - - return rc; - } -diff --git a/runtime/sym.c b/runtime/sym.c -index be09ec8..21d820a 100644 ---- a/runtime/sym.c -+++ b/runtime/sym.c -@@ -713,9 +713,10 @@ static int _stp_build_id_check (struct _stp_module *m, - // NB: It is normal for different binaries with the same file path - // coexist in the same system via chroot or namespaces, therefore - // we make sure below is really a warning. 
-- _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" address " -+ _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address " - "%#lx, expected %s actual %s\n", -- m->path, notes_addr, hexstring_theory, hexstring_practice); -+ m->path, (long) tsk->tgid, -+ notes_addr, hexstring_theory, hexstring_practice); - return 1; - } - - -commit 5e1ef9d7f2a5ea6e5511ef5228cf05dda1c570b3 -Author: Frank Ch. Eigler -Date: Mon Jul 27 07:58:30 2020 -0400 - - PR25568 / RHBZ1857749: sdt_buildid.exp test case - - Add new test that checks for combinations of buildid and pathname - based uprobes for executables and shared libraries. - -diff --git a/testsuite/systemtap.base/sdt_buildid.c b/testsuite/systemtap.base/sdt_buildid.c -new file mode 100644 -index 0000000..ccbb2f2 ---- /dev/null -+++ b/testsuite/systemtap.base/sdt_buildid.c -@@ -0,0 +1,26 @@ -+#include -+#include -+#include -+ -+void bar (); -+ -+#ifndef ONLY_MAIN -+#include "sdt_buildid_.h" -+ -+void -+bar () -+{ -+ printf("%s=%ld\n", "test_probe_0_semaphore", SDT_BUILDID_TEST_PROBE_0_ENABLED()); -+ if (SDT_BUILDID_TEST_PROBE_0_ENABLED()) -+ SDT_BUILDID_TEST_PROBE_0(); -+} -+#endif -+ -+#ifndef NO_MAIN -+int -+main () -+{ -+ bar(); -+ return 0; -+} -+#endif -diff --git a/testsuite/systemtap.base/sdt_buildid.exp b/testsuite/systemtap.base/sdt_buildid.exp -new file mode 100644 -index 0000000..3141fd6 ---- /dev/null -+++ b/testsuite/systemtap.base/sdt_buildid.exp -@@ -0,0 +1,214 @@ -+set test "sdt_buildid" -+ -+set pbtype_flags {{additional_flags=-g} {} {}} -+set fail_count 0 -+ -+# Compile a C program to use as the user-space probing target -+set stap_path $env(SYSTEMTAP_PATH)/stap -+set sup_dpath "[pwd]/sdt_buildid_.d" -+set sup_hpath "[pwd]/sdt_buildid_.h" -+set sup_opath "[pwd]/sdt_buildid_.o" -+ -+# Run dtrace -+if {[installtest_p]} { -+ set dtrace $env(SYSTEMTAP_PATH)/dtrace -+} else { -+ set dtrace ../dtrace -+} -+ -+verbose -log "$dtrace --types -h -s $srcdir/$subdir/sdt_buildid_.d" -+if 
{[catch {exec $dtrace --types -h -s \ -+ $srcdir/$subdir/sdt_buildid_.d} res]} { -+ verbose -log "unable to run $dtrace: $res" -+} -+verbose -log "$dtrace --types -G -s $srcdir/$subdir/sdt_buildid_.d" -+if {[catch {exec $dtrace --types -G -s \ -+ $srcdir/$subdir/sdt_buildid_.d} res]} { -+ verbose -log "unable to run $dtrace: $res" -+} -+if {[file exists $sup_hpath] && [file exists $sup_opath]} then { -+ pass "$test dtrace" -+} else { -+ incr fail_count -+ fail "$test dtrace" -+ return -+} -+ -+set sup_flags [sdt_includes] -+set sup_flags "$sup_flags additional_flags=-Wall" -+set sup_flags "$sup_flags additional_flags=-Werror" -+set sup_flags "$sup_flags additional_flags=$sup_opath" -+set sup_flags "$sup_flags additional_flags=-I." -+set sup_exepath "[pwd]/sdt_buildid.x" -+ -+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exepath \ -+ executable $sup_flags] -+if { $res != "" } { -+ incr fail_count -+ verbose "target_compile failed: $res" 2 -+ fail "$test compiling" -+ return -+} else { -+ pass "$test compiling" -+} -+ -+ -+set sup41_flags "$sup_flags additional_flags=-shared" -+set sup41_flags "$sup41_flags additional_flags=-fPIC" -+set sup41_flags "$sup41_flags additional_flags=-DNO_MAIN" -+set sup_sopath "[pwd]/libsdt_buildid.so" -+set sup_exe2path "[pwd]/sdt_buildid_shared.x" -+set res0 [target_compile $srcdir/$subdir/sdt_buildid.c $sup_sopath \ -+ executable $sup41_flags ] -+set sup42_flags "additional_flags=-Wl,-rpath,[pwd]" -+set sup42_flags "$sup42_flags additional_flags=-L[pwd] additional_flags=-lsdt_buildid" -+set sup42_flags "$sup42_flags additional_flags=-DONLY_MAIN" -+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exe2path \ -+ executable $sup42_flags ] -+if { $res0 != "" || $res != "" } { -+ incr fail_count -+ verbose "target_compile failed: $res0 $res" 2 -+ fail "$test compiling -shared" -+ return -+} else { -+ pass "$test compiling -shared" -+} -+ -+catch { exec eu-readelf -n $sup_exepath | grep Build.ID | awk "{print \$NF}" } 
bid1 -+catch { exec eu-readelf -n $sup_sopath | grep Build.ID | awk "{print \$NF}" } bidso -+catch { exec eu-readelf -n $sup_exe2path | grep Build.ID | awk "{print \$NF}" } bid2 -+verbose -log "buildid: $sup_exepath $bid1" -+verbose -log "buildid: $sup_sopath $bidso" -+verbose -log "buildid: $sup_exe2path $bid2" -+# though we won't use the $bid2 -+ -+if {![installtest_p]} { -+ untested $test -+ return -+} -+ -+# To test via build-id, we need a debuginfod server to scan the testsuite build -+# directory. -+ -+ -+if [catch {exec /usr/bin/which debuginfod} debuginfod] then { -+ untested "$test debuginfod" -+} else { -+ set port [expr {10000 + int(rand()*10000)}] -+ spawn $debuginfod -p $port -d :memory: -F . -+ set debuginfod_pid [exp_pid $spawn_id] -+ # give it time to scan the build directory -+ sleep 10 -+ # XXX: we could expect some verbose traffic -+ set env(DEBUGINFOD_URLS) "http://localhost:$port $env(DEBUGINFOD_URLS)" -+ verbose -log "started debuginfod on port $port" -+ -+ set subtest "$test debuginfod buildid-exe buildid-solib" -+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $bidso -+ set ok 0 -+ expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+ } -+ catch {close}; catch {wait} -+ if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+ set subtest "$test debuginfod buildid-exe path-solib" -+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $sup_sopath -+ set ok 0 -+ expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+ } -+ catch {close}; catch {wait} -+ if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+ set subtest "$test debuginfod path-exe buildid-solib" -+ spawn 
$stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $bidso -+ set ok 0 -+ expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+ } -+ catch {close}; catch {wait} -+ if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+ set subtest "$test debuginfod buildid-solib" -+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bidso -+ set ok 0 -+ expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+ } -+ catch {close}; catch {wait} -+ if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+ set subtest "$test debuginfod buildid-exe" -+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 -+ set ok 0 -+ expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ eof { } -+ timeout { } -+ } -+ catch {close}; catch {wait} -+ if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+ kill -INT $debuginfod_pid -+} -+ -+ -+set subtest "$test non-buildid both" -+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $sup_sopath -+set ok 0 -+expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+} -+catch {close}; catch {wait} -+if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+set subtest "$test non-buildid exe" -+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath -+set ok 0 -+expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec 
$sup_exepath; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue } -+ eof { } -+ timeout { } -+} -+catch {close}; catch {wait} -+if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+set subtest "$test non-buildid solib" -+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_sopath -+set ok 0 -+expect { -+ -timeout 240 -+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue } -+ eof { } -+ timeout { } -+} -+catch {close}; catch {wait} -+if {$ok > 6} then { pass $subtest } else { fail $subtest } -+ -+return -diff --git a/testsuite/systemtap.base/sdt_buildid.stp b/testsuite/systemtap.base/sdt_buildid.stp -new file mode 100644 -index 0000000..a26d183 ---- /dev/null -+++ b/testsuite/systemtap.base/sdt_buildid.stp -@@ -0,0 +1,19 @@ -+global count -+ -+function trace () { -+ printf ("Count %d [%d] %s %s\n", count++, pid(), $$name, pp()) -+} -+ -+probe process(@1).mark("test_probe_0") { trace() } -+%( $# > 1 %? probe process(@2).mark("test_probe_0") { trace() } %) -+ -+probe begin -+{ -+ printf ("Count %d\n", count++) -+} -+ -+probe timer.s(1) // exit quickly after enough marks fire -+{ -+ if (count > 10) exit() -+} -+ -diff --git a/testsuite/systemtap.base/sdt_buildid_.d b/testsuite/systemtap.base/sdt_buildid_.d -new file mode 100644 -index 0000000..ebfca55 ---- /dev/null -+++ b/testsuite/systemtap.base/sdt_buildid_.d -@@ -0,0 +1,4 @@ -+provider sdt_buildid { -+ probe test_probe_0 (); -+}; -+ diff --git a/rhbz1855264.patch b/rhbz1855264.patch deleted file mode 100644 index c361090..0000000 --- a/rhbz1855264.patch +++ /dev/null @@ -1,874 +0,0 @@ -commit 0a281a96ddf7cae9a0f0cc0eb505a752ffdd932e -Author: William Cohen -Date: Tue Jun 16 16:02:11 2020 -0400 - - Make sizeof.stp runnable with the bpf backend. 
- -diff --git a/testsuite/systemtap.examples/general/sizeof.meta b/testsuite/systemtap.examples/general/sizeof.meta -index 29713e4..b30078d 100644 ---- a/testsuite/systemtap.examples/general/sizeof.meta -+++ b/testsuite/systemtap.examples/general/sizeof.meta -@@ -2,7 +2,7 @@ title: Print the Size of a C Type - name: sizeof.stp - version: 1.0 - author: anonymous --keywords: statistics memory -+keywords: statistics memory bpf - subsystem: any - status: proposed - exit: event-ended -@@ -11,3 +11,5 @@ scope: system-wide - description: This script prints the size of a type, based on dwarf debuginfo for any kernel or userspace module, or trial-compilation of a given header file name. - test_check: stap -p4 sizeof.stp task_struct 'kernel' - test_installcheck: stap sizeof.stp FILE '' -+test_check_bpf: stap -p4 --bpf sizeof.stp task_struct 'kernel' -+test_installcheck_bpf: stap --bpf sizeof.stp FILE '' -diff --git a/testsuite/systemtap.examples/general/sizeof.stp b/testsuite/systemtap.examples/general/sizeof.stp -index 0c77dce..5aec674 100755 ---- a/testsuite/systemtap.examples/general/sizeof.stp -+++ b/testsuite/systemtap.examples/general/sizeof.stp -@@ -7,9 +7,11 @@ - # sizeof.stp TYPENAME - - probe oneshot { -- println("type ", @1, -- %( $# > 1 %? " in ", @2, %) /* module or header file name */ -- " byte-size: ", -- %( $# > 1 %? @cast_module_sizeof(@2, @1) %: @cast_sizeof(@1) %) -- ) -+ %( $# > 1 %? -+ printf("type %s in %s byte-size: %d\n", @1, @2, -+ @cast_module_sizeof(@2, @1)) -+ %: -+ printf("type %s byte-size: %d\n", @1, -+ @cast_sizeof(@1)) -+ %) - } -commit 2b2b6a622dc1d434c60d0ea159b260f660068ad1 -Author: William Cohen -Date: Wed Jun 17 11:57:18 2020 -0400 - - Fix sizeof.stp to explicitly use kernel debuginfo if one not specified - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. 
Need to use the @cast_module_sizeof() - instead of @cast_size() to use the kernel debuginfo. - -diff --git a/testsuite/systemtap.examples/general/sizeof.stp b/testsuite/systemtap.examples/general/sizeof.stp -index 5aec674..b45f593 100755 ---- a/testsuite/systemtap.examples/general/sizeof.stp -+++ b/testsuite/systemtap.examples/general/sizeof.stp -@@ -12,6 +12,6 @@ probe oneshot { - @cast_module_sizeof(@2, @1)) - %: - printf("type %s byte-size: %d\n", @1, -- @cast_sizeof(@1)) -+ @cast_module_sizeof("kernel", @1)) - %) - } - -commit 717b7dddd08b66b3caa5585221472d84e40be658 -Author: William Cohen -Date: Wed Jun 17 13:08:30 2020 -0400 - - Use explicit @cast() operators to fslatency-nd.stp and fsslower-nd.stp - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. Need to include kernel as location for - this information for the @cast() rather than just assuming a default. - -diff --git a/testsuite/systemtap.examples/lwtools/fslatency-nd.stp b/testsuite/systemtap.examples/lwtools/fslatency-nd.stp -index 6008399..0bee34f 100755 ---- a/testsuite/systemtap.examples/lwtools/fslatency-nd.stp -+++ b/testsuite/systemtap.examples/lwtools/fslatency-nd.stp -@@ -63,8 +63,8 @@ probe __vfs_read = kprobe.function("__vfs_read") - { - # Skip the call if new_sync_read() wouldn't be called. - file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->read -- || !@cast(file, "file")->f_op->read_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->read -+ || !@cast(file, "file", "kernel")->f_op->read_iter) - next - } - -@@ -75,8 +75,8 @@ probe __vfs_write = kprobe.function("__vfs_write") - { - # Skip the call if new_sync_write() wouldn't be called. 
- file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->write -- || !@cast(file, "file")->f_op->write_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->write -+ || !@cast(file, "file", "kernel")->f_op->write_iter) - next - } - -@@ -102,8 +102,8 @@ probe __vfs_read.return = kprobe.function("__vfs_read").return - { - # Skip the call if new_sync_read() wouldn't be called. - file = @entry(pointer_arg(1)) -- if (!file || @cast(file, "file")->f_op->read -- || !@cast(file, "file")->f_op->read_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->read -+ || !@cast(file, "file", "kernel")->f_op->read_iter) - next - } - -@@ -115,8 +115,8 @@ probe __vfs_write.return = kprobe.function("__vfs_write") - { - # Skip the call if new_sync_write() wouldn't be called. - file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->write -- || !@cast(file, "file")->f_op->write_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->write -+ || !@cast(file, "file", "kernel")->f_op->write_iter) - next - } - -diff --git a/testsuite/systemtap.examples/lwtools/fsslower-nd.stp b/testsuite/systemtap.examples/lwtools/fsslower-nd.stp -index 64abe41..90fa9b5 100755 ---- a/testsuite/systemtap.examples/lwtools/fsslower-nd.stp -+++ b/testsuite/systemtap.examples/lwtools/fsslower-nd.stp -@@ -65,8 +65,8 @@ probe __vfs_read = kprobe.function("__vfs_read") - { - # Skip the call if new_sync_read() wouldn't be called. - file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->read -- || !@cast(file, "file")->f_op->read_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->read -+ || !@cast(file, "file", "kernel")->f_op->read_iter) - next - } - -@@ -77,8 +77,8 @@ probe __vfs_write = kprobe.function("__vfs_write") - { - # Skip the call if new_sync_write() wouldn't be called. 
- file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->write -- || !@cast(file, "file")->f_op->write_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->write -+ || !@cast(file, "file", "kernel")->f_op->write_iter) - next - } - -@@ -110,8 +110,8 @@ probe __vfs_read.return = kprobe.function("__vfs_read").return - { - # Skip the call if new_sync_read() wouldn't be called. - file = @entry(pointer_arg(1)) -- if (!file || @cast(file, "file")->f_op->read -- || !@cast(file, "file")->f_op->read_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->read -+ || !@cast(file, "file", "kernel")->f_op->read_iter) - next - } - -@@ -123,7 +123,7 @@ probe __vfs_write.return = kprobe.function("__vfs_write") - { - # Skip the call if new_sync_write() wouldn't be called. - file = pointer_arg(1) -- if (!file || @cast(file, "file")->f_op->write -- || !@cast(file, "file")->f_op->write_iter) -+ if (!file || @cast(file, "file", "kernel")->f_op->write -+ || !@cast(file, "file", "kernel")->f_op->write_iter) - next - } - -commit 9eb37102d48b814821b7f474986a7bfe86784192 -Author: William Cohen -Date: Wed Jun 17 13:39:20 2020 -0400 - - Use explicit @cast() operators for pfiles.stp and ioctl_handler.stp - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. Need to include kernel as location for - this information for the @cast() rather than just assuming a default. - -diff --git a/testsuite/systemtap.examples/process/pfiles.stp b/testsuite/systemtap.examples/process/pfiles.stp -index d52a154..6344a4c 100755 ---- a/testsuite/systemtap.examples/process/pfiles.stp -+++ b/testsuite/systemtap.examples/process/pfiles.stp -@@ -787,9 +787,9 @@ function print_unix_socket(sock) { - strlen(peername) > 0 ? peername . "\n" : "") - try { # skip line in case of null pointers - printf(" peercred pid: %d\n", -- @defined(@cast(sock, "socket")->sk->sk_peer_pid) ? 
-- @cast(sock, "socket")->sk->sk_peer_pid->numbers[0]->nr : -- @cast(sock, "socket")->sk->sk_peercred->pid ); } catch { } -+ @defined(@cast(sock, "socket", "kernel")->sk->sk_peer_pid) ? -+ @cast(sock, "socket", "kernel")->sk->sk_peer_pid->numbers[0]->nr : -+ @cast(sock, "socket", "kernel")->sk->sk_peercred->pid ); } catch { } - } - - function print_ipv4_socket(sock) { -diff --git a/testsuite/systemtap.examples/profiling/ioctl_handler.stp b/testsuite/systemtap.examples/profiling/ioctl_handler.stp -index 7044185..6f1e52c 100755 ---- a/testsuite/systemtap.examples/profiling/ioctl_handler.stp -+++ b/testsuite/systemtap.examples/profiling/ioctl_handler.stp -@@ -9,7 +9,7 @@ probe syscall.ioctl { - ioctl_requests[execname()] <<< 1 - try { - # Dig down through the task struct to find the actual function handling ioctl. -- ioctl_func_address = @cast(task_current(), "struct task_struct")->files->fdt->fd[fd]->f_op->unlocked_ioctl -+ ioctl_func_address = @cast(task_current(), "struct task_struct", "kernel")->files->fdt->fd[fd]->f_op->unlocked_ioctl - if (ioctl_func_address) - ioctl_func[execname(), ioctl_func_address] <<< 1 - } catch { - -commit 3040d4e8ddb6a9b1d1a57a0185206498670c3f1a -Author: William Cohen -Date: Wed Jun 17 13:53:58 2020 -0400 - - Use explicit @cast() operators for stapgames/pingpong.stp tapset. 
- -diff --git a/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp b/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp -index 026e4a9..f6ad2db 100644 ---- a/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp -+++ b/testsuite/systemtap.examples/stapgames/tapset/gmtty.stp -@@ -10,12 +10,12 @@ global GM_tty_ws_row, GM_tty_ws_col, GM_tty_name - # Initialize current TTY -- must be called from begin - function game_tty_init:long () - { -- tty = @cast(task_current(), "task_struct")->signal->tty -+ tty = @cast(task_current(), "task_struct", "kernel")->signal->tty - if (tty) { -- namep = @cast(tty,"tty_struct")->name -+ namep = @cast(tty,"tty_struct", "kernel")->name - GM_tty_name = kernel_string(namep) -- GM_tty_ws_col = @cast(tty, "tty_struct")->winsize->ws_col -- GM_tty_ws_row = @cast(tty, "tty_struct")->winsize->ws_row -+ GM_tty_ws_col = @cast(tty, "tty_struct", "kernel")->winsize->ws_col -+ GM_tty_ws_row = @cast(tty, "tty_struct", "kernel")->winsize->ws_row - } - return tty - } - -commit 3d922919dbe5657becf48917f1c661bf6711e956 -Author: William Cohen -Date: Thu Jun 18 13:32:50 2020 -0400 - - Use explicit @cast() operators for periodic.stp - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. Need to include kernel as location for - this information for the @cast() rather than just assuming a default. - -diff --git a/testsuite/systemtap.examples/profiling/periodic.stp b/testsuite/systemtap.examples/profiling/periodic.stp -index f18f183..b9052e5 100755 ---- a/testsuite/systemtap.examples/profiling/periodic.stp -+++ b/testsuite/systemtap.examples/profiling/periodic.stp -@@ -18,8 +18,8 @@ probe kernel.trace("timer_expire_entry") - period[$timer] <<< elapsed - funct[$timer] = $timer->function - data[$timer] = @defined($timer->data) ? $timer->data : 0 -- proc_info[$timer] = @defined($timer->data) ? 
0 : @container_of($timer, "struct process_timer", timer)->task -- delayed_work_info[$timer] = @defined($timer->data) ? 0 : & @container_of($timer, "struct delayed_work", timer) -+ proc_info[$timer] = @defined($timer->data) ? 0 : @module_container_of($timer, "kernel", "struct process_timer", timer)->task -+ delayed_work_info[$timer] = @defined($timer->data) ? 0 : & @module_container_of($timer, "kernel", "struct delayed_work", timer) - } - last_expire[$timer] = new_expire - } - -commit b2d18cb3afca76536506fe4992fdd6ef091ce82f -Author: William Cohen -Date: Thu Jun 18 15:01:40 2020 -0400 - - Use explicit @cast() operators for semop-watch.stp example. - -diff --git a/testsuite/systemtap.examples/process/semop-watch.stp b/testsuite/systemtap.examples/process/semop-watch.stp -index ca2bf0a..bf1d632 100755 ---- a/testsuite/systemtap.examples/process/semop-watch.stp -+++ b/testsuite/systemtap.examples/process/semop-watch.stp -@@ -3,7 +3,7 @@ global times; - - probe syscall.{semop,semtimedop} - { -- sembuf_sz = @cast_sizeof("struct sembuf"); -+ sembuf_sz = @cast_module_sizeof("kernel", "struct sembuf"); - res = sprintf("set %d sems", semid) - - %( systemtap_v < "2.3" %? -@@ -14,7 +14,7 @@ probe syscall.{semop,semtimedop} - for(i = 0; i < nsops; i++) { - offset = i * sembuf_sz; - pointer = sops_uaddr + offset; -- num_addr = & @cast(pointer, "struct sembuf")->sem_num; -+ num_addr = & @cast(pointer, "struct sembuf", "kernel")->sem_num; - num = user_short(num_addr); - - res = sprintf("%s %d", res, num); - -commit a948c291c9cd7320d3c9b18b5037908cbbdf70b7 -Author: William Cohen -Date: Mon Jun 22 11:28:32 2020 -0400 - - Use explicit @cast() operators pointing to kernel for tapsets - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. Need to include kernel as location for - this information for the @cast() rather than just assuming a default. 
- -diff --git a/tapset/linux/dentry.stp b/tapset/linux/dentry.stp -index 4e73532..d148c57 100644 ---- a/tapset/linux/dentry.stp -+++ b/tapset/linux/dentry.stp -@@ -28,7 +28,7 @@ - - @__private30 function __dentry_IS_ROOT:long(dentry:long) - { -- return (@cast(dentry, "dentry")->d_parent == dentry) -+ return (@cast(dentry, "dentry", "kernel")->d_parent == dentry) - } - - -@@ -61,7 +61,7 @@ - */ - function d_name:string(dentry:long) - { -- s = & @cast(dentry, "dentry")->d_name; -+ s = & @cast(dentry, "dentry", "kernel")->d_name; - return kernel_string_n(s->name, s->len); - } - -@@ -70,8 +70,8 @@ function d_name:string(dentry:long) - { - /* i_dentry is an hlist_head on 3.6+, or a list_head before that. */ - d_alias = @choose_defined( -- @cast(inode, "struct inode")->i_dentry->first, -- @cast(inode, "struct inode")->i_dentry->next) -+ @cast(inode, "struct inode", "kernel")->i_dentry->first, -+ @cast(inode, "struct inode", "kernel")->i_dentry->next) - - if (@type_member_defined("struct dentry", d_alias)) { - return & @container_of(d_alias, "struct dentry", d_alias) -@@ -86,8 +86,8 @@ function d_name:string(dentry:long) - { - /* s_mounts was added in kernel 3.6, commit b3d9b7a3c. */ - if (@type_member_defined("struct super_block", s_mounts)) { -- mnt_ns = @cast(task_current(), "struct task_struct")->nsproxy->mnt_ns -- sb = @cast(inode, "struct inode")->i_sb -+ mnt_ns = @cast(task_current(), "struct task_struct", "kernel")->nsproxy->mnt_ns -+ sb = @cast(inode, "struct inode", "kernel")->i_sb - - /* Look for the mount which matches the current namespace */ - head = &sb->s_mounts -@@ -141,7 +141,7 @@ function reverse_path_walk:string(dentry:long) - { - while(1) { - name = __dentry_prepend(dentry, name); -- dentry = @cast(dentry, "dentry")->d_parent; -+ dentry = @cast(dentry, "dentry", "kernel")->d_parent; - if (__dentry_IS_ROOT(dentry)) - return name; - } -@@ -209,8 +209,8 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) - * dentry == vfsmnt->mnt_root. 
In that case, we'll just go - * ahead and handle them normally. - */ -- dentry = & @cast(dentry, "dentry") -- vfsmnt = & @cast(vfsmnt, "vfsmount") -+ dentry = & @cast(dentry, "dentry", "kernel") -+ vfsmnt = & @cast(vfsmnt, "vfsmount", "kernel") - - if (@type_member_defined("dentry", d_op->d_dname) - && dentry->d_op && dentry->d_op->d_dname -@@ -230,7 +230,7 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) - return sprintf("ANON_INODE:%s", d_name(dentry)) - } - else if (vfsmnt->mnt_sb->s_magic == @const("NSFS_MAGIC")) { -- ns_ops = &@cast(dentry->d_fsdata, "proc_ns_operations") -+ ns_ops = &@cast(dentry->d_fsdata, "proc_ns_operations", "kernel") - return sprintf("%s:[%lu]", kernel_string(ns_ops->name), - dentry->d_inode->i_ino) - } -@@ -239,16 +239,16 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) - - # Handle old-school vs. new-school fs_structs. - if (@type_member_defined("fs_struct", rootmnt)) { -- root_dentry = & @cast(task, "task_struct")->fs->root -- root_vfsmnt = & @cast(task, "task_struct")->fs->rootmnt -+ root_dentry = & @cast(task, "task_struct", "kernel")->fs->root -+ root_vfsmnt = & @cast(task, "task_struct", "kernel")->fs->rootmnt - } - else { -- root_dentry = @cast(task, "task_struct")->fs->root->dentry -- root_vfsmnt = @cast(task, "task_struct")->fs->root->mnt -+ root_dentry = @cast(task, "task_struct", "kernel")->fs->root->dentry -+ root_vfsmnt = @cast(task, "task_struct", "kernel")->fs->root->mnt - } - - if (@type_member_defined("mount", mnt_parent)) { -- mnt = &@cast(real_mount(vfsmnt), "mount") -+ mnt = &@cast(real_mount(vfsmnt), "mount", "kernel") - if (mnt == 0) - return "" - } -@@ -305,10 +305,10 @@ function task_dentry_path:string(task:long,dentry:long,vfsmnt:long) - */ - function d_path:string(nd:long) - { -- dentry = @choose_defined(@cast(nd,"nameidata")->path->dentry, -- @cast(nd,"nameidata")->dentry) -- vfsmnt = @choose_defined(@cast(nd,"nameidata")->path->mnt, -- @cast(nd,"nameidata")->mnt) -+ 
dentry = @choose_defined(@cast(nd,"nameidata", "kernel")->path->dentry, -+ @cast(nd,"nameidata", "kernel")->dentry) -+ vfsmnt = @choose_defined(@cast(nd,"nameidata", "kernel")->path->mnt, -+ @cast(nd,"nameidata", "kernel")->mnt) - - return sprintf("%s/", task_dentry_path(task_current(), dentry, vfsmnt)) - } -@@ -353,8 +353,8 @@ function fullpath_struct_path:string(path:long) - function fullpath_struct_file:string(task:long, file:long) - { - return task_dentry_path(task, -- @choose_defined(@cast(file, "file")->f_path->dentry, -- @cast(file, "file")->f_dentry), -- @choose_defined(@cast(file, "file")->f_path->mnt, -- @cast(file, "file")->f_vfsmnt)) -+ @choose_defined(@cast(file, "file", "kernel")->f_path->dentry, -+ @cast(file, "file", "kernel")->f_dentry), -+ @choose_defined(@cast(file, "file", "kernel")->f_path->mnt, -+ @cast(file, "file", "kernel")->f_vfsmnt)) - } -diff --git a/tapset/linux/dev.stp b/tapset/linux/dev.stp -index 0232fc9..079ce1c 100644 ---- a/tapset/linux/dev.stp -+++ b/tapset/linux/dev.stp -@@ -56,8 +56,8 @@ function usrdev2kerndev:long(dev:long) - function disk_name:string(hd:long, partno:long) - { - if (!partno) -- return kernel_string(@cast(hd, "gendisk")->disk_name) -- disk_name = kernel_string(@cast(hd, "gendisk")->disk_name) -+ return kernel_string(@cast(hd, "gendisk", "kernel")->disk_name) -+ disk_name = kernel_string(@cast(hd, "gendisk", "kernel")->disk_name) - if (isdigit(substr(disk_name, strlen(disk_name)-1, 1))) - return sprintf("%sp%d", disk_name, partno) - else -@@ -66,7 +66,7 @@ function disk_name:string(hd:long, partno:long) - - function bdevname:string(bdev:long) - { -- bdev = & @cast(bdev, "block_device") -+ bdev = & @cast(bdev, "block_device", "kernel") - if (bdev == 0) - return "N/A" - -diff --git a/tapset/linux/ioblock.stp b/tapset/linux/ioblock.stp -index ad3603c..9d8f57b 100644 ---- a/tapset/linux/ioblock.stp -+++ b/tapset/linux/ioblock.stp -@@ -107,12 +107,12 @@ function bio_rw_str(rw:long) - @__private30 function 
__bio_start_sect:long(bio:long) - { - try { -- if (@defined(@cast(bio, "bio")->bi_dev)) { -- return @cast(bio, "bio")->bi_bdev->bd_part->start_sect -+ if (@defined(@cast(bio, "bio", "kernel")->bi_dev)) { -+ return @cast(bio, "bio", "kernel")->bi_bdev->bd_part->start_sect - } -- else if (@defined(@cast(bio, "bio")->bi_disk)) { -- return disk_get_part_start_sect(@cast(bio, "bio")->bi_disk, -- @cast(bio, "bio")->bi_partno) -+ else if (@defined(@cast(bio, "bio", "kernel")->bi_disk)) { -+ return disk_get_part_start_sect(@cast(bio, "bio", "kernel")->bi_disk, -+ @cast(bio, "bio", "kernel")->bi_partno) - } - } catch { - return -1 -@@ -122,12 +122,12 @@ function bio_rw_str(rw:long) - /* returns the block device name */ - @__private30 function __bio_devname:string(bio:long) - { -- if (@defined(@cast(bio, "bio")->bi_bdev)) { -- return bdevname(@cast(bio, "bio")->bi_bdev) -+ if (@defined(@cast(bio, "bio", "kernel")->bi_bdev)) { -+ return bdevname(@cast(bio, "bio", "kernel")->bi_bdev) - } - else { -- return disk_name(@cast(bio, "bio")->bi_disk, -- @cast(bio, "bio")->bi_partno) -+ return disk_name(@cast(bio, "bio", "kernel")->bi_disk, -+ @cast(bio, "bio", "kernel")->bi_partno) - } - } - -diff --git a/tapset/linux/task.stp b/tapset/linux/task.stp -index 4afc458..b542b61 100644 ---- a/tapset/linux/task.stp -+++ b/tapset/linux/task.stp -@@ -40,7 +40,7 @@ function task_current:long () { - return -1; - } - sig = @task(task)->signal; -- return @cast(sig, "signal_struct")->rlim[nd_limit]->rlim_cur; -+ return @cast(sig, "signal_struct", "kernel")->rlim[nd_limit]->rlim_cur; - } - - /* sfunction task_rlimit - The current resource limit of the task - -commit 403e927796c3008ad5d5fed9bd97dc7cbad424bb -Author: Martin Cermak -Date: Mon Jun 29 16:30:34 2020 +0200 - - PR26181: Use explicit @cast() within get_ip_from_client() - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. 
Need to include kernel as location for - this information for the @cast() rather than just assuming a default. - - Also, fix the type of server_ip, which historically had been a long, - but since systemtap_v >= "4.3", it is a string. - -diff --git a/tapset/linux/nfs_proc.stp b/tapset/linux/nfs_proc.stp -index 8da3f6b..2579074 100644 ---- a/tapset/linux/nfs_proc.stp -+++ b/tapset/linux/nfs_proc.stp -@@ -77,11 +77,11 @@ function get_ip_from_client:string(clnt:long) - * inside that buffer. */ - if (@cast(addr, "sockaddr")->sa_family - == @const("AF_INET")) { -- return format_ipaddr(&@cast(addr, "sockaddr_in")->sin_addr->s_addr, @const("AF_INET")) -+ return format_ipaddr(&@cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_addr->s_addr, @const("AF_INET")) - } - else if (@cast(addr, "sockaddr")->sa_family - == @const("AF_INET6")) { -- return format_ipaddr(&@cast(addr, "sockaddr_in6")->sin6_addr, @const("AF_INET6")) -+ return format_ipaddr(&@cast(addr, "sockaddr_in6", "kernel:sunrpc")->sin6_addr, @const("AF_INET6")) - } - return "" - } -@@ -90,12 +90,12 @@ function get_ip_from_client:long(clnt:long) - { - cl_xprt = @cast(clnt, "rpc_clnt", "kernel:sunrpc")->cl_xprt - addr = &@cast(cl_xprt, "rpc_xprt", "kernel:sunrpc")->addr -- if (@cast(addr, "sockaddr_in")->sin_family -+ if (@cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_family - != @const("AF_INET")) { - /* Now consider ipv4 only */ - return 0 - } -- return @cast(addr, "sockaddr_in")->sin_addr->s_addr -+ return @cast(addr, "sockaddr_in", "kernel:sunrpc")->sin_addr->s_addr - } - %) - -@@ -758,7 +758,11 @@ probe _nfs.proc2.missing_read_setup = never - { - inode = 0 - client = 0 -+%( systemtap_v >= "4.3" %? 
-+ server_ip = "0" -+%: - server_ip = 0 -+%) - prot = 0 - - count = 0 - -commit f1a9bb064d11319a7eca4f4233c9edcc4a03af7e -Author: Martin Cermak -Date: Thu Jul 9 09:19:01 2020 +0200 - - Tapset and testsuite updates against @cast() change 00ee19ff03 - - Commit 00ee19ff030f665df7e087a579f39105256a0253 changed how @cast() - operations work and they no longer default to using the kernel - debuginfo for type information. Need to include kernel as location for - this information for the @cast() rather than just assuming a default. - - These are respective tapset and testsuite minor updates. - -diff --git a/tapset/linux/networking.stp b/tapset/linux/networking.stp -index 69843a7..0b52cbc 100644 ---- a/tapset/linux/networking.stp -+++ b/tapset/linux/networking.stp -@@ -69,7 +69,7 @@ - - /* A function that returns the device name given the net_device struct */ - function get_netdev_name:string (addr:long) { -- return kernel_string(@cast(addr, "net_device")->name) -+ return kernel_string(@cast(addr, "net_device", "kernel")->name) - } - - /** -diff --git a/tapset/linux/scsi.stp b/tapset/linux/scsi.stp -index 3577942..5359fe8 100644 ---- a/tapset/linux/scsi.stp -+++ b/tapset/linux/scsi.stp -@@ -179,8 +179,8 @@ probe scsi.iocompleted - - function timer_pending:long(timer:long) - { -- return (@choose_defined(@cast(timer, "timer_list")->entry->next, -- @cast(timer, "timer_list")->base) != 0) -+ return (@choose_defined(@cast(timer, "timer_list", "kernel")->entry->next, -+ @cast(timer, "timer_list", "kernel")->base) != 0) - } - - function scsi_timer_pending:long(cmd:long) -diff --git a/testsuite/buildok/pretty.stp b/testsuite/buildok/pretty.stp -index 85c9cd9..a2fc781 100755 ---- a/testsuite/buildok/pretty.stp -+++ b/testsuite/buildok/pretty.stp -@@ -6,14 +6,14 @@ global i = 1 - # pretty-printing with @cast - probe begin { - t = task_current() -- log(@cast(t, "task_struct")->fs$) -- log(@cast(t, "task_struct")->fs$$) -- log(@cast(t, "task_struct")->comm$) -- log(@cast(t, 
"task_struct")->comm$$) -- log(@cast(t, "task_struct")->comm[0]$) -- log(@cast(t, "task_struct")->comm[0]$$) -- log(@cast(t, "task_struct")->comm[i]$) -- log(@cast(t, "task_struct")->comm[i]$$) -+ log(@cast(t, "task_struct", "kernel")->fs$) -+ log(@cast(t, "task_struct", "kernel")->fs$$) -+ log(@cast(t, "task_struct", "kernel")->comm$) -+ log(@cast(t, "task_struct", "kernel")->comm$$) -+ log(@cast(t, "task_struct", "kernel")->comm[0]$) -+ log(@cast(t, "task_struct", "kernel")->comm[0]$$) -+ log(@cast(t, "task_struct", "kernel")->comm[i]$) -+ log(@cast(t, "task_struct", "kernel")->comm[i]$$) - } - - # pretty-printing in dwarf kernel context -diff --git a/testsuite/semok/cast.stp b/testsuite/semok/cast.stp -index d72763c..fe78e36 100755 ---- a/testsuite/semok/cast.stp -+++ b/testsuite/semok/cast.stp -@@ -2,7 +2,7 @@ - - probe begin { - // basic @cast test, with and without specifying kernel -- println(@cast(0, "task_struct")->tgid) -+ println(@cast(0, "task_struct", "kernel")->tgid) - println(@cast(0, "task_struct", "kernel")->tgid) - - // check module-search paths -@@ -25,5 +25,5 @@ probe begin { - @cast(0, "task_struct", "no_such_module")->tgid - - // PR11556: we should be able to treat the initial pointer like an array too -- println(@cast(0, "task_struct")[42]->tgid) -+ println(@cast(0, "task_struct", "kernel")[42]->tgid) - } -diff --git a/testsuite/semok/pretty.stp b/testsuite/semok/pretty.stp -index 0211d86..25490e7 100755 ---- a/testsuite/semok/pretty.stp -+++ b/testsuite/semok/pretty.stp -@@ -12,16 +12,16 @@ global i = 1 - # pretty-printing with @cast - probe begin { - t = task_current() -- log(@cast(t, "task_struct")$) -- log(@cast(t, "task_struct")$$) -- log(@cast(t, "task_struct")->fs$) -- log(@cast(t, "task_struct")->fs$$) -- log(@cast(t, "task_struct")->comm$) -- log(@cast(t, "task_struct")->comm$$) -- log(@cast(t, "task_struct")->comm[0]$) -- log(@cast(t, "task_struct")->comm[0]$$) -- log(@cast(t, "task_struct")->comm[i]$) -- log(@cast(t, 
"task_struct")->comm[i]$$) -+ log(@cast(t, "task_struct", "kernel")$) -+ log(@cast(t, "task_struct", "kernel")$$) -+ log(@cast(t, "task_struct", "kernel")->fs$) -+ log(@cast(t, "task_struct", "kernel")->fs$$) -+ log(@cast(t, "task_struct", "kernel")->comm$) -+ log(@cast(t, "task_struct", "kernel")->comm$$) -+ log(@cast(t, "task_struct", "kernel")->comm[0]$) -+ log(@cast(t, "task_struct", "kernel")->comm[0]$$) -+ log(@cast(t, "task_struct", "kernel")->comm[i]$) -+ log(@cast(t, "task_struct", "kernel")->comm[i]$$) - } - - # pretty-printing in dwarf kernel context -diff --git a/testsuite/semok/sizeof.stp b/testsuite/semok/sizeof.stp -index 8e35e29..a5a6bbb 100755 ---- a/testsuite/semok/sizeof.stp -+++ b/testsuite/semok/sizeof.stp -@@ -1,7 +1,7 @@ - #! stap -p2 - - probe begin { -- println("task_struct: ", @cast_sizeof("task_struct")) -+ # println("task_struct: ", @cast_sizeof("task_struct")) - println("task_struct: ", @cast_module_sizeof("kernel", "task_struct")) - println("task_struct: ", @cast_module_sizeof("kernel", "task_struct")) - println("FILE: ", @cast_module_sizeof("", "FILE")) -diff --git a/testsuite/semok/thirtyeight.stp b/testsuite/semok/thirtyeight.stp -index 15189b7..5018795 100755 ---- a/testsuite/semok/thirtyeight.stp -+++ b/testsuite/semok/thirtyeight.stp -@@ -7,4 +7,4 @@ - probe kernel.function("do_sys_open") { println(@defined($mode) ? 1 : $nosuchvar) } - probe kernel.trace("sched_switch")? { println(@defined($next->pid) ? 1 : $nosuchvar) } - probe procfs.write { println(@defined($value) ? 1 : $nosuchvar) } --probe begin { println(@defined(@cast(0, "task_struct")->pid) ? 1 : $nosuchvar) } -+probe begin { println(@defined(@cast(0, "task_struct", "kernel")->pid) ? 
1 : $nosuchvar) } -diff --git a/testsuite/semok/thirtysix.stp b/testsuite/semok/thirtysix.stp -index 14f10c1..0adae14 100755 ---- a/testsuite/semok/thirtysix.stp -+++ b/testsuite/semok/thirtysix.stp -@@ -17,17 +17,17 @@ probe begin,end,error,never { println(@defined($nosuchvar)?$nosuchvar:0) } # inv - probe timer.s(1),timer.jiffies(1) { println(@defined($nosuchvar)?$nosuchvar:0) } # invalid - probe timer.profile { println(@defined($nosuchvar)?$nosuchvar:0) } # invalid - --probe begin { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid --probe begin { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid --probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid --probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid -+probe begin { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid -+probe begin { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid -+probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid -+probe kernel.function("do_sys_open") { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid - --function foo1() { println(@defined(@cast(0, "task_struct")->foo)?$nosuchvar:0) } # invalid --function foo2() { println(@defined(@cast(0, "task_struct")->pid)?1:$nosuchvar) } # valid -+function foo1() { println(@defined(@cast(0, "task_struct", "kernel")->foo)?$nosuchvar:0) } # invalid -+function foo2() { println(@defined(@cast(0, "task_struct", "kernel")->pid)?1:$nosuchvar) } # valid - probe begin { foo1(); foo2(); } - - # PR11598: support @defined(&...) 
--probe begin { println(@defined(@cast(0, "task_struct")->rcu)?$nosuchvar:0) } # invalid --probe begin { println(@defined(&@cast(0, "task_struct")->rcu)?1:$nosuchvar) } # valid -+probe begin { println(@defined(@cast(0, "task_struct", "kernel")->rcu)?$nosuchvar:0) } # invalid -+probe begin { println(@defined(&@cast(0, "task_struct", "kernel")->rcu)?1:$nosuchvar) } # valid - probe kernel.function("release_task") { println(@defined($p->rcu)?$nosuchvar:0) } # invalid - probe kernel.function("release_task") { println(@defined(&$p->rcu)?1:$nosuchvar) } # valid -diff --git a/testsuite/systemtap.base/bitfield.stp b/testsuite/systemtap.base/bitfield.stp -index 0208108..b5f7b89 100644 ---- a/testsuite/systemtap.base/bitfield.stp -+++ b/testsuite/systemtap.base/bitfield.stp -@@ -11,8 +11,8 @@ function check:long(ack:long, urg:long) { - ptr = get_ptr() - - /* set the bits with cast */ -- @cast(ptr, "tcphdr")->ack = ack -- @cast(ptr, "tcphdr")->urg = urg -+ @cast(ptr, "tcphdr", "kernel")->ack = ack -+ @cast(ptr, "tcphdr", "kernel")->urg = urg - - /* check that reading with embedded-C is ok */ - real_ack = get_ack() -@@ -20,8 +20,8 @@ function check:long(ack:long, urg:long) { - errors = (ack != real_ack) + (urg != real_urg) - - /* check that reading with a cast is ok */ -- cast_ack = @cast(ptr, "tcphdr")->ack -- cast_urg = @cast(ptr, "tcphdr")->urg -+ cast_ack = @cast(ptr, "tcphdr", "kernel")->ack -+ cast_urg = @cast(ptr, "tcphdr", "kernel")->urg - errors += (ack != cast_ack) + (urg != cast_urg) - - if (errors) -diff --git a/testsuite/systemtap.base/target_set.stp b/testsuite/systemtap.base/target_set.stp -index ad4dca6..7c458cb 100644 ---- a/testsuite/systemtap.base/target_set.stp -+++ b/testsuite/systemtap.base/target_set.stp -@@ -19,13 +19,13 @@ probe begin - probe syscall.nanosleep - { - if (target_set_pid(pid()) -- && user_long(&@cast(req_uaddr, "struct timespec")->tv_sec) == $1) -+ && user_long(&@cast(req_uaddr, "struct timespec", "kernel")->tv_sec) == $1) - 
target_set_report() - } - probe syscall.compat_nanosleep ? - { - if (target_set_pid(pid()) -- && user_long(&@cast(req_uaddr, "struct compat_timespec")->tv_sec) == $1) -+ && user_long(&@cast(req_uaddr, "struct compat_timespec", "kernel")->tv_sec) == $1) - target_set_report() - } - -diff --git a/testsuite/systemtap.context/usymbols.exp b/testsuite/systemtap.context/usymbols.exp -index f53c1cd..e12f067 100644 ---- a/testsuite/systemtap.context/usymbols.exp -+++ b/testsuite/systemtap.context/usymbols.exp -@@ -20,7 +20,7 @@ set testscript { - probe syscall.rt_sigaction { - if (pid() == target() && execname() == "%s") { - // Note user address. -- handler = user_long(&@cast(act_uaddr, "struct sigaction")->sa_handler); -+ handler = user_long(&@cast(act_uaddr, "struct sigaction", "kernel")->sa_handler); - try { - printf("handler: %%s (%%s)\n", usymname(handler), umodname(handler)); - } catch { -@@ -31,9 +31,9 @@ set testscript { - probe syscall.rt_sigaction32 ? { - if (pid() == target() && execname() == "%s") { - // Note user address. -- handler = user_long(@defined(@cast(0, "compat_sigaction")->sa_handler) -- ? &@cast(act_uaddr, "compat_sigaction")->sa_handler -- : &@cast(act_uaddr, "sigaction32")->sa_handler); -+ handler = user_long(@defined(@cast(0, "compat_sigaction", "kernel")->sa_handler) -+ ? &@cast(act_uaddr, "compat_sigaction", "kernel")->sa_handler -+ : &@cast(act_uaddr, "sigaction32", "kernel")->sa_handler); - try { - printf("handler: %%s (%%s)\n", usymname(handler), umodname(handler)); - } catch { - -commit c6831f14e043f88096b2219828c0124cf2549b77 -Author: Frank Ch. Eigler -Date: Thu Jul 9 21:41:51 2020 -0400 - - testuite: More @cast() fallout - - Adjust another test case that uses the deprecated - - probe begin { @cast(PTR, "type") } - - construct. Now "kernel" is formally required to specify context. 
- -diff --git a/testsuite/systemtap.base/cast.stp b/testsuite/systemtap.base/cast.stp -index cc44a36..0e191eb 100644 ---- a/testsuite/systemtap.base/cast.stp -+++ b/testsuite/systemtap.base/cast.stp -@@ -4,7 +4,7 @@ probe begin - - // Compare PIDs - pid = pid() -- cast_pid = @cast(curr, "task_struct")->tgid -+ cast_pid = @cast(curr, "task_struct", "kernel")->tgid - if (pid == cast_pid) - println("PID OK") - else -@@ -18,7 +18,7 @@ probe begin - printf("PID2 %d != %d\n", pid, cast_pid) - - // Compare PIDs with an array access (PR11556) -- cast_pid = @cast(curr, "task_struct")[0]->tgid -+ cast_pid = @cast(curr, "task_struct", "kernel")[0]->tgid - if (pid == cast_pid) - println("PID3 OK") - else -@@ -26,16 +26,16 @@ probe begin - - // Compare execnames - name = execname() -- cast_name = kernel_string(@cast(curr, "task_struct")->comm) -+ cast_name = kernel_string(@cast(curr, "task_struct", "kernel")->comm) - if (name == cast_name) - println("execname OK") - else - printf("execname \"%s\" != \"%s\"\n", name, cast_name) - - // Compare usage counter values through a struct address -- usage = @cast(curr, "task_struct")->usage->counter -- pusage = & @cast(curr, "task_struct")->usage -- cast_usage = @cast(pusage, "atomic_t")->counter -+ usage = @cast(curr, "task_struct", "kernel")->usage->counter -+ pusage = & @cast(curr, "task_struct", "kernel")->usage -+ cast_usage = @cast(pusage, "atomic_t", "kernel")->counter - if (usage == cast_usage) - println("usage OK") - else diff --git a/sources b/sources index 77da3ac..e30f417 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (systemtap-4.3.tar.gz) = db992adaa827601d1e3f28f6d70611b515b5e2e934d4251a07d5798f1d42a59351beb1422ab8df6ee634476b51b2127462e7bf7bf26afaa83cce91ffe59f3696 +SHA512 (systemtap-4.4.tar.gz) = 8fb1fe5071ec99ce3c6bcf82afdc98a3e1abc0ea937f3019b225c3a1879ada30080740b1918a84c6db06fe1893e6d1e7dc84be26c7e597d7feda1efe11354e76 diff --git a/systemtap.spec b/systemtap.spec index 2a44465..8eeef74 100644 --- 
a/systemtap.spec +++ b/systemtap.spec @@ -48,7 +48,8 @@ %if 0%{?fedora} >= 18 || 0%{?rhel} >= 6 %define initdir %{_initddir} -%else # RHEL5 doesn't know _initddir +%else + # RHEL5 doesn't know _initddir %define initdir %{_initrddir} %endif @@ -58,7 +59,8 @@ %else %if 0%{?rhel} >= 6 %define udevrulesdir /lib/udev/rules.d - %else # RHEL5 + %else + # RHEL5 %define udevrulesdir /etc/udev/rules.d %endif %endif @@ -86,8 +88,8 @@ %define __brp_mangle_shebangs_exclude_from .stp$ Name: systemtap -Version: 4.3 -Release: 2%{?release_override}%{?dist} +Version: 4.4 +Release: 1%{?release_override}%{?dist} # for version, see also configure.ac @@ -123,9 +125,6 @@ License: GPLv2+ URL: http://sourceware.org/systemtap/ Source: ftp://sourceware.org/pub/systemtap/releases/systemtap-%{version}.tar.gz -Patch10: rhbz1847676,1857749.patch -Patch11: rhbz1855264.patch - # Build* BuildRequires: gcc-c++ BuildRequires: cpio @@ -366,6 +365,7 @@ Requires: systemtap = %{version}-%{release} Requires: systemtap-sdt-devel = %{version}-%{release} Requires: systemtap-server = %{version}-%{release} Requires: dejagnu which elfutils grep nc +Requires: elfutils-debuginfod Requires: gcc gcc-c++ make glibc-devel # testsuite/systemtap.base/ptrace.exp needs strace Requires: strace @@ -511,9 +511,6 @@ systemtap-runtime-virthost machine to execute systemtap scripts. %prep %setup -q -%patch10 -p1 -%patch11 -p1 - %build # Enable/disable the dyninst pure-userspace backend @@ -1232,8 +1229,8 @@ done # PRERELEASE %changelog -* Tue Jul 28 2020 Frank Ch. Eigler - 4.3-2 -- Backport important uprobes-inode and @cast() related changes. +* Mon Nov 09 2020 Frank Ch. Eigler - 4.4-1 +- Upstream release. * Thu Jun 11 2020 Frank Ch. Eigler - 4.3-1 - Upstream release.