From ecdab1b9e678d2e97b638ce9beb1aba9d0b74305 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Sep 2019 11:50:15 -0300 Subject: [PATCH 01/77] perf env: Add routine to read the env->cpuid from the running machine commit f1cedfb82858c8a7ec21e45d0ce7b6e2ce9edea0 upstream In 'perf top' we use that cpuid when initializing the per arch annotation init routines (e.g. x86__annotate_init()) and in that case (live mode, 'perf top') we need to obtain it from the running machine, not from a perf.data file header. Provide a means to do that. Will be used by 'perf top' in a followup patch. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-h2wb3sx7u7znx6lqfezrh7ca@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/util/env.c | 16 ++++++++++++++++ tools/perf/util/env.h | 1 + 2 files changed, 17 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index ef64e197bc8d..63882f7ba781 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -2,6 +2,7 @@ #include "cpumap.h" #include "debug.h" #include "env.h" +#include "util/header.h" #include #include #include "bpf-event.h" @@ -260,6 +261,21 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_cpuid(struct perf_env *env) +{ + char cpuid[128]; + int err = get_cpuid(cpuid, sizeof(cpuid)); + + if (err) + return err; + + free(env->cpuid); + env->cpuid = strdup(cpuid); + if (env->cpuid == NULL) + return ENOMEM; + return 0; +} + static int perf_env__read_arch(struct perf_env *env) { struct utsname uts; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 37028215d4a5..9d4d13771764 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -104,6 +104,7 @@ void perf_env__exit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); +int perf_env__read_cpuid(struct perf_env *env); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); -- Gitee From d5b61ba75c54fa678849be0f4868a02042e6e112 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 25 Oct 2019 17:08:33 +0300 Subject: [PATCH 02/77] perf/aux: Allow using AUX data in perf samples commit a4faf00d994c40e64f656805ac375c65e324eefb upstream AUX data can be used to annotate perf events such as performance counters or tracepoints/breakpoints by including it in sample records when PERF_SAMPLE_AUX flag is set. Such samples would be instrumental in debugging and profiling by providing, for example, a history of instruction flow leading up to the event's overflow. The implementation makes use of grouping an AUX event with all the events that wish to take samples of the AUX data, such that the former is the group leader. The samplees should also specify the desired size of the AUX sample via attr.aux_sample_size. AUX capable PMUs need to explicitly add support for sampling, because it relies on a new callback to take a snapshot of the buffer without touching the event states. [Backport changes] 1.In kernel/events/core.c, the difference in the patch is due to the addition of curly braces around the if condition block. This change was necessary to accommodate the additional statement inside the block. There are no functional changes intended; the modification is purely structural to ensure correct code organization. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Cc: mathieu.poirier@linaro.org Link: https://lkml.kernel.org/r/20191025140835.53665-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- include/linux/perf_event.h | 19 ++++ include/uapi/linux/perf_event.h | 10 +- kernel/events/core.c | 173 +++++++++++++++++++++++++++++++- kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 36 +++++++ 5 files changed, 234 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2d85c2bfedc8..09e0cca1eaaf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -278,6 +278,8 @@ struct perf_event; #define PERF_PMU_CAP_NO_EXCLUDE 0x80 #define PERF_PMU_CAP_AUX_OUTPUT 0x100 +struct perf_output_handle; + /** * struct pmu - generic performance monitoring unit */ @@ -452,6 +454,19 @@ struct pmu { */ void (*free_aux) (void *aux); /* optional */ + /* + * Take a snapshot of the AUX buffer without touching the event + * state, so that preempting ->start()/->stop() callbacks does + * not interfere with their logic. Called in PMI context. + * + * Returns the size of AUX data copied to the output handle. + * + * Optional. + */ + long (*snapshot_aux) (struct perf_event *event, + struct perf_output_handle *handle, + unsigned long size); + /* * Validate address range filters: make sure the HW supports the * requested configuration and number of filters; return 0 if the @@ -999,6 +1014,7 @@ struct perf_sample_data { u32 reserved; } cpu_entry; struct perf_callchain_entry *callchain; + u64 aux_size; /* * regs_user may point to task_pt_regs or to regs_user_copy, depending @@ -1395,6 +1411,9 @@ extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); +extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, + struct perf_output_handle *handle, + unsigned long from, unsigned long to); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern u64 perf_swevent_set_period(struct perf_event *event); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 507eddef8715..7fa9f958cd9b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -141,8 +141,9 @@ enum perf_event_sample_format { PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, + PERF_SAMPLE_AUX = 1U << 20, - PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -304,6 +305,7 @@ enum perf_event_read_format { /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -428,7 +430,9 @@ struct perf_event_attr { */ __u32 aux_watermark; __u16 sample_max_stack; - __u16 __reserved_2; /* align to __u64 */ + __u16 __reserved_2; + __u32 aux_sample_size; + __u32 __reserved_3; }; /* @@ -870,6 +874,8 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/kernel/events/core.c b/kernel/events/core.c index d478c9789ff6..6befff36dbc0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1947,6 +1947,11 @@ static void perf_put_aux_event(struct perf_event *event) } } +static bool perf_need_aux_event(struct perf_event *event) +{ + return !!event->attr.aux_output || !!event->attr.aux_sample_size; +} + static int perf_get_aux_event(struct perf_event *event, struct perf_event *group_leader) { @@ -1959,7 +1964,17 @@ static int perf_get_aux_event(struct perf_event *event, if (!group_leader) return 0; - if (!perf_aux_output_match(event, group_leader)) + /* + * aux_output and aux_sample_size are mutually exclusive. + */ + if (event->attr.aux_output && event->attr.aux_sample_size) + return 0; + + if (event->attr.aux_output && + !perf_aux_output_match(event, group_leader)) + return 0; + + if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux) return 0; if (!atomic_long_inc_not_zero(&group_leader->refcount)) @@ -6234,6 +6249,122 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, } } +static unsigned long perf_prepare_sample_aux(struct perf_event *event, + struct perf_sample_data *data, + size_t size) +{ + struct perf_event *sampler = event->aux_event; + struct ring_buffer *rb; + + data->aux_size = 0; + + if (!sampler) + goto out; + + if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE)) + goto out; + + if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) + goto out; + + rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + if (!rb) + goto out; + + /* + * If this is an NMI hit inside sampling code, don't take + * the sample. See also perf_aux_sample_output(). + */ + if (READ_ONCE(rb->aux_in_sampling)) { + data->aux_size = 0; + } else { + size = min_t(size_t, size, perf_aux_size(rb)); + data->aux_size = ALIGN(size, sizeof(u64)); + } + ring_buffer_put(rb); + +out: + return data->aux_size; +} + +long perf_pmu_snapshot_aux(struct ring_buffer *rb, + struct perf_event *event, + struct perf_output_handle *handle, + unsigned long size) +{ + unsigned long flags; + long ret; + + /* + * Normal ->start()/->stop() callbacks run in IRQ mode in scheduler + * paths. If we start calling them in NMI context, they may race with + * the IRQ ones, that is, for example, re-starting an event that's just + * been stopped, which is why we're using a separate callback that + * doesn't change the event state. + * + * IRQs need to be disabled to prevent IPIs from racing with us. + */ + local_irq_save(flags); + /* + * Guard against NMI hits inside the critical section; + * see also perf_prepare_sample_aux(). + */ + WRITE_ONCE(rb->aux_in_sampling, 1); + barrier(); + + ret = event->pmu->snapshot_aux(event, handle, size); + + barrier(); + WRITE_ONCE(rb->aux_in_sampling, 0); + local_irq_restore(flags); + + return ret; +} + +static void perf_aux_sample_output(struct perf_event *event, + struct perf_output_handle *handle, + struct perf_sample_data *data) +{ + struct perf_event *sampler = event->aux_event; + unsigned long pad; + struct ring_buffer *rb; + long size; + + if (WARN_ON_ONCE(!sampler || !data->aux_size)) + return; + + rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + if (!rb) + return; + + size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size); + + /* + * An error here means that perf_output_copy() failed (returned a + * non-zero surplus that it didn't copy), which in its current + * enlightened implementation is not possible. If that changes, we'd + * like to know. + */ + if (WARN_ON_ONCE(size < 0)) + goto out_put; + + /* + * The pad comes from ALIGN()ing data->aux_size up to u64 in + * perf_prepare_sample_aux(), so should not be more than that. + */ + pad = data->aux_size - size; + if (WARN_ON_ONCE(pad >= sizeof(u64))) + pad = 8; + + if (pad) { + u64 zero = 0; + perf_output_copy(handle, &zero, pad); + } + +out_put: + ring_buffer_put(rb); +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -6560,6 +6691,13 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_PHYS_ADDR) perf_output_put(handle, data->phys_addr); + if (sample_type & PERF_SAMPLE_AUX) { + perf_output_put(handle, data->aux_size); + + if (data->aux_size) + perf_aux_sample_output(event, handle, data); + } + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; @@ -6754,6 +6892,35 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_PHYS_ADDR) data->phys_addr = perf_virt_to_phys(data->addr); + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + header->size += sizeof(u64); /* size */ + + /* + * Given the 16bit nature of header::size, an AUX sample can + * easily overflow it, what with all the preceding sample bits. + * Make sure this doesn't happen by using up to U16_MAX bytes + * per sample in total (rounded down to 8 byte boundary). + */ + size = min_t(size_t, U16_MAX - header->size, + event->attr.aux_sample_size); + size = rounddown(size, 8); + size = perf_prepare_sample_aux(event, data, size); + + WARN_ON_ONCE(size + header->size > U16_MAX); + header->size += size; + } + /* + * If you're adding more sample types here, you likely need to do + * something about the overflowing header::size, like repurpose the + * lowest 3 bits of size, which should be always zero at the moment. + * This raises a more important question, do we really need 512k sized + * samples and why, so good argumentation is in order for whatever you + * do here next. + */ + WARN_ON_ONCE(header->size & 7); } static __always_inline int @@ -10720,7 +10887,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->size = size; - if (attr->__reserved_1 || attr->__reserved_2) + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) @@ -11291,7 +11458,7 @@ SYSCALL_DEFINE5(perf_event_open, } } - if (event->attr.aux_output && !perf_get_aux_event(event, group_leader)) { + if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader)) { err = -EINVAL; goto err_locked; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 6e87b358e082..00a0d3f08de3 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -50,6 +50,7 @@ struct ring_buffer { unsigned long aux_mmap_locked; void (*free_aux)(void *); refcount_t aux_refcount; + int aux_in_sampling; void **aux_pages; void *aux_priv; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ffb59a4ef4ff..07ab081952f3 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -562,6 +562,42 @@ void *perf_get_aux(struct perf_output_handle *handle) } EXPORT_SYMBOL_GPL(perf_get_aux); +/* + * Copy out AUX data from an AUX handle. + */ +long perf_output_copy_aux(struct perf_output_handle *aux_handle, + struct perf_output_handle *handle, + unsigned long from, unsigned long to) +{ + unsigned long tocopy, remainder, len = 0; + struct ring_buffer *rb = aux_handle->rb; + void *addr; + + from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1; + to &= (rb->aux_nr_pages << PAGE_SHIFT) - 1; + + do { + tocopy = PAGE_SIZE - offset_in_page(from); + if (to > from) + tocopy = min(tocopy, to - from); + if (!tocopy) + break; + + addr = rb->aux_pages[from >> PAGE_SHIFT]; + addr += offset_in_page(from); + + remainder = perf_output_copy(handle, addr, tocopy); + if (remainder) + return -EFAULT; + + len += tocopy; + from += tocopy; + from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1; + } while (to != from); + + return len; +} + #define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY) static struct page *rb_alloc_aux_page(int node, int order) -- Gitee From 184c6d9e76bd2db1fe36f01645568354d3ed5b23 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 1 Apr 2020 13:16:09 +0300 Subject: [PATCH 03/77] perf evsel: Move and globalize perf_evsel__find_pmu() and perf_evsel__is_aux_event() commit e12ee9f7513cb5dbe8b12aac030dfbeff35b3766 upstream Move and globalize 2 functions from the auxtrace specific sources so that they can be reused. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20200401101613.6201-13-adrian.hunter@intel.com [ Move to pmu.c, as moving to evsel.h breaks the python binding ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/util/auxtrace.c | 19 ------------------- tools/perf/util/evsel.h | 3 +++ tools/perf/util/pmu.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 23a8c152e568..4086d5848734 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -59,25 +59,6 @@ #include "symbol/kallsyms.h" #include -static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - return pmu; -} - -static bool perf_evsel__is_aux_event(struct evsel *evsel) -{ - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); - - return pmu && pmu->auxtrace; -} - /* * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 88882e55294e..da65be7ca536 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -152,6 +152,9 @@ int perf_evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), void (*fini)(struct evsel *evsel)); +struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel); +bool perf_evsel__is_aux_event(struct evsel *evsel); + struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); static inline struct evsel *evsel__new(struct perf_event_attr *attr) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 99227038063b..36e3a777d20d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,6 +18,7 @@ #include #include #include "debug.h" +#include "evsel.h" #include "pmu.h" #include "parse-events.h" #include "header.h" @@ -882,6 +883,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + +bool perf_evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + struct perf_pmu *perf_pmu__find(const char *name) { struct perf_pmu *pmu; -- Gitee From a116e54246db8504566d3b511c1dd0f7830d96ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 29 Aug 2019 13:31:48 +0200 Subject: [PATCH 04/77] perf env: Add perf_env__numa_node() commit 389799a7a1e86c55f38897e679762efadcc9dedd upstream To speed up cpu to node lookup, add perf_env__numa_node(), that creates cpu array on the first lookup, that holds numa nodes for each stored cpu. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Joe Mario Cc: Kan Liang Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190904073415.723-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/util/env.c | 40 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 6 ++++++ 2 files changed, 46 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 63882f7ba781..350d91faa5d2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -184,6 +184,7 @@ void perf_env__exit(struct perf_env *env) zfree(&env->sibling_threads); zfree(&env->pmu_mappings); zfree(&env->cpu); + zfree(&env->numa_map); for (i = 0; i < env->nr_numa_nodes; i++) perf_cpu_map__put(env->numa_nodes[i].map); @@ -358,3 +359,42 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } + + +int perf_env__numa_node(struct perf_env *env, int cpu) +{ + if (!env->nr_numa_map) { + struct numa_node *nn; + int i, nr = 0; + + for (i = 0; i < env->nr_numa_nodes; i++) { + nn = &env->numa_nodes[i]; + nr = max(nr, perf_cpu_map__max(nn->map)); + } + + nr++; + + /* + * We initialize the numa_map array to prepare + * it for missing cpus, which return node -1 + */ + env->numa_map = malloc(nr * sizeof(int)); + if (!env->numa_map) + return -1; + + for (i = 0; i < nr; i++) + env->numa_map[i] = -1; + + env->nr_numa_map = nr; + + for (i = 0; i < env->nr_numa_nodes; i++) { + int tmp, j; + + nn = &env->numa_nodes[i]; + perf_cpu_map__for_each_cpu(j, tmp, nn->map) + env->numa_map[j] = i; + } + } + + return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 9d4d13771764..b6c716dfc776 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -87,6 +87,10 @@ struct perf_env { struct rb_root btfs; u32 btfs_cnt; } bpf_progs; + + /* For fast cpu to numa node lookup via perf_env__numa_node */ + int *numa_map; + int nr_numa_map; }; enum perf_compress_type { @@ -120,4 +124,6 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); + +int perf_env__numa_node(struct perf_env *env, int cpu); #endif /* __PERF_ENV_H */ -- Gitee From 945cd6ab3ddafde7060b02647116ab9230577548 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 17 Aug 2021 17:15:07 -0500 Subject: [PATCH 05/77] perf env: Add perf_env__cpuid, perf_env__{nr_}pmu_mappings commit 9fe8895a27a840095281864803b128ddc26dcd30 upstream To be used by IBS raw data display: It needs the recorder's cpuid in order to determine which errata workarounds to apply to the data, and the pmu_mappings are needed in order to figure out which PMU sample type is IBS Fetch vs. IBS Op. When not available from perf.data, we assume local operation, and retrieve cpuid and pmu mappings directly from the running system. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Boris Ostrovsky Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joao Martins Cc: Konrad Rzeszutek Wilk Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: https //lore.kernel.org/r/20210817221509.88391-2-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/util/env.c | 78 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 5 +++ 2 files changed, 83 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 350d91faa5d2..6b8252dd42c1 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -11,6 +11,7 @@ #include #include #include +#include "strbuf.h" struct perf_env perf_env; @@ -262,6 +263,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_pmu_mappings(struct perf_env *env) +{ + struct perf_pmu *pmu = NULL; + u32 pmu_num = 0; + struct strbuf sb; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + } + if (!pmu_num) { + pr_debug("pmu mappings not available\n"); + return -ENOENT; + } + env->nr_pmu_mappings = pmu_num; + + if (strbuf_init(&sb, 128 * pmu_num) < 0) + return -ENOMEM; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0) + goto error; + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto error; + } + + env->pmu_mappings = strbuf_detach(&sb, NULL); + + return 0; + +error: + strbuf_release(&sb); + return -1; +} + int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; @@ -360,6 +400,44 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__cpuid(struct perf_env *env) +{ + int status; + + if (!env || !env->cpuid) { /* Assume local operation */ + status = perf_env__read_cpuid(env); + if (status) + return NULL; + } + + return env->cpuid; +} + +int perf_env__nr_pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->nr_pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return 0; + } + + return env->nr_pmu_mappings; +} + +const char *perf_env__pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return NULL; + } + + return env->pmu_mappings; +} int perf_env__numa_node(struct perf_env *env, int cpu) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index b6c716dfc776..0b862b4a01e3 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -109,11 +109,16 @@ void perf_env__exit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpuid(struct perf_env *env); +int perf_env__read_pmu_mappings(struct perf_env *env); +int perf_env__nr_pmu_mappings(struct perf_env *env); +const char *perf_env__pmu_mappings(struct perf_env *env); + int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); -- Gitee From 53c1598491b5b5442dcb3bb8987b726e891ed616 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 4 Oct 2021 16:41:13 -0500 Subject: [PATCH 06/77] perf evsel: Make evsel__env() always return a valid env commit 7b830875d22d68f9a49a02a23c24272513ddb392 upstream It's possible to have an evsel and evsel->evlist populated without an evsel->evlist->env, when, e.g., cmd_record is in its error path. Future patches will add support for evsel__open_strerror to be able to customize error messaging based on perf_env__{arch,cpuid}, so let's have evsel__env return &perf_env instead of NULL in that case. Reviewed-by: Kajol Jain Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Boris Ostrovsky Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joao Martins Cc: Konrad Rzeszutek Wilk Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211004214114.188477-1-kim.phillips@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 39912d9b51b3..797534dbee08 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2547,7 +2547,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, struct perf_env *perf_evsel__env(struct evsel *evsel) { - if (evsel && evsel->evlist) + if (evsel && evsel->evlist && evsel->evlist->env) return evsel->evlist->env; return &perf_env; } -- Gitee From 981395e178f1f5cfe55b6d6d9c6f02569fb3c5af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Apr 2020 10:24:04 -0300 Subject: [PATCH 07/77] perf record: Move sb_evlist to 'struct record' commit bc477d7983e345262757568ec27be0395dc2fe73 upstream Where state related to a 'perf record' session is grouped. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200429131106.27974-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ceae6168d329..a0d247edf55a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -80,6 +80,7 @@ struct record { struct auxtrace_record *itr; struct evlist *evlist; struct perf_session *session; + struct evlist *sb_evlist; int realtime_prio; bool no_buildid; bool no_buildid_set; @@ -1360,7 +1361,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; - struct evlist *sb_evlist = NULL; int fd; float ratio = 0; @@ -1472,9 +1472,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } if (!opts->no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &session->header.env); + bpf_event__add_sb_event(&rec->sb_evlist, &session->header.env); - if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { + if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1748,7 +1748,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(rec->sb_evlist); return status; } -- Gitee From 8d3dacf7a77634dd1aa0ff5ee29147af980d35a3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Apr 2020 10:40:54 -0300 Subject: [PATCH 08/77] perf top: Move sb_evlist to 'struct perf_top' commit ca6c9c8b107f9788662117587cd24bbb19cea94d upstream Where state related to a 'perf top' session is grouped. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200429131106.27974-3-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/builtin-top.c | 7 +++---- tools/perf/util/top.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a30d62186f5e..c228f872ace4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1542,7 +1542,6 @@ int cmd_top(int argc, const char **argv) OPTS_EVSWITCH(&top.evswitch), OPT_END() }; - struct evlist *sb_evlist = NULL; const char * const top_usage[] = { "perf top []", NULL @@ -1683,9 +1682,9 @@ int cmd_top(int argc, const char **argv) } if (!top.record_opts.no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &perf_env); + bpf_event__add_sb_event(&top.sb_evlist, &perf_env); - if (perf_evlist__start_sb_thread(sb_evlist, target)) { + if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1693,7 +1692,7 @@ int cmd_top(int argc, const char **argv) status = __cmd_top(&top); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(top.sb_evlist); out_delete_evlist: evlist__delete(top.evlist); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f117d4f4821e..7bea36a61645 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -18,7 +18,7 @@ struct perf_session; struct perf_top { struct perf_tool tool; - struct evlist *evlist; + struct evlist *evlist, *sb_evlist; struct record_opts record_opts; struct annotation_options annotation_opts; struct evswitch evswitch; -- Gitee From dba1ab35b63aa539fd80065e785d297fa9a6815d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Apr 2020 12:24:51 -0300 Subject: [PATCH 09/77] perf bpf: Decouple creating the evlist from adding the SB event commit b38d85ef49cf6af9d1deaaf01daf0986d47e6c7a upstream Renaming bpf_event__add_sb_event() to evlist__add_sb_event() and requiring that the evlist be allocated beforehand. This will allow using the same side band thread and evlist to be used for multiple purposes in addition to react to PERF_RECORD_BPF_EVENT soon after they are generated. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200429131106.27974-4-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hemanth Selam Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 17 ++++++++++++++--- tools/perf/builtin-top.c | 15 +++++++++++++-- tools/perf/util/bpf-event.c | 3 +-- tools/perf/util/bpf-event.h | 7 +++---- tools/perf/util/evlist.c | 21 ++++----------------- tools/perf/util/evlist.h | 2 +- 6 files changed, 36 insertions(+), 29 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a0d247edf55a..997b03ba6dad 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1463,16 +1463,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = -1; if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - err = -1; goto out_child; } - if (!opts->no_bpf_event) - bpf_event__add_sb_event(&rec->sb_evlist, &session->header.env); + if (!opts->no_bpf_event) { + rec->sb_evlist = evlist__new(); + + if (rec->sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + goto out_child; + } + + if (evlist__add_bpf_sb_event(rec->sb_evlist, &session->header.env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + goto out_child; + } + } if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c228f872ace4..65fe4f5f5de2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1681,8 +1681,19 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - if (!top.record_opts.no_bpf_event) - bpf_event__add_sb_event(&top.sb_evlist, &perf_env); + if (!top.record_opts.no_bpf_event) { + top.sb_evlist = evlist__new(); + + if (top.sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + goto out_delete_evlist; + } + + if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + goto out_delete_evlist; + } + } if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 782c0c8a9a83..96e0a31a38f6 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -422,8 +422,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) return 0; } -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env) +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 81fdc88e6c1a..68f315c3df5b 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,8 +33,7 @@ struct btf_node { #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env); +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, struct perf_env *env, FILE *fp); @@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused, return 0; } -static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused, - struct perf_env *env __maybe_unused) +static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 505b890ac85c..b110deb88425 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1672,39 +1672,26 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, return leader; } -int perf_evlist__add_sb_event(struct evlist **evlist, +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, perf_evsel__sb_cb_t cb, void *data) { struct evsel *evsel; - bool new_evlist = (*evlist) == NULL; - - if (*evlist == NULL) - *evlist = evlist__new(); - if (*evlist == NULL) - return -1; if (!attr->sample_id_all) { pr_warning("enabling sample_id_all for all side band events\n"); attr->sample_id_all = 1; } - evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries); + evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); if (!evsel) - goto out_err; + return -1; evsel->side_band.cb = cb; evsel->side_band.data = data; - evlist__add(*evlist, evsel); + evlist__add(evlist, evsel); return 0; - -out_err: - if (new_evlist) { - evlist__delete(*evlist); - *evlist = NULL; - } - return -1; } static void *perf_evlist__poll_thread(void *arg) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d89e72bd1c81..10fc888d6127 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -107,7 +107,7 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist, int perf_evlist__add_dummy(struct evlist *evlist); -int perf_evlist__add_sb_event(struct evlist **evlist, +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, perf_evsel__sb_cb_t cb, void *data); -- Gitee From 491a737c9d3a9b6ec9e959ec5d8513854a33dba5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:09 +0200 Subject: [PATCH 10/77] libperf: Add perf_mmap__init() function commit 353120b48d4f61288e4745b0c8a191784b11c0f4 upstream Add perf_mmap__init() function to initialize 'struct perf_mmap' objects. Add it to a new mmap.c source file, that will carry all the mmap related functions. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/lib/Build | 1 + tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 9 +++++++++ tools/perf/util/evlist.c | 5 ++--- 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tools/perf/lib/mmap.c diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build index c31f1c111f8f..2ef9a4ec6d99 100644 --- a/tools/perf/lib/Build +++ b/tools/perf/lib/Build @@ -3,6 +3,7 @@ libperf-y += cpumap.o libperf-y += threadmap.o libperf-y += evsel.o libperf-y += evlist.o +libperf-y += mmap.o libperf-y += zalloc.o libperf-y += xyarray.o libperf-y += lib.o diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index ba1e519c15b9..e25890de6a55 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -29,4 +29,6 @@ struct perf_mmap { char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; +void perf_mmap__init(struct perf_mmap *map, bool overwrite); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c new file mode 100644 index 000000000000..3da6177510e6 --- /dev/null +++ b/tools/perf/lib/mmap.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +void perf_mmap__init(struct perf_mmap *map, bool overwrite) +{ + map->fd = -1; + map->overwrite = overwrite; + refcount_set(&map->refcnt, 0); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b110deb88425..e3de04e41e81 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -605,8 +605,6 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, return NULL; for (i = 0; i < evlist->core.nr_mmaps; i++) { - map[i].core.fd = -1; - map[i].core.overwrite = overwrite; /* * When the perf_mmap() call is made we grab one refcount, plus * one extra to let perf_mmap__consume() get the last @@ -616,8 +614,9 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - refcount_set(&map[i].core.refcnt, 0); + perf_mmap__init(&map[i].core, overwrite); } + return map; } -- Gitee From 8123d8830e300dce2bf36d7a3def7a3d609ae4de Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:10 +0200 Subject: [PATCH 11/77] libperf: Add 'struct perf_mmap_param' commit e440979faf6ac8048e1792af383df6af78dd1cb0 upstream Add libperf's version of mmap params 'struct perf_mmap_param' object with the basics: 'prot' and 'mask'. Encapsulate it in the current 'struct mmap_params' object. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/lib/include/internal/mmap.h | 5 +++++ tools/perf/util/evlist.c | 14 +++++++++----- tools/perf/util/mmap.c | 4 ++-- tools/perf/util/mmap.h | 3 ++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index e25890de6a55..b26806b36bb6 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -29,6 +29,11 @@ struct perf_mmap { char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; +struct perf_mmap_param { + int prot; + int mask; +}; + void perf_mmap__init(struct perf_mmap *map, bool overwrite); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e3de04e41e81..430bfea8b3e5 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -643,7 +643,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, int fd; int cpu; - mp->prot = PROT_READ | PROT_WRITE; + mp->core.prot = PROT_READ | PROT_WRITE; if (evsel->core.attr.write_backward) { output = _output_overwrite; maps = evlist->overwrite_mmap; @@ -656,7 +656,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } - mp->prot &= ~PROT_WRITE; + mp->core.prot &= ~PROT_WRITE; } if (evsel->core.system_wide && thread) @@ -897,8 +897,12 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, - .comp_level = comp_level }; + struct mmap_params mp = { + .nr_cblocks = nr_cblocks, + .affinity = affinity, + .flush = flush, + .comp_level = comp_level + }; if (!evlist->mmap) evlist->mmap = evlist__alloc_mmap(evlist, false); @@ -910,7 +914,7 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, evlist->core.mmap_len = evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->core.mmap_len); - mp.mask = evlist->core.mmap_len - page_size - 1; + mp.core.mask = evlist->core.mmap_len - page_size - 1; auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a35dc57d5995..a496ced5ed2a 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -370,8 +370,8 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) */ refcount_set(&map->core.refcnt, 2); map->core.prev = 0; - map->core.mask = mp->mask; - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + map->core.mask = mp->core.mask; + map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->core.prot, MAP_SHARED, fd, 0); if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e567c1c875bd..4ff75d8aeb05 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -37,7 +37,8 @@ struct mmap { }; struct mmap_params { - int prot, mask, nr_cblocks, affinity, flush, comp_level; + struct perf_mmap_param core; + int nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; -- Gitee From 40e8828f388cb9ddc281660ec0c4a7f8705e9efb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:11 +0200 Subject: [PATCH 12/77] libperf: Adopt perf_mmap__mmap_len() function from tools/perf commit bf59b3053e63783520c2810fc3f676553bc7eedd upstream Move perf_mmap__mmap_len() from tools/perf wto libperf, it will be used in the following patches. And rename the existing perf's function to mmap__mmap_len(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 4 ++-- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 6 ++++++ tools/perf/util/mmap.c | 20 ++++++++++---------- tools/perf/util/mmap.h | 2 +- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 997b03ba6dad..41b1c12e22ba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -277,7 +277,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size if (record__comp_enabled(aio->rec)) { size = zstd_compress(aio->rec->session, aio->data + aio->size, - perf_mmap__mmap_len(map) - aio->size, + mmap__mmap_len(map) - aio->size, buf, size); } else { memcpy(aio->data + aio->size, buf, size); @@ -489,7 +489,7 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); + size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size); bf = map->data; } diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index b26806b36bb6..e7a67260940c 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -34,6 +34,8 @@ struct perf_mmap_param { int mask; }; +size_t perf_mmap__mmap_len(struct perf_mmap *map); + void perf_mmap__init(struct perf_mmap *map, bool overwrite); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 3da6177510e6..cc4284da4d99 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include void perf_mmap__init(struct perf_mmap *map, bool overwrite) { @@ -7,3 +8,8 @@ void perf_mmap__init(struct perf_mmap *map, bool overwrite) map->overwrite = overwrite; refcount_set(&map->refcnt, 0); } + +size_t perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a496ced5ed2a..a8e81c4cbae8 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -23,9 +23,9 @@ #include "../perf.h" #include /* page_size */ -size_t perf_mmap__mmap_len(struct mmap *map) +size_t mmap__mmap_len(struct mmap *map) { - return map->core.mask + 1 + page_size; + return perf_mmap__mmap_len(&map->core); } /* When check_messup is true, 'end' must points to a good entry */ @@ -170,7 +170,7 @@ static int perf_mmap__aio_enabled(struct mmap *map) #ifdef HAVE_LIBNUMA_SUPPORT static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->aio.data[idx] == MAP_FAILED) { map->aio.data[idx] = NULL; @@ -183,7 +183,7 @@ static int perf_mmap__aio_alloc(struct mmap *map, int idx) static void perf_mmap__aio_free(struct mmap *map, int idx) { if (map->aio.data[idx]) { - munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); + munmap(map->aio.data[idx], mmap__mmap_len(map)); map->aio.data[idx] = NULL; } } @@ -196,7 +196,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; - mmap_len = perf_mmap__mmap_len(map); + mmap_len = mmap__mmap_len(map); node_mask = 1UL << cpu__get_node(cpu); if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", @@ -210,7 +210,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); + map->aio.data[idx] = malloc(mmap__mmap_len(map)); if (map->aio.data[idx] == NULL) return -1; @@ -315,11 +315,11 @@ void perf_mmap__munmap(struct mmap *map) { perf_mmap__aio_munmap(map); if (map->data != NULL) { - munmap(map->data, perf_mmap__mmap_len(map)); + munmap(map->data, mmap__mmap_len(map)); map->data = NULL; } if (map->core.base != NULL) { - munmap(map->core.base, perf_mmap__mmap_len(map)); + munmap(map->core.base, mmap__mmap_len(map)); map->core.base = NULL; map->core.fd = -1; refcount_set(&map->core.refcnt, 0); @@ -371,7 +371,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) refcount_set(&map->core.refcnt, 2); map->core.prev = 0; map->core.mask = mp->core.mask; - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->core.prot, + map->core.base = mmap(NULL, mmap__mmap_len(map), mp->core.prot, MAP_SHARED, fd, 0); if (map->core.base == MAP_FAILED) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", @@ -389,7 +389,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) map->comp_level = mp->comp_level; if (map->comp_level && !perf_mmap__aio_enabled(map)) { - map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { pr_debug2("failed to mmap data buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 4ff75d8aeb05..2b97dc6d9ee2 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -67,7 +67,7 @@ union perf_event *perf_mmap__read_event(struct mmap *map); int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)); -size_t perf_mmap__mmap_len(struct mmap *map); +size_t mmap__mmap_len(struct mmap *map); int perf_mmap__read_init(struct mmap *md); void perf_mmap__read_done(struct mmap *map); -- Gitee From f9fbcadce444d9cc63ea6ab661d10c59bb80c576 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:12 +0200 Subject: [PATCH 13/77] libperf: Adopt perf_mmap__mmap() function from tools/perf commit 32c261c070c222858148c2171698d2954242ddd9 upstream Move perf_mmap__mmap() from tools/perf to libperf, it will be used in the following patches. And rename the existing perf's function to mmap__mmap(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/mmap.c | 18 ++++++++++++++++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 12 +++--------- tools/perf/util/mmap.h | 2 +- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index e7a67260940c..7067b70c6f61 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -37,5 +37,7 @@ struct perf_mmap_param { size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index cc4284da4d99..b216a7db857f 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -13,3 +14,20 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) { return map->mask + 1 + page_size; } + +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu) +{ + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { + map->base = NULL; + return -1; + } + + map->fd = fd; + map->cpu = cpu; + return 0; +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 430bfea8b3e5..3df73d2f4dfc 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -671,7 +671,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (*output == -1) { *output = fd; - if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) + if (mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a8e81c4cbae8..acef6e3f6b80 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -353,7 +353,7 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params CPU_SET(map->core.cpu, &map->affinity_mask); } -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { /* * The last one will be done at perf_mmap__consume(), so that we @@ -369,18 +369,12 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) * perf_evlist__filter_pollfd(). */ refcount_set(&map->core.refcnt, 2); - map->core.prev = 0; - map->core.mask = mp->core.mask; - map->core.base = mmap(NULL, mmap__mmap_len(map), mp->core.prot, - MAP_SHARED, fd, 0); - if (map->core.base == MAP_FAILED) { + + if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - map->core.base = NULL; return -1; } - map->core.fd = fd; - map->core.cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 2b97dc6d9ee2..a60e6ead7255 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -42,7 +42,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void perf_mmap__munmap(struct mmap *map); void perf_mmap__get(struct mmap *map); -- Gitee From e4c3023fe1337adb4c4b97baa1a032093f92502c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:13 +0200 Subject: [PATCH 14/77] libperf: Adopt perf_mmap__get() function from tools/perf commit e75710f063e29ae7715c57b45eb27c2d504b32ca upstream Move perf_mmap__get() from tools/perf to libperf in the internal header internal/mmap.h. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 2 +- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/lib/mmap.c | 5 +++++ tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 5 ----- tools/perf/util/mmap.h | 1 - 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 41b1c12e22ba..4973bee8a84f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -294,7 +294,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size * after started aio request completion or at record__aio_push() * if the request failed to start. */ - perf_mmap__get(map); + perf_mmap__get(&map->core); } aio->size += size; diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 7067b70c6f61..2e68974bffb4 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -39,5 +39,6 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); +void perf_mmap__get(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index b216a7db857f..b765e0505bb6 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -31,3 +31,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, map->cpu = cpu; return 0; } + +void perf_mmap__get(struct perf_mmap *map) +{ + refcount_inc(&map->refcnt); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3df73d2f4dfc..a40d42d79f5c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -677,7 +677,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) return -1; - perf_mmap__get(&maps[idx]); + perf_mmap__get(&maps[idx].core); } revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index acef6e3f6b80..be691b58d8ab 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -110,11 +110,6 @@ static bool perf_mmap__empty(struct mmap *map) return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } -void perf_mmap__get(struct mmap *map) -{ - refcount_inc(&map->core.refcnt); -} - void perf_mmap__put(struct mmap *map) { BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index a60e6ead7255..a73402ee8fe0 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,7 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void perf_mmap__munmap(struct mmap *map); -void perf_mmap__get(struct mmap *map); void perf_mmap__put(struct mmap *map); void perf_mmap__consume(struct mmap *map); -- Gitee From a4d4b4be17081262fcda9fbc4ac3032c80d589e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:14 +0200 Subject: [PATCH 15/77] libperf: Adopt perf_mmap__unmap() function from tools/perf commit 59d7ea620b58fa7d107834a81528e3098f1c27b0 upstream Move perf_mmap__unmap() from tools/perf to libperf, to internal header internal/mmap.h. It will be used in the following patches. And rename the existing perf's function to mmap__munmap(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/lib/include/internal/mmap.h | 1 + tools/perf/lib/mmap.c | 10 ++++++++++ tools/perf/util/evlist.c | 4 ++-- tools/perf/util/mmap.c | 11 +++-------- tools/perf/util/mmap.h | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 2e68974bffb4..5c2ca9ab12cd 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -39,6 +39,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, bool overwrite); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); +void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index b765e0505bb6..6eb228d89206 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -32,6 +32,16 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, return 0; } +void perf_mmap__munmap(struct perf_mmap *map) +{ + if (map && map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + refcount_set(&map->refcnt, 0); + } +} + void perf_mmap__get(struct perf_mmap *map) { refcount_inc(&map->refcnt); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a40d42d79f5c..75711d2429b6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -577,11 +577,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) if (evlist->mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->mmap[i]); + mmap__munmap(&evlist->mmap[i]); if (evlist->overwrite_mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->overwrite_mmap[i]); + mmap__munmap(&evlist->overwrite_mmap[i]); } void evlist__munmap(struct evlist *evlist) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index be691b58d8ab..2c73b5bcf74e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -115,7 +115,7 @@ void perf_mmap__put(struct mmap *map) BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); if (refcount_dec_and_test(&map->core.refcnt)) - perf_mmap__munmap(map); + mmap__munmap(map); } void perf_mmap__consume(struct mmap *map) @@ -306,19 +306,14 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) } #endif -void perf_mmap__munmap(struct mmap *map) +void mmap__munmap(struct mmap *map) { + perf_mmap__munmap(&map->core); perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, mmap__mmap_len(map)); map->data = NULL; } - if (map->core.base != NULL) { - munmap(map->core.base, mmap__mmap_len(map)); - map->core.base = NULL; - map->core.fd = -1; - refcount_set(&map->core.refcnt, 0); - } auxtrace_mmap__munmap(&map->auxtrace_mmap); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index a73402ee8fe0..6a18b2990059 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -43,7 +43,7 @@ struct mmap_params { }; int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); -void perf_mmap__munmap(struct mmap *map); +void mmap__munmap(struct mmap *map); void perf_mmap__put(struct mmap *map); -- Gitee From 82aaa97b6d2702a787475f4ceba07df1f5e5bbf0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:15 +0200 Subject: [PATCH 16/77] libperf: Adopt perf_mmap__put() function from tools/perf commit 80e53d1148231d7d4fdc4cd89e5393616b33bf82 upstream Move perf_mmap__put() from tools/perf to libperf. Once perf_mmap__put() is moved, we need a way to call application related unmap code (AIO and aux related code for eprf), when the map goes away. Add the perf_mmap::unmap callback to do that. The unmap path from perf is: perf_mmap__put (libperf) perf_mmap__munmap (libperf) map->unmap_cb -> perf_mmap__unmap_cb (perf) mmap__munmap (perf) Committer notes: Add missing linux/kernel.h to tools/perf/lib/mmap.c to get the BUG_ON definition. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 4 ++-- tools/perf/lib/include/internal/mmap.h | 31 ++++++++++++++++---------- tools/perf/lib/mmap.c | 15 ++++++++++++- tools/perf/util/evlist.c | 17 +++++++++----- tools/perf/util/mmap.c | 11 +-------- tools/perf/util/mmap.h | 2 -- 6 files changed, 48 insertions(+), 32 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4973bee8a84f..077a0faa9b98 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -198,7 +198,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock) * every aio write request started in record__aio_push() so * decrement it because the request is now complete. */ - perf_mmap__put(md); + perf_mmap__put(&md->core); rc = 1; } else { /* @@ -333,7 +333,7 @@ static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) * map->refcount is decremented in record__aio_complete() after * aio write operation finishes successfully. */ - perf_mmap__put(map); + perf_mmap__put(&map->core); } return ret; diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index 5c2ca9ab12cd..bf9cc7d005ab 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -10,23 +10,28 @@ /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +struct perf_mmap; + +typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map); + /** * struct perf_mmap - perf's ring buffer mmap details * * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ struct perf_mmap { - void *base; - int mask; - int fd; - int cpu; - refcount_t refcnt; - u64 prev; - u64 start; - u64 end; - bool overwrite; - u64 flush; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *base; + int mask; + int fd; + int cpu; + refcount_t refcnt; + u64 prev; + u64 start; + u64 end; + bool overwrite; + u64 flush; + libperf_unmap_cb_t unmap_cb; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); }; struct perf_mmap_param { @@ -36,10 +41,12 @@ struct perf_mmap_param { size_t perf_mmap__mmap_len(struct perf_mmap *map); -void perf_mmap__init(struct perf_mmap *map, bool overwrite); +void perf_mmap__init(struct perf_mmap *map, bool overwrite, + libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, int fd, int cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); +void perf_mmap__put(struct perf_mmap *map); #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 6eb228d89206..89c1e0e8b897 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -2,11 +2,14 @@ #include #include #include +#include -void perf_mmap__init(struct perf_mmap *map, bool overwrite) +void perf_mmap__init(struct perf_mmap *map, bool overwrite, + libperf_unmap_cb_t unmap_cb) { map->fd = -1; map->overwrite = overwrite; + map->unmap_cb = unmap_cb; refcount_set(&map->refcnt, 0); } @@ -40,9 +43,19 @@ void perf_mmap__munmap(struct perf_mmap *map) map->fd = -1; refcount_set(&map->refcnt, 0); } + if (map && map->unmap_cb) + map->unmap_cb(map); } void perf_mmap__get(struct perf_mmap *map) { refcount_inc(&map->refcnt); } + +void perf_mmap__put(struct perf_mmap *map) +{ + BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + + if (refcount_dec_and_test(&map->refcnt)) + perf_mmap__munmap(map); +} diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 75711d2429b6..a0907b3d6800 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -409,7 +409,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, struct mmap *map = fda->priv[fd].ptr; if (map) - perf_mmap__put(map); + perf_mmap__put(&map->core); } int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) @@ -577,11 +577,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) if (evlist->mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - mmap__munmap(&evlist->mmap[i]); + perf_mmap__munmap(&evlist->mmap[i].core); if (evlist->overwrite_mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - mmap__munmap(&evlist->overwrite_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i].core); } void evlist__munmap(struct evlist *evlist) @@ -591,6 +591,13 @@ void evlist__munmap(struct evlist *evlist) zfree(&evlist->overwrite_mmap); } +static void perf_mmap__unmap_cb(struct perf_mmap *map) +{ + struct mmap *m = container_of(map, struct mmap, core); + + mmap__munmap(m); +} + static struct mmap *evlist__alloc_mmap(struct evlist *evlist, bool overwrite) { @@ -614,7 +621,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - perf_mmap__init(&map[i].core, overwrite); + perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb); } return map; @@ -691,7 +698,7 @@ static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, */ if (!evsel->core.system_wide && perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { - perf_mmap__put(&maps[idx]); + perf_mmap__put(&maps[idx].core); return -1; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2c73b5bcf74e..9f150d50cea5 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -110,14 +110,6 @@ static bool perf_mmap__empty(struct mmap *map) return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; } -void perf_mmap__put(struct mmap *map) -{ - BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); - - if (refcount_dec_and_test(&map->core.refcnt)) - mmap__munmap(map); -} - void perf_mmap__consume(struct mmap *map) { if (!map->core.overwrite) { @@ -127,7 +119,7 @@ void perf_mmap__consume(struct mmap *map) } if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(map); + perf_mmap__put(&map->core); } int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, @@ -308,7 +300,6 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) void mmap__munmap(struct mmap *map) { - perf_mmap__munmap(&map->core); perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, mmap__mmap_len(map)); diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 6a18b2990059..78e3c4436ce8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,8 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void mmap__munmap(struct mmap *map); -void perf_mmap__put(struct mmap *map); - void perf_mmap__consume(struct mmap *map); static inline u64 perf_mmap__read_head(struct mmap *mm) -- Gitee From 70413497d3bf544e22af0b506a0d08a82099029c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:16 +0200 Subject: [PATCH 17/77] perf tools: Use perf_mmap way to detect aux mmap commit 1d40ae4e1784bfa1646fd153ca022db21511284f upstream We will move this code to libperf shortly, so we need to free it of 'struct auxtrace_mmap' usage, because it won't be available in libperf (for now). The perf_event_mmap_page::aux_size is set when the aux mmap is mapped, so the check is equivalent. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/util/mmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9f150d50cea5..f246dd403507 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -107,7 +107,9 @@ union perf_event *perf_mmap__read_event(struct mmap *map) static bool perf_mmap__empty(struct mmap *map) { - return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; + struct perf_event_mmap_page *pc = map->core.base; + + return perf_mmap__read_head(map) == map->core.prev && !pc->aux_size; } void perf_mmap__consume(struct mmap *map) -- Gitee From 0106ea8b395d3f4db8f6b26a19c38c4ee3a3dce7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:17 +0200 Subject: [PATCH 18/77] libperf: Adopt perf_mmap__consume() function from tools/perf commit 7728fa0cfaeb7d25b12c8865c733359cc8e5fb13 upstream Move perf_mmap__consume() vrom tools/perf to libperf and export it in the perf/mmap.h header. Move also the needed helpers perf_mmap__write_tail(), perf_mmap__read_head() and perf_mmap__empty(). Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 3 +- tools/perf/builtin-kvm.c | 5 +-- tools/perf/builtin-top.c | 3 +- tools/perf/builtin-trace.c | 3 +- tools/perf/lib/Makefile | 5 +-- tools/perf/lib/include/internal/mmap.h | 2 ++ tools/perf/lib/include/perf/mmap.h | 11 +++++++ tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 32 ++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/code-reading.c | 3 +- tools/perf/tests/keep-tracking.c | 3 +- tools/perf/tests/mmap-basic.c | 3 +- tools/perf/tests/openat-syscall-tp-fields.c | 3 +- tools/perf/tests/perf-record.c | 3 +- tools/perf/tests/sw-clock.c | 3 +- tools/perf/tests/switch-tracking.c | 3 +- tools/perf/tests/task-exit.c | 3 +- tools/perf/util/evlist.c | 3 +- tools/perf/util/mmap.c | 32 +++++--------------- tools/perf/util/mmap.h | 12 -------- tools/perf/util/python.c | 3 +- 23 files changed, 87 insertions(+), 54 deletions(-) create mode 100644 tools/perf/lib/include/perf/mmap.h diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index fa947952c16a..3397898824f6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -139,7 +140,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe comm2_time = sample.time; } next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 58a9e0989491..0c04c4c6c1eb 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -46,6 +46,7 @@ #include #include #include +#include static const char *get_filename_for_perf_kvm(void) { @@ -766,7 +767,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_mmap__read_event(md)) != NULL) { err = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); if (err) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); pr_err("Failed to parse sample\n"); return -1; } @@ -776,7 +777,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) { pr_err("Failed to enqueue sample: %d\n", err); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 65fe4f5f5de2..1c380edb8c66 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -82,6 +82,7 @@ #include #include +#include static volatile int done; static volatile int resize; @@ -885,7 +886,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) if (ret) break; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (top->qe.rotate) { pthread_mutex_lock(&top->qe.mutex); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6052eb057821..56cc5fa99ff8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -77,6 +77,7 @@ #include #include +#include #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 @@ -3509,7 +3510,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err) goto out_disable; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (interrupted) goto out_disable; diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index 85ccb8c439a4..0889c9c3ec19 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -172,8 +172,9 @@ install_headers: $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index bf9cc7d005ab..ee536c4441bb 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -49,4 +49,6 @@ void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); +u64 perf_mmap__read_head(struct perf_mmap *map); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h new file mode 100644 index 000000000000..d3678d1834d9 --- /dev/null +++ b/tools/perf/lib/include/perf/mmap.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LIBPERF_MMAP_H +#define __LIBPERF_MMAP_H + +#include + +struct perf_mmap; + +LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); + +#endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index ab8dbde1136c..d7b327f224e2 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -40,6 +40,7 @@ LIBPERF_0.0.1 { perf_evlist__next; perf_evlist__set_maps; perf_evlist__poll; + perf_mmap__consume; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 89c1e0e8b897..4cada1c89fdb 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include #include #include @@ -59,3 +62,32 @@ void perf_mmap__put(struct perf_mmap *map) if (refcount_dec_and_test(&map->refcnt)) perf_mmap__munmap(map); } + +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +{ + ring_buffer_write_tail(md->base, tail); +} + +u64 perf_mmap__read_head(struct perf_mmap *map) +{ + return ring_buffer_read_head(map->base); +} + +static bool perf_mmap__empty(struct perf_mmap *map) +{ + struct perf_event_mmap_page *pc = map->base; + + return perf_mmap__read_head(map) == map->prev && !pc->aux_size; +} + +void perf_mmap__consume(struct perf_mmap *map) +{ + if (!map->overwrite) { + u64 old = map->prev; + + perf_mmap__write_tail(map, old); + } + + if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + perf_mmap__put(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 5128f727c0ef..89aa98af3d63 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -13,6 +13,7 @@ #include "util/mmap.h" #include #include +#include #define NR_ITERS 111 diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 8669bb85e7c7..cd65469cd01f 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "tests.h" #include "llvm.h" #include "debug.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index f5764a3890b9..b5a57bb54c25 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "debug.h" #include "dso.h" @@ -430,7 +431,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, while ((event = perf_mmap__read_event(md)) != NULL) { ret = process_event(machine, evlist, event, state); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) return ret; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 92c7d591bcac..31c005e07b17 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -46,7 +47,7 @@ static int find_comm(struct evlist *evlist, const char *comm) (pid_t)event->comm.tid == getpid() && strcmp(event->comm.comm, comm) == 0) found += 1; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 3a22dce991ba..b176acc4f52e 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * This test will generate random numbers of calls to some getpid syscalls, @@ -139,7 +140,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse goto out_delete_evlist; } nr_events[evsel->idx]++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 2b5c46813053..bbf8ba320721 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -13,6 +13,7 @@ #include "debug.h" #include "util/mmap.h" #include +#include #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 @@ -103,7 +104,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest ++nr_events; if (type != PERF_RECORD_SAMPLE) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); continue; } diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 437426be29e9..6ebbcc65749e 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -6,6 +6,7 @@ #include #include +#include #include "evlist.h" #include "evsel.h" #include "debug.h" @@ -276,7 +277,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus ++errs; } - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 84519df87f30..1aeb558010c1 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -15,6 +15,7 @@ #include "util/mmap.h" #include "util/thread_map.h" #include +#include #define NR_LOOPS 10000000 @@ -117,7 +118,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) total_periods += sample.period; nr_samples++; next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index ffa592e0020e..55728b3da057 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "debug.h" #include "parse-events.h" @@ -275,7 +276,7 @@ static int process_events(struct evlist *evlist, while ((event = perf_mmap__read_event(md)) != NULL) { cnt += 1; ret = add_event(evlist, &events, event); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) goto out_free_nodes; } diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index d85c9f608564..a0f1deacfc62 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -12,6 +12,7 @@ #include #include #include +#include static int exited; static int nr_exit; @@ -126,7 +127,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (event->header.type == PERF_RECORD_EXIT) nr_exit++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } perf_mmap__read_done(md); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a0907b3d6800..1952f2b7ee45 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -1741,7 +1742,7 @@ static void *perf_evlist__poll_thread(void *arg) else pr_warning("cannot locate proper evsel for the side band event\n"); - perf_mmap__consume(map); + perf_mmap__consume(&map->core); got_data = true; } perf_mmap__read_done(map); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index f246dd403507..abe7cbe6c95f 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -13,6 +13,7 @@ #include #include #include // sysconf() +#include #ifdef HAVE_LIBNUMA_SUPPORT #include #endif @@ -95,7 +96,7 @@ union perf_event *perf_mmap__read_event(struct mmap *map) /* non-overwirte doesn't pause the ringbuffer */ if (!map->core.overwrite) - map->core.end = perf_mmap__read_head(map); + map->core.end = perf_mmap__read_head(&map->core); event = perf_mmap__read(map, &map->core.start, map->core.end); @@ -105,25 +106,6 @@ union perf_event *perf_mmap__read_event(struct mmap *map) return event; } -static bool perf_mmap__empty(struct mmap *map) -{ - struct perf_event_mmap_page *pc = map->core.base; - - return perf_mmap__read_head(map) == map->core.prev && !pc->aux_size; -} - -void perf_mmap__consume(struct mmap *map) -{ - if (!map->core.overwrite) { - u64 old = map->core.prev; - - perf_mmap__write_tail(map, old); - } - - if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(&map->core); -} - int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, struct auxtrace_mmap_params *mp __maybe_unused, void *userpg __maybe_unused, @@ -420,7 +402,7 @@ static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) */ static int __perf_mmap__read_init(struct mmap *md) { - u64 head = perf_mmap__read_head(md); + u64 head = perf_mmap__read_head(&md->core); u64 old = md->core.prev; unsigned char *data = md->core.base + page_size; unsigned long size; @@ -437,7 +419,7 @@ static int __perf_mmap__read_init(struct mmap *md) WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); md->core.prev = head; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); return -EAGAIN; } @@ -466,7 +448,7 @@ int perf_mmap__read_init(struct mmap *map) int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { - u64 head = perf_mmap__read_head(md); + u64 head = perf_mmap__read_head(&md->core); unsigned char *data = md->core.base + page_size; unsigned long size; void *buf; @@ -499,7 +481,7 @@ int perf_mmap__push(struct mmap *md, void *to, } md->core.prev = head; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); out: return rc; } @@ -518,5 +500,5 @@ void perf_mmap__read_done(struct mmap *map) if (!refcount_read(&map->core.refcnt)) return; - map->core.prev = perf_mmap__read_head(map); + map->core.prev = perf_mmap__read_head(&map->core); } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 78e3c4436ce8..89fb93267ff1 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -45,18 +45,6 @@ struct mmap_params { int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); void mmap__munmap(struct mmap *map); -void perf_mmap__consume(struct mmap *map); - -static inline u64 perf_mmap__read_head(struct mmap *mm) -{ - return ring_buffer_read_head(mm->core.base); -} - -static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) -{ - ring_buffer_write_tail(md->core.base, tail); -} - union perf_event *perf_mmap__read_forward(struct mmap *map); union perf_event *perf_mmap__read_event(struct mmap *map); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 02460362256d..82a4fa6c87bd 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "evlist.h" #include "callchain.h" #include "evsel.h" @@ -1045,7 +1046,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, err = perf_evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) return PyErr_Format(PyExc_OSError, -- Gitee From 0c7e54659b1a2b889adbc0d6bba008b8c30efb61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:18 +0200 Subject: [PATCH 19/77] libperf: Adopt perf_mmap__read_init() from tools/perf commit 7c4d41824f9afc659ba425a41018546531cffd72 upstream Move perf_mmap__read_init() from tools/perf to libperf and export it in perf/mmap.h header. And add pr_debug2()/pr_debug3() macros support, because the code is using them. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/core.h | 2 + tools/perf/lib/include/perf/mmap.h | 1 + tools/perf/lib/internal.h | 2 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 84 ++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 82 +------------------ tools/perf/util/mmap.h | 1 - tools/perf/util/python.c | 2 +- 23 files changed, 107 insertions(+), 98 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 3397898824f6..6a0c3ff78e01 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -118,7 +118,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0c04c4c6c1eb..b6a8078dd446 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -760,7 +760,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, *mmap_time = ULLONG_MAX; md = &evlist->mmap[idx]; - err = perf_mmap__read_init(md); + err = perf_mmap__read_init(&md->core); if (err < 0) return (err == -EAGAIN) ? 0 : -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1c380edb8c66..1f0785d9b583 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -872,7 +872,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) union perf_event *event; md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) return; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 56cc5fa99ff8..416d93dee556 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3500,7 +3500,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index cfd70e720c1c..2a80e4b6f819 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -12,6 +12,8 @@ enum libperf_print_level { LIBPERF_WARN, LIBPERF_INFO, LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, }; typedef int (*libperf_print_fn_t)(enum libperf_print_level level, diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index d3678d1834d9..646e9052b003 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -7,5 +7,6 @@ struct perf_mmap; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); +LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h index dc92f241732e..37db745e1502 100644 --- a/tools/perf/lib/internal.h +++ b/tools/perf/lib/internal.h @@ -14,5 +14,7 @@ do { \ #define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__) #define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__) +#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__) +#define pr_debug3(fmt, ...) __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__) #endif /* __LIBPERF_INTERNAL_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index d7b327f224e2..bc3fbb213a3e 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -41,6 +41,7 @@ LIBPERF_0.0.1 { perf_evlist__set_maps; perf_evlist__poll; perf_mmap__consume; + perf_mmap__read_init; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 4cada1c89fdb..fdbc6c550dea 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include #include #include #include #include #include +#include "internal.h" void perf_mmap__init(struct perf_mmap *map, bool overwrite, libperf_unmap_cb_t unmap_cb) @@ -91,3 +95,83 @@ void perf_mmap__consume(struct perf_mmap *map) if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) perf_mmap__put(map); } + +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = *start; + int size = mask + 1; + + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); + while (true) { + if (evt_head - *start >= (unsigned int)size) { + pr_debug("Finished reading overwrite ring buffer: rewind\n"); + if (evt_head - *start > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finished reading overwrite ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +/* + * Report the start and end of the available data in ringbuffer + */ +static int __perf_mmap__read_init(struct perf_mmap *md) +{ + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + + md->start = md->overwrite ? head : old; + md->end = md->overwrite ? old : head; + + if ((md->end - md->start) < md->flush) + return -EAGAIN; + + size = md->end - md->start; + if (size > (unsigned long)(md->mask) + 1) { + if (!md->overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md); + return -EAGAIN; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) + return -EINVAL; + } + + return 0; +} + +int perf_mmap__read_init(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return -ENOENT; + + return __perf_mmap__read_init(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 89aa98af3d63..7ea60b6df366 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -38,7 +38,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; - perf_mmap__read_init(map); + perf_mmap__read_init(&map->core); while ((event = perf_mmap__read_event(map)) != NULL) { const u32 type = event->header.type; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index cd65469cd01f..f871d817cdeb 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -186,7 +186,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index b5a57bb54c25..cf992e0b27ff 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -426,7 +426,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 31c005e07b17..e85da7e77269 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -39,7 +39,7 @@ static int find_comm(struct evlist *evlist, const char *comm) found = 0; for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { if (event->header.type == PERF_RECORD_COMM && diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index b176acc4f52e..77f42f0ac15d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -114,7 +114,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index bbf8ba320721..d6a563120d93 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -93,7 +93,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 6ebbcc65749e..2587cb8b2c0f 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -171,7 +171,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aeb558010c1..808669507c30 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -100,7 +100,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) evlist__disable(evlist); md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 55728b3da057..bedfdec34972 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -270,7 +270,7 @@ static int process_events(struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index a0f1deacfc62..e743df610880 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -120,7 +120,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused retry: md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; while ((event = perf_mmap__read_event(md)) != NULL) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1952f2b7ee45..5dd30b7e4597 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1732,7 +1732,7 @@ static void *perf_evlist__poll_thread(void *arg) struct mmap *map = &evlist->mmap[i]; union perf_event *event; - if (perf_mmap__read_init(map)) + if (perf_mmap__read_init(&map->core)) continue; while ((event = perf_mmap__read_event(map)) != NULL) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index abe7cbe6c95f..59379118c2f1 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -365,86 +365,6 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) return perf_mmap__aio_mmap(map, mp); } -static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) -{ - struct perf_event_header *pheader; - u64 evt_head = *start; - int size = mask + 1; - - pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); - pheader = (struct perf_event_header *)(buf + (*start & mask)); - while (true) { - if (evt_head - *start >= (unsigned int)size) { - pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - *start > (unsigned int)size) - evt_head -= pheader->size; - *end = evt_head; - return 0; - } - - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); - - if (pheader->size == 0) { - pr_debug("Finished reading overwrite ring buffer: get start\n"); - *end = evt_head; - return 0; - } - - evt_head += pheader->size; - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); - } - WARN_ONCE(1, "Shouldn't get here\n"); - return -1; -} - -/* - * Report the start and end of the available data in ringbuffer - */ -static int __perf_mmap__read_init(struct mmap *md) -{ - u64 head = perf_mmap__read_head(&md->core); - u64 old = md->core.prev; - unsigned char *data = md->core.base + page_size; - unsigned long size; - - md->core.start = md->core.overwrite ? head : old; - md->core.end = md->core.overwrite ? old : head; - - if ((md->core.end - md->core.start) < md->core.flush) - return -EAGAIN; - - size = md->core.end - md->core.start; - if (size > (unsigned long)(md->core.mask) + 1) { - if (!md->core.overwrite) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - - md->core.prev = head; - perf_mmap__consume(&md->core); - return -EAGAIN; - } - - /* - * Backward ring buffer is full. We still have a chance to read - * most of data from it. - */ - if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) - return -EINVAL; - } - - return 0; -} - -int perf_mmap__read_init(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return -ENOENT; - - return __perf_mmap__read_init(map); -} - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { @@ -454,7 +374,7 @@ int perf_mmap__push(struct mmap *md, void *to, void *buf; int rc = 0; - rc = perf_mmap__read_init(md); + rc = perf_mmap__read_init(&md->core); if (rc < 0) return (rc == -EAGAIN) ? 1 : -1; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 89fb93267ff1..6d818ef51f05 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -54,6 +54,5 @@ int perf_mmap__push(struct mmap *md, void *to, size_t mmap__mmap_len(struct mmap *map); -int perf_mmap__read_init(struct mmap *md); void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 82a4fa6c87bd..64eec2a239d4 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1023,7 +1023,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (!md) return NULL; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto end; event = perf_mmap__read_event(md); -- Gitee From 2a1fb42e5caed37a9c4f8846d07e9bb49bffcd6b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:19 +0200 Subject: [PATCH 20/77] libperf: Adopt perf_mmap__read_done() from tools/perf commit 32fdc2ca7e2ae8ae5d0ff660ca7783acd8ee6396 upstream Move perf_mmap__read_init() from tools/perf to libperf and export it in the perf/mmap.h header. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/mmap.h | 1 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 17 +++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 17 ----------------- tools/perf/util/mmap.h | 1 - 20 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 6a0c3ff78e01..c90d925f7ae6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -142,7 +142,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe next_event: perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (!comm1_time || !comm2_time) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index b6a8078dd446..4c087a8c9fed 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -794,7 +794,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, break; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); return n; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1f0785d9b583..66e18bae2a22 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -896,7 +896,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } static void perf_top__mmap_read(struct perf_top *top) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 416d93dee556..d631847072a5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3520,7 +3520,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) draining = true; } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (trace->nr_events == before) { diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index 646e9052b003..4f946e7f724b 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -8,5 +8,6 @@ struct perf_mmap; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); +LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index bc3fbb213a3e..7e3ea2e9c917 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -42,6 +42,7 @@ LIBPERF_0.0.1 { perf_evlist__poll; perf_mmap__consume; perf_mmap__read_init; + perf_mmap__read_done; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index fdbc6c550dea..97297cba44e3 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -175,3 +175,20 @@ int perf_mmap__read_init(struct perf_mmap *map) return __perf_mmap__read_init(map); } + +/* + * Mandatory for overwrite mode + * The direction of overwrite mode is backward. + * The last perf_mmap__read() will set tail to map->core.prev. + * Need to correct the map->core.prev to head which is the end of next read. + */ +void perf_mmap__read_done(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return; + + map->prev = perf_mmap__read_head(map); +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 7ea60b6df366..917ef9e21757 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -54,7 +54,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, return TEST_FAIL; } } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } return TEST_OK; } diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index f871d817cdeb..fd5b7eb480f8 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -195,7 +195,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), if (type == PERF_RECORD_SAMPLE) count ++; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (count != expect) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index cf992e0b27ff..9947cda29bad 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -435,7 +435,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, if (ret < 0) return ret; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return 0; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e85da7e77269..e950907f6f57 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -49,7 +49,7 @@ static int find_comm(struct evlist *evlist, const char *comm) found += 1; perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return found; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 77f42f0ac15d..bb15d405a42c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -142,7 +142,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse nr_events[evsel->idx]++; perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: err = 0; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index d6a563120d93..c95eb1bbf396 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -124,7 +124,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_ok; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (nr_events == before) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2587cb8b2c0f..92a53be3b32b 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -279,7 +279,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } /* diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 808669507c30..ace20921ad55 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -120,7 +120,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) next_event: perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if ((u64) nr_samples == total_periods) { diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index bedfdec34972..8400fb17c170 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -280,7 +280,7 @@ static int process_events(struct evlist *evlist, if (ret < 0) goto out_free_nodes; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } events_array = calloc(cnt, sizeof(struct event_node)); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index e743df610880..270dc8b46909 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -129,7 +129,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if (!exited || !nr_exit) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5dd30b7e4597..776ab9d3c18f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1745,7 +1745,7 @@ static void *perf_evlist__poll_thread(void *arg) perf_mmap__consume(&map->core); got_data = true; } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } if (draining && !got_data) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 59379118c2f1..2dedef9b06fd 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -405,20 +405,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } - -/* - * Mandatory for overwrite mode - * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->core.prev. - * Need to correct the map->core.prev to head which is the end of next read. - */ -void perf_mmap__read_done(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return; - - map->core.prev = perf_mmap__read_head(&map->core); -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 6d818ef51f05..0b15702be1a5 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -54,5 +54,4 @@ int perf_mmap__push(struct mmap *md, void *to, size_t mmap__mmap_len(struct mmap *map); -void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ -- Gitee From 9d7838e568a58a748d8ba259ed7454365dc1b9ce Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Oct 2019 14:53:20 +0200 Subject: [PATCH 21/77] libperf: Adopt perf_mmap__read_event() from tools/perf commit 151ed5d70da87720022e4171227733a008b3c719 upstream Move perf_mmap__read_event() from tools/perf to libperf and export it in the perf/mmap.h header. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20191007125344.14268-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: PvsNarasimha Signed-off-by: mohanasv2 --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/lib/include/perf/mmap.h | 2 + tools/perf/lib/libperf.map | 1 + tools/perf/lib/mmap.c | 79 ++++++++++++++++++++ tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 77 ------------------- tools/perf/util/mmap.h | 2 - tools/perf/util/python.c | 2 +- 21 files changed, 98 insertions(+), 95 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index c90d925f7ae6..909ead08a6f6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -121,7 +121,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_COMM || diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4c087a8c9fed..858da896b518 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, if (err < 0) return (err == -EAGAIN) ? 0 : -1; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { err = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); if (err) { perf_mmap__consume(&md->core); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 66e18bae2a22..8bd526b8c143 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -875,7 +875,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) if (perf_mmap__read_init(&md->core) < 0) return; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { int ret; ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d631847072a5..d196b287a801 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3503,7 +3503,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ++trace->nr_events; err = trace__deliver_event(trace, event); diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h index 4f946e7f724b..9508ad90d8b9 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/perf/lib/include/perf/mmap.h @@ -5,9 +5,11 @@ #include struct perf_mmap; +union perf_event; LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); +LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map); #endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index 7e3ea2e9c917..8bb0d73e0c6c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -43,6 +43,7 @@ LIBPERF_0.0.1 { perf_mmap__consume; perf_mmap__read_init; perf_mmap__read_done; + perf_mmap__read_event; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c index 97297cba44e3..0752c193b0fb 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/perf/lib/mmap.c @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -192,3 +194,80 @@ void perf_mmap__read_done(struct perf_mmap *map) map->prev = perf_mmap__read_head(map); } + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event *perf_mmap__read(struct perf_mmap *map, + u64 *startp, u64 end) +{ + unsigned char *data = map->base + page_size; + union perf_event *event = NULL; + int diff = end - *startp; + + if (diff >= (int)sizeof(event->header)) { + size_t size; + + event = (union perf_event *)&data[*startp & map->mask]; + size = event->header.size; + + if (size < sizeof(event->header) || diff < (int)size) + return NULL; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + unsigned int offset = *startp; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = map->event_copy; + + do { + cpy = min(map->mask + 1 - (offset & map->mask), len); + memcpy(dst, &data[offset & map->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = (union perf_event *)map->event_copy; + } + + *startp += size; + } + + return event; +} + +/* + * Read event from ring buffer one by one. + * Return one event for each call. + * + * Usage: + * perf_mmap__read_init() + * while(event = perf_mmap__read_event()) { + * //process the event + * perf_mmap__consume() + * } + * perf_mmap__read_done() + */ +union perf_event *perf_mmap__read_event(struct perf_mmap *map) +{ + union perf_event *event; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + /* non-overwirte doesn't pause the ringbuffer */ + if (!map->overwrite) + map->end = perf_mmap__read_head(map); + + event = perf_mmap__read(map, &map->start, map->end); + + if (!map->overwrite) + map->prev = map->start; + + return event; +} diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 917ef9e21757..15cea518f5ad 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -39,7 +39,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, union perf_event *event; perf_mmap__read_init(&map->core); - while ((event = perf_mmap__read_event(map)) != NULL) { + while ((event = perf_mmap__read_event(&map->core)) != NULL) { const u32 type = event->header.type; switch (type) { diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fd5b7eb480f8..a439868738bd 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -189,7 +189,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; if (type == PERF_RECORD_SAMPLE) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 9947cda29bad..1f017e1b2a55 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -429,7 +429,7 @@ static int process_events(struct machine *machine, struct evlist *evlist, if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ret = process_event(machine, evlist, event, state); perf_mmap__consume(&md->core); if (ret < 0) diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e950907f6f57..50a0c9fcde7d 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -41,7 +41,7 @@ static int find_comm(struct evlist *evlist, const char *comm) md = &evlist->mmap[i]; if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_COMM && (pid_t)event->comm.pid == getpid() && (pid_t)event->comm.tid == getpid() && diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bb15d405a42c..5f4c0dbb4715 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -117,7 +117,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) { diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index c95eb1bbf396..c6b2d7aab608 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -96,7 +96,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; int tp_flags; struct perf_sample sample; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 92a53be3b32b..2195fc205e72 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -174,7 +174,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; const char *name = perf_event__name(type); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index ace20921ad55..bfb9986093d8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -103,7 +103,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8400fb17c170..fcb0d03dba4e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -273,7 +273,7 @@ static int process_events(struct evlist *evlist, if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { cnt += 1; ret = add_event(evlist, &events, event); perf_mmap__consume(&md->core); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 270dc8b46909..adaff9044331 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -123,7 +123,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_EXIT) nr_exit++; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 776ab9d3c18f..4a4928b7244b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1734,7 +1734,7 @@ static void *perf_evlist__poll_thread(void *arg) if (perf_mmap__read_init(&map->core)) continue; - while ((event = perf_mmap__read_event(map)) != NULL) { + while ((event = perf_mmap__read_event(&map->core)) != NULL) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); if (evsel && evsel->side_band.cb) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2dedef9b06fd..2a8bf0ab861c 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -29,83 +29,6 @@ size_t mmap__mmap_len(struct mmap *map) return perf_mmap__mmap_len(&map->core); } -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct mmap *map, - u64 *startp, u64 end) -{ - unsigned char *data = map->core.base + page_size; - union perf_event *event = NULL; - int diff = end - *startp; - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[*startp & map->core.mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) - return NULL; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { - unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->core.event_copy; - - do { - cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); - memcpy(dst, &data[offset & map->core.mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *)map->core.event_copy; - } - - *startp += size; - } - - return event; -} - -/* - * Read event from ring buffer one by one. - * Return one event for each call. - * - * Usage: - * perf_mmap__read_init() - * while(event = perf_mmap__read_event()) { - * //process the event - * perf_mmap__consume() - * } - * perf_mmap__read_done() - */ -union perf_event *perf_mmap__read_event(struct mmap *map) -{ - union perf_event *event; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return NULL; - - /* non-overwirte doesn't pause the ringbuffer */ - if (!map->core.overwrite) - map->core.end = perf_mmap__read_head(&map->core); - - event = perf_mmap__read(map, &map->core.start, map->core.end); - - if (!map->core.overwrite) - map->core.prev = map->core.start; - - return event; -} - int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, struct auxtrace_mmap_params *mp __maybe_unused, void *userpg __maybe_unused, diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 0b15702be1a5..bee4e83f7109 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -47,8 +47,6 @@ void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); -union perf_event *perf_mmap__read_event(struct mmap *map); - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 64eec2a239d4..25118605f3f8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1026,7 +1026,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (perf_mmap__read_init(&md->core) < 0) goto end; - event = perf_mmap__read_event(md); + event = perf_mmap__read_event(&md->core); if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; -- Gitee From d0ddf135a5667e52497db8b02bf984873f7955d5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 May 2020 12:18:21 -0300 Subject: [PATCH 22/77] perf evlist: Move the sideband thread routines to separate object commit 9a39994467d493eba38d8f69e42fd9c31cb1da9a upstream To avoid dragging more stuff into the perf python binding in the following csets. Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/Build | 1 + tools/perf/util/evlist.c | 117 ---------------------------- tools/perf/util/sideband_evlist.c | 125 ++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 117 deletions(-) create mode 100644 tools/perf/util/sideband_evlist.c diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 013ec02ad872..2b198f645e46 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -9,6 +9,7 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o +perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o perf-y += perf_event_attr_fprintf.o diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4a4928b7244b..4e3a4f352619 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1682,120 +1682,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, } return leader; } - -int perf_evlist__add_sb_event(struct evlist *evlist, - struct perf_event_attr *attr, - perf_evsel__sb_cb_t cb, - void *data) -{ - struct evsel *evsel; - - if (!attr->sample_id_all) { - pr_warning("enabling sample_id_all for all side band events\n"); - attr->sample_id_all = 1; - } - - evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); - if (!evsel) - return -1; - - evsel->side_band.cb = cb; - evsel->side_band.data = data; - evlist__add(evlist, evsel); - return 0; -} - -static void *perf_evlist__poll_thread(void *arg) -{ - struct evlist *evlist = arg; - bool draining = false; - int i, done = 0; - /* - * In order to read symbols from other namespaces perf to needs to call - * setns(2). This isn't permitted if the struct_fs has multiple users. - * unshare(2) the fs so that we may continue to setns into namespaces - * that we're observing when, for instance, reading the build-ids at - * the end of a 'perf record' session. - */ - unshare(CLONE_FS); - - while (!done) { - bool got_data = false; - - if (evlist->thread.done) - draining = true; - - if (!draining) - evlist__poll(evlist, 1000); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - struct mmap *map = &evlist->mmap[i]; - union perf_event *event; - - if (perf_mmap__read_init(&map->core)) - continue; - while ((event = perf_mmap__read_event(&map->core)) != NULL) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); - - if (evsel && evsel->side_band.cb) - evsel->side_band.cb(event, evsel->side_band.data); - else - pr_warning("cannot locate proper evsel for the side band event\n"); - - perf_mmap__consume(&map->core); - got_data = true; - } - perf_mmap__read_done(&map->core); - } - - if (draining && !got_data) - break; - } - return NULL; -} - -int perf_evlist__start_sb_thread(struct evlist *evlist, - struct target *target) -{ - struct evsel *counter; - - if (!evlist) - return 0; - - if (perf_evlist__create_maps(evlist, target)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__open(counter, evlist->core.cpus, - evlist->core.threads) < 0) - goto out_delete_evlist; - } - - if (evlist__mmap(evlist, UINT_MAX)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__enable(counter)) - goto out_delete_evlist; - } - - evlist->thread.done = 0; - if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) - goto out_delete_evlist; - - return 0; - -out_delete_evlist: - evlist__delete(evlist); - evlist = NULL; - return -1; -} - -void perf_evlist__stop_sb_thread(struct evlist *evlist) -{ - if (!evlist) - return; - evlist->thread.done = 1; - pthread_join(evlist->thread.th, NULL); - evlist__delete(evlist); -} diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c new file mode 100644 index 000000000000..073d201bb6ea --- /dev/null +++ b/tools/perf/util/sideband_evlist.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/mmap.h" +#include +#include +#include +#include +#include +#include + +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + perf_evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); + if (!evsel) + return -1; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + evlist__add(evlist, evsel); + return 0; +} + +static void *perf_evlist__poll_thread(void *arg) +{ + struct evlist *evlist = arg; + bool draining = false; + int i, done = 0; + /* + * In order to read symbols from other namespaces perf to needs to call + * setns(2). This isn't permitted if the struct_fs has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing when, for instance, reading the build-ids at + * the end of a 'perf record' session. + */ + unshare(CLONE_FS); + + while (!done) { + bool got_data = false; + + if (evlist->thread.done) + draining = true; + + if (!draining) + evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(&map->core)) + continue; + while ((event = perf_mmap__read_event(&map->core)) != NULL) { + struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(&map->core); + got_data = true; + } + perf_mmap__read_done(&map->core); + } + + if (draining && !got_data) + break; + } + return NULL; +} + +int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +{ + struct evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0) + goto out_delete_evlist; + } + + if (evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + evlist__delete(evlist); +} -- Gitee From fffc12928c5b4ddaf764c3c0649d4c39e1e5614b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 30 Sep 2019 17:36:23 -0700 Subject: [PATCH 23/77] perf tools: Avoid 'sample_reg_masks' being const + weak commit 42466b9f29b415c254dc4c2f4618e2a96951a406 upstream Being const + weak breaks with some compilers that constant-propagate from the weak symbol. This behavior is outside of the specification, but in LLVM is chosen to match GCC's behavior. LLVM's implementation was set in this patch: https://github.com/llvm/llvm-project/commit/f49573d1eedcf1e44893d5a062ac1b72c8419646 A const + weak symbol is set to be weak_odr: https://llvm.org/docs/LangRef.html ODR is one definition rule, and given there is one constant definition constant-propagation is possible. It is possible to get this code to miscompile with LLVM when applying link time optimization. As compilers become more aggressive, this is likely to break in more instances. Move the definition of sample_reg_masks to the conditional part of perf_regs.h and guard usage with HAVE_PERF_REGS_SUPPORT. This avoids the weak symbol. Fix an issue when HAVE_PERF_REGS_SUPPORT isn't defined from patch v1. In v3, add perf_regs.c for architectures that HAVE_PERF_REGS_SUPPORT but don't declare sample_regs_masks. Further notes: Jiri asked: "Is this just a precaution or you actualy saw some breakage?" Ian answered: "We saw a breakage with clang with thinlto enabled for linking. Our compiler team had recently seen, and were surprised by, a similar issue and were able to dig out the weak ODR issue." Signed-off-by: Ian Rogers Reviewed-by: Nick Desaulniers Acked-by: Jiri Olsa Cc: Albert Ou Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: clang-built-linux@googlegroups.com Cc: Guo Ren Cc: Kan Liang Cc: linux-riscv@lists.infradead.org Cc: Mao Han Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20191001003623.255186-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/arch/arm/util/Build | 2 ++ tools/perf/arch/arm/util/perf_regs.c | 6 ++++++ tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/perf_regs.c | 6 ++++++ tools/perf/arch/csky/util/Build | 2 ++ tools/perf/arch/csky/util/perf_regs.c | 6 ++++++ tools/perf/arch/riscv/util/Build | 2 ++ tools/perf/arch/riscv/util/perf_regs.c | 6 ++++++ tools/perf/arch/s390/util/Build | 1 + tools/perf/arch/s390/util/perf_regs.c | 6 ++++++ tools/perf/util/parse-regs-options.c | 8 ++++++-- tools/perf/util/perf_regs.c | 4 ---- tools/perf/util/perf_regs.h | 4 ++-- 13 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 tools/perf/arch/arm/util/perf_regs.c create mode 100644 tools/perf/arch/arm64/util/perf_regs.c create mode 100644 tools/perf/arch/csky/util/perf_regs.c create mode 100644 tools/perf/arch/riscv/util/perf_regs.c create mode 100644 tools/perf/arch/s390/util/perf_regs.c diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index 296f0eac5e18..37fc63708966 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,3 +1,5 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 4ed1ebdc5dc0..264ac3c74408 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ perf-y += header.o +perf-y += perf_regs.o perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/csky/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/riscv/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 22797f043b84..3d9d0f4f72ca 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,6 @@ perf-y += header.o perf-y += kvm-stat.o +perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/s390/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index 869ef7e22bd9..a4a100425b3a 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -13,7 +13,7 @@ static int __parse_regs(const struct option *opt, const char *str, int unset, bool intr) { uint64_t *mode = (uint64_t *)opt->value; - const struct sample_reg *r; + const struct sample_reg *r = NULL; char *s, *os = NULL, *p; int ret = -1; uint64_t mask; @@ -46,19 +46,23 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) if (!strcmp(s, "?")) { fprintf(stderr, "available registers: "); +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if (r->mask & mask) fprintf(stderr, "%s ", r->name); } +#endif fputc('\n', stderr); /* just printing available regs */ goto error; } +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } - if (!r->name) { +#endif + if (!r || !r->name) { ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", s, intr ? "-I" : "--user-regs="); goto error; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 2774cec1f15f..5ee47ae1509c 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -3,10 +3,6 @@ #include "perf_regs.h" #include "event.h" -const struct sample_reg __weak sample_reg_masks[] = { - SMPL_REG_END -}; - int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index ec7640cc4c91..a45499126184 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,8 +15,6 @@ struct sample_reg { #define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { .name = NULL } -extern const struct sample_reg sample_reg_masks[]; - enum { SDT_ARG_VALID = 0, SDT_ARG_SKIP, @@ -27,6 +25,8 @@ uint64_t arch__intr_reg_mask(void); uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT +extern const struct sample_reg sample_reg_masks[]; + #include #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) -- Gitee From 12c4c9942ca097f6c685fdb506ac24c872dc905c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 5 Mar 2020 23:11:08 -0800 Subject: [PATCH 24/77] tools: Fix off-by 1 relative directory includes commit 441b62acd9c809e87bab45ad1d82b1b3b77cb4f0 upstream This is currently working due to extra include paths in the build. Committer testing: $ cd tools/include/uapi/asm/ Before this patch: $ ls -la ../../arch/x86/include/uapi/asm/errno.h ls: cannot access '../../arch/x86/include/uapi/asm/errno.h': No such file or directory $ After this patch; $ ls -la ../../../arch/x86/include/uapi/asm/errno.h -rw-rw-r--. 1 acme acme 31 Feb 20 12:42 ../../../arch/x86/include/uapi/asm/errno.h $ Check that that is still under tools/, i.e. hasn't escaped into the main kernel sources: $ cd ../../../arch/x86/include/uapi/asm/ $ pwd /home/acme/git/perf/tools/arch/x86/include/uapi/asm $ [Backport changes] 1.In tools/perf/arch/x86/util/intel-pt.c, the header file #include "../../util/evsel_config.h" is not present in the current kernel version. Including it would require backporting over 20 additional dependencies, which is not feasible at this stage. Therefore, this header inclusion has been intentionally omitted to avoid introducing complex dependency chains. Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexios Zavras Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Igor Lubashev Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wei Li Link: http://lore.kernel.org/lkml/20200306071110.130202-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/include/uapi/asm/errno.h | 14 ++++++------ tools/perf/arch/arm64/util/arm-spe.c | 20 ++++++++--------- tools/perf/arch/arm64/util/perf_regs.c | 2 +- tools/perf/arch/powerpc/util/perf_regs.c | 4 ++-- tools/perf/arch/x86/util/auxtrace.c | 14 ++++++------ tools/perf/arch/x86/util/event.c | 12 +++++----- tools/perf/arch/x86/util/header.c | 4 ++-- tools/perf/arch/x86/util/intel-bts.c | 24 ++++++++++---------- tools/perf/arch/x86/util/intel-pt.c | 28 ++++++++++++------------ tools/perf/arch/x86/util/machine.c | 6 ++--- tools/perf/arch/x86/util/perf_regs.c | 8 +++---- tools/perf/arch/x86/util/pmu.c | 6 ++--- 12 files changed, 71 insertions(+), 71 deletions(-) diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index ce3c5945a1c4..637189ec1ab9 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -1,18 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #if defined(__i386__) || defined(__x86_64__) -#include "../../arch/x86/include/uapi/asm/errno.h" +#include "../../../arch/x86/include/uapi/asm/errno.h" #elif defined(__powerpc__) -#include "../../arch/powerpc/include/uapi/asm/errno.h" +#include "../../../arch/powerpc/include/uapi/asm/errno.h" #elif defined(__sparc__) -#include "../../arch/sparc/include/uapi/asm/errno.h" +#include "../../../arch/sparc/include/uapi/asm/errno.h" #elif defined(__alpha__) -#include "../../arch/alpha/include/uapi/asm/errno.h" +#include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) -#include "../../arch/mips/include/uapi/asm/errno.h" +#include "../../../arch/mips/include/uapi/asm/errno.h" #elif defined(__ia64__) -#include "../../arch/ia64/include/uapi/asm/errno.h" +#include "../../../arch/ia64/include/uapi/asm/errno.h" #elif defined(__xtensa__) -#include "../../arch/xtensa/include/uapi/asm/errno.h" +#include "../../../arch/xtensa/include/uapi/asm/errno.h" #else #include #endif diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 8d6821d9c3f6..27653be24447 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -11,17 +11,17 @@ #include #include -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/session.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/session.h" #include // page_size -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/arm-spe.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/arm-spe.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2864e2e3776d..2833e101a7c6 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/perf_regs.h" +#include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG_END diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index e9c436eeffc9..0a5242900248 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -4,8 +4,8 @@ #include #include -#include "../../util/perf_regs.h" -#include "../../util/debug.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" #include diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 96f4a2c11893..330d03216b0e 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -7,13 +7,13 @@ #include #include -#include "../../util/header.h" -#include "../../util/debug.h" -#include "../../util/pmu.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/evlist.h" +#include "../../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/evlist.h" static struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index d357c625c09f..91d938e88b42 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,12 +3,12 @@ #include #include -#include "../../util/event.h" -#include "../../util/synthetic-events.h" -#include "../../util/machine.h" -#include "../../util/tool.h" -#include "../../util/map.h" -#include "../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index aa6deb463bf3..578c8c568ffd 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -7,8 +7,8 @@ #include #include -#include "../../util/debug.h" -#include "../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/header.h" static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 7d22dbdf0f1d..c7cc05dd2852 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -11,18 +11,18 @@ #include #include -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/mmap.h" -#include "../../util/session.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/record.h" -#include "../../util/tsc.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-bts.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/tsc.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-bts.h" #include // page_size #define KiB(x) ((x) * 1024) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 761140ff14f2..00356704d23d 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,22 +13,22 @@ #include #include -#include "../../util/session.h" -#include "../../util/event.h" -#include "../../util/evlist.h" -#include "../../util/evsel.h" -#include "../../util/cpumap.h" -#include "../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/cpumap.h" +#include "../../../util/mmap.h" #include -#include "../../util/parse-events.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/target.h" -#include "../../util/tsc.h" +#include "../../../util/parse-events.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/target.h" +#include "../../../util/tsc.h" #include // page_size -#include "../../util/intel-pt.h" +#include "../../../util/intel-pt.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index e17e080e76f4..31679c35d493 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -5,9 +5,9 @@ #include #include // page_size -#include "../../util/machine.h" -#include "../../util/map.h" -#include "../../util/symbol.h" +#include "../../../util/machine.h" +#include "../../../util/map.h" +#include "../../../util/symbol.h" #include #include diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c218b83e063b..fca81b39b09f 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -5,10 +5,10 @@ #include #include -#include "../../perf-sys.h" -#include "../../util/perf_regs.h" -#include "../../util/debug.h" -#include "../../util/event.h" +#include "../../../perf-sys.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e33ef5bc31c5..d48d608517fd 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -4,9 +4,9 @@ #include #include -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/pmu.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/pmu.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { -- Gitee From 5e3231e45d5ea232e6f2ecba695f1ecbbd8f674d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 16:43:03 -0300 Subject: [PATCH 25/77] perf evlist: Factor out asprintf routine to build a tracepoint pid filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 05cea4492c9dd28439cc73de1047ab3b26033736 upstream Will be used to append such lists to existing filters. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-798vlyqfqw938ehoe8etivx1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/evlist.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4e3a4f352619..3a4d373ae1f7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1044,6 +1044,9 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) struct evsel *evsel; int err = 0; + if (filter == NULL) + return -1; + evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -1056,16 +1059,15 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; - int ret = -1; size_t i; for (i = 0; i < npids; ++i) { if (i == 0) { if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) - return -1; + return NULL; } else { char *tmp; @@ -1077,8 +1079,17 @@ int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t * } } - ret = perf_evlist__set_tp_filter(evlist, filter); + return filter; out_free: + free(filter); + return NULL; +} + +int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter = asprintf__tp_filter_pids(npids, pids); + int ret = perf_evlist__set_tp_filter(evlist, filter); + free(filter); return ret; } -- Gitee From 1d6becf14f3a74029a83a5bc9e1306d85d1b30db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Oct 2019 16:52:17 -0300 Subject: [PATCH 26/77] perf evlist: Introduce append_tp_filter() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 53c92f73389d049d72b2e1d1cbc81c007241d422 upstream Will be used by 'perf trace' to support 'perf trace --filter', we need to append to any pre-existing filter. When parse_filter() gets invoked to process --filter, it'll set the filter to that specified on the command line, later on, when we filter out 'perf trace' own pid to avoid an event feedback loop, we need to preserve the command line filter put in place by parse_filter(). Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-h9rot08qmxlnfmte0holt68x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/evlist.c | 20 ++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3a4d373ae1f7..49c528dfe1e9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1059,6 +1059,26 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) +{ + struct evsel *evsel; + int err = 0; + + if (filter == NULL) + return -1; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) + continue; + + err = perf_evsel__append_tp_filter(evsel, filter); + if (err) + break; + } + + return err; +} + static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 10fc888d6127..15de58642a90 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -133,6 +133,8 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); + struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); -- Gitee From 5c411b59aaffca0d5a4fdb7cb90ef33072886f15 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2019 20:10:50 -0300 Subject: [PATCH 27/77] perf string: Export asprintf__tp_filter_pids() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit da949f507a73df5b5ad5031cb23b82a4d81846eb upstream Will be used directly in 'perf trace' for setting up the command line argv array to pass to cmd_record, as this was how 'perf trace record' was implemented, following the model used in 'perf kvm record', 'perf sched record', etc. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-w3cuwjs63lxf5zpryy3145uv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/evlist.c | 3 ++- tools/perf/util/string2.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 49c528dfe1e9..8da83ae90b54 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -21,6 +21,7 @@ #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" +#include "util/string2.h" #include #include #include @@ -1079,7 +1080,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) return err; } -static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) +char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; size_t i; diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 708805f5573e..73df616ced43 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -4,6 +4,7 @@ #include #include +#include // pid_t #include #include @@ -32,6 +33,8 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); + char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); -- Gitee From 8352a32837049aa0f135e3a65f52fd8c886e99d3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 May 2020 11:49:08 -0300 Subject: [PATCH 28/77] perf tools: Move routines that probe for perf API features to separate file commit 40c7d2460e03b0916c5fcc5edbedae05b4b571fc upstream Trying to disentangle this a bit further, unfortunately it uses parse_events(), its interesting to have it separated anyway, so do it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/arch/arm/util/cs-etm.c | 1 + tools/perf/arch/x86/util/intel-pt.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/util/Build | 1 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 4 - tools/perf/util/intel-pt.c | 1 + tools/perf/util/perf_api_probe.c | 164 ++++++++++++++++++++++++++++ tools/perf/util/perf_api_probe.h | 14 +++ tools/perf/util/record.c | 155 +------------------------- 11 files changed, 186 insertions(+), 158 deletions(-) create mode 100644 tools/perf/util/perf_api_probe.c create mode 100644 tools/perf/util/perf_api_probe.h diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index b4fe7e41f847..4a9daac1a145 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -23,6 +23,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/perf_api_probe.h" #include "../../util/evsel_config.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 00356704d23d..fcc18501b9e4 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -24,6 +24,7 @@ #include "../../../util/pmu.h" #include "../../../util/debug.h" #include "../../../util/auxtrace.h" +#include "../../../util/perf_api_probe.h" #include "../../../util/record.h" #include "../../../util/target.h" #include "../../../util/tsc.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 077a0faa9b98..f34c450a9743 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -34,6 +34,7 @@ #include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" +#include "util/perf_api_probe.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" #include "util/trigger.h" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2b198f645e46..cf6b6be27e86 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -86,6 +86,7 @@ perf-y += counts.o perf-y += stat.o perf-y += stat-shadow.o perf-y += stat-display.o +perf-y += perf_api_probe.o perf-y += record.o perf-y += srcline.o perf-y += srccode.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 4086d5848734..efec868c74c3 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -33,6 +33,7 @@ #include "evsel.h" #include "evsel_config.h" #include "symbol.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "thread_map.h" #include "asm/bug.h" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8da83ae90b54..201cf1786f98 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -22,6 +22,7 @@ #include "asm/bug.h" #include "bpf-event.h" #include "util/string2.h" +#include "util/perf_api_probe.h" #include #include #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 15de58642a90..6fb6114880d9 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -163,10 +163,6 @@ void evlist__close(struct evlist *evlist); struct callchain_param; void perf_evlist__set_id_pos(struct evlist *evlist); -bool perf_can_sample_identifier(void); -bool perf_can_record_switch_events(void); -bool perf_can_record_cpu_wide(void); -bool perf_can_aux_sample(void); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index b40832419a27..a76d9886dbf2 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "time-utils.h" diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c new file mode 100644 index 000000000000..1337965673d7 --- /dev/null +++ b/tools/perf/util/perf_api_probe.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "perf-sys.h" +#include "util/cloexec.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/perf_api_probe.h" +#include +#include + +typedef void (*setup_probe_fn_t)(struct evsel *evsel); + +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +{ + struct evlist *evlist; + struct evsel *evsel; + unsigned long flags = perf_event_open_cloexec_flag(); + int err = -EAGAIN, fd; + static pid_t pid = -1; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + if (parse_events(evlist, str, NULL)) + goto out_delete; + + evsel = evlist__first(evlist); + + while (1) { + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (pid == -1 && errno == EACCES) { + pid = 0; + continue; + } + goto out_delete; + } + break; + } + close(fd); + + fn(evsel); + + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (errno == EINVAL) + err = -EINVAL; + goto out_delete; + } + close(fd); + err = 0; + +out_delete: + evlist__delete(evlist); + return err; +} + +static bool perf_probe_api(setup_probe_fn_t fn) +{ + const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_cpu_map *cpus; + int cpu, ret, i = 0; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + do { + ret = perf_do_probe_api(fn, cpu, try[i++]); + if (!ret) + return true; + } while (ret == -EAGAIN && try[i]); + + return false; +} + +static void perf_probe_sample_identifier(struct evsel *evsel) +{ + evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; +} + +static void perf_probe_comm_exec(struct evsel *evsel) +{ + evsel->core.attr.comm_exec = 1; +} + +static void perf_probe_context_switch(struct evsel *evsel) +{ + evsel->core.attr.context_switch = 1; +} + +bool perf_can_sample_identifier(void) +{ + return perf_probe_api(perf_probe_sample_identifier); +} + +bool perf_can_comm_exec(void) +{ + return perf_probe_api(perf_probe_comm_exec); +} + +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct perf_cpu_map *cpus; + int cpu, fd; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. + */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h new file mode 100644 index 000000000000..706c3c6426e2 --- /dev/null +++ b/tools/perf/util/perf_api_probe.h @@ -0,0 +1,14 @@ + +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_API_PROBE_H +#define __PERF_API_PROBE_H + +#include + +bool perf_can_aux_sample(void); +bool perf_can_comm_exec(void); +bool perf_can_record_cpu_wide(void); +bool perf_can_record_switch_events(void); +bool perf_can_sample_identifier(void); + +#endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7def66168503..5084ad6db1b3 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,163 +10,10 @@ #include #include #include "cloexec.h" +#include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" -typedef void (*setup_probe_fn_t)(struct evsel *evsel); - -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) -{ - struct evlist *evlist; - struct evsel *evsel; - unsigned long flags = perf_event_open_cloexec_flag(); - int err = -EAGAIN, fd; - static pid_t pid = -1; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - if (parse_events(evlist, str, NULL)) - goto out_delete; - - evsel = evlist__first(evlist); - - while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (pid == -1 && errno == EACCES) { - pid = 0; - continue; - } - goto out_delete; - } - break; - } - close(fd); - - fn(evsel); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (errno == EINVAL) - err = -EINVAL; - goto out_delete; - } - close(fd); - err = 0; - -out_delete: - evlist__delete(evlist); - return err; -} - -static bool perf_probe_api(setup_probe_fn_t fn) -{ - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; - struct perf_cpu_map *cpus; - int cpu, ret, i = 0; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - - return false; -} - -static void perf_probe_sample_identifier(struct evsel *evsel) -{ - evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; -} - -static void perf_probe_comm_exec(struct evsel *evsel) -{ - evsel->core.attr.comm_exec = 1; -} - -static void perf_probe_context_switch(struct evsel *evsel) -{ - evsel->core.attr.context_switch = 1; -} - -bool perf_can_sample_identifier(void) -{ - return perf_probe_api(perf_probe_sample_identifier); -} - -static bool perf_can_comm_exec(void) -{ - return perf_probe_api(perf_probe_comm_exec); -} - -bool perf_can_record_switch_events(void) -{ - return perf_probe_api(perf_probe_context_switch); -} - -bool perf_can_record_cpu_wide(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .exclude_kernel = 1, - }; - struct perf_cpu_map *cpus; - int cpu, fd; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); - if (fd < 0) - return false; - close(fd); - - return true; -} - -/* - * Architectures are expected to know if AUX area sampling is supported by the - * hardware. Here we check for kernel support. - */ -bool perf_can_aux_sample(void) -{ - struct perf_event_attr attr = { - .size = sizeof(struct perf_event_attr), - .exclude_kernel = 1, - /* - * Non-zero value causes the kernel to calculate the effective - * attribute size up to that byte. - */ - .aux_sample_size = 1, - }; - int fd; - - fd = sys_perf_event_open(&attr, -1, 0, -1, 0); - /* - * If the kernel attribute is big enough to contain aux_sample_size - * then we assume that it is supported. We are relying on the kernel to - * validate the attribute size before anything else that could be wrong. - */ - if (fd < 0 && errno == E2BIG) - return false; - if (fd >= 0) - close(fd); - - return true; -} - void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain) { -- Gitee From 82ff61a4e9bf8b53a403706d9c8c0f24edde0569 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Apr 2020 17:54:27 -0300 Subject: [PATCH 29/77] perf evlist: Allow reusing the side band thread for more purposes commit 976be84504b8285d43dc890b02ceff432cd0dd4b upstream I.e. so far we had just one event in that side band thread, a dummy one with attr.bpf_event set, so that 'perf record' can go ahead and ask the kernel for further information about BPF programs being loaded. Allow for more than one event to be there, so that we can use it as well for the upcoming --switch-output-event feature. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Cc: Song Liu Link: http://lore.kernel.org/lkml/20200429131106.27974-6-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/evlist.h | 1 + tools/perf/util/sideband_evlist.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6fb6114880d9..b791f6473097 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -111,6 +111,7 @@ int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, perf_evsel__sb_cb_t cb, void *data); +void evlist__set_cb(struct evlist *evlist, perf_evsel__sb_cb_t cb, void *data); int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 073d201bb6ea..1d6f470d64c4 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -4,6 +4,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/mmap.h" +#include "util/perf_api_probe.h" #include #include #include @@ -80,6 +81,19 @@ static void *perf_evlist__poll_thread(void *arg) return NULL; } +void evlist__set_cb(struct evlist *evlist, perf_evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_id_all = 1; + evsel->core.attr.watermark = 1; + evsel->core.attr.wakeup_watermark = 1; + evsel->side_band.cb = cb; + evsel->side_band.data = data; + } +} + int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) { struct evsel *counter; @@ -90,6 +104,15 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) if (perf_evlist__create_maps(evlist, target)) goto out_delete_evlist; + if (evlist->core.nr_entries > 1) { + bool can_sample_identifier = perf_can_sample_identifier(); + + evlist__for_each_entry(evlist, counter) + perf_evsel__set_sample_id(counter, can_sample_identifier); + + perf_evlist__set_id_pos(evlist); + } + evlist__for_each_entry(evlist, counter) { if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0) goto out_delete_evlist; -- Gitee From 74923445a65ad7be235bb7850b37e224555c0c76 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Nov 2019 14:42:22 +0200 Subject: [PATCH 30/77] perf pmu: When using default config, record which bits of config were changed by the user commit a1ac7de6902c1ea6def7a743f1d2e6ba429684b3 upstream Default config for a PMU is defined before selected events are parsed. That allows the user-entered config to override the default config. However that does not allow for changing the default config based on other options. For example, if the user chooses AUX area sampling mode, in the case of Intel PT, the psb_period needs to be small for sampling, so there is a need to set the default psb_period to 0 (2 KiB) in that case. However that should not override a value set by the user. To allow for that, when using default config, record which bits of config were changed by the user. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20191115124225.5247-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/util/evsel.c | 2 ++ tools/perf/util/evsel_config.h | 2 ++ tools/perf/util/parse-events.c | 42 +++++++++++++++++++++++++++++++++- tools/perf/util/pmu.c | 10 ++++++++ tools/perf/util/pmu.h | 1 + 5 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 797534dbee08..36238cc5cf80 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -849,6 +849,8 @@ static void apply_config_terms(struct evsel *evsel, case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; + case PERF_EVSEL__CONFIG_TERM_CFG_CHG: + break; default: break; } diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 6e654ede8fbe..1f8d2fe0b66e 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -26,6 +26,7 @@ enum evsel_term_type { PERF_EVSEL__CONFIG_TERM_PERCORE, PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, + PERF_EVSEL__CONFIG_TERM_CFG_CHG, }; struct perf_evsel_config_term { @@ -46,6 +47,7 @@ struct perf_evsel_config_term { bool percore; bool aux_output; u32 aux_sample_size; + u64 cfg_chg; } val; bool weak; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 229ed0713280..011189a8aa0d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1273,7 +1273,40 @@ do { \ break; } } -#undef ADD_EVSEL_CONFIG + return 0; +} + +/* + * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for + * each bit of attr->config that the user has changed. + */ +static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, + struct list_head *head_terms) +{ + struct parse_events_term *term; + u64 bits = 0; + int type; + + list_for_each_entry(term, head_config, list) { + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_USER: + type = perf_pmu__format_type(&pmu->format, term->config); + if (type != PERF_PMU_FORMAT_VALUE_CONFIG) + continue; + bits |= perf_pmu__format_bits(&pmu->format, term->config); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG: + bits = ~(u64)0; + break; + default: + break; + } + } + + if (bits) + ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + +#undef ADD_CONFIG_TERM return 0; } @@ -1402,6 +1435,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; + /* + * When using default config, record which bits of attr->config were + * changed by the user. + */ + if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct perf_evsel_config_term *pos, *tmp; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 36e3a777d20d..5f1ece607471 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -945,6 +945,16 @@ __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) return bits; } +int perf_pmu__format_type(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + + if (!format) + return -1; + + return format->value; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index ed9154b182bf..d5e87aa9fd5b 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -71,6 +71,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct list_head *head_terms, bool zero, struct parse_events_error *error); __u64 perf_pmu__format_bits(struct list_head *formats, const char *name); +int perf_pmu__format_type(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, -- Gitee From 959e956d67b31e0ca5d5e98c408f9a9a976c6bfd Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 17 Jan 2020 13:52:50 +0800 Subject: [PATCH 31/77] perf parse: Refactor 'struct perf_evsel_config_term' commit e884602b57c07fae54ff357e4b996b2053b47c1e upstream The struct perf_evsel_config_term::val is a union which contains fields 'callgraph', 'drv_cfg' and 'branch' as string pointers. This leads to the complex code logic for handling every type's string separately, and it's hard to release string as a general way. This patch refactors the structure to add a common field 'str' in the 'val' union as string pointer and remove the other three fields 'callgraph', 'drv_cfg' and 'branch'. Without passing field name, the patch simplifies the string handling with macro ADD_CONFIG_TERM_STR() for string pointer assignment. This patch fixes multiple warnings of line over 80 characters detected by checkpatch tool. Signed-off-by: Leo Yan Reviewed-by: Andi Kleen Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200117055251.24058-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/util/evsel.c | 6 +-- tools/perf/util/evsel_config.h | 4 +- tools/perf/util/parse-events.c | 62 ++++++++++++++++++++----------- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 4a9daac1a145..b8df8054dd5e 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -227,7 +227,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) continue; - sink = term->val.drv_cfg; + sink = term->val.str; snprintf(path, PATH_MAX, "sinks/%s", sink); ret = perf_pmu__scan_file(pmu, path, "%x", &hash); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 36238cc5cf80..ec0af4dd87ea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -808,12 +808,12 @@ static void apply_config_terms(struct evsel *evsel, perf_evsel__reset_sample_bit(evsel, TIME); break; case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: - callgraph_buf = term->val.callgraph; + callgraph_buf = term->val.str; break; case PERF_EVSEL__CONFIG_TERM_BRANCH: - if (term->val.branch && strcmp(term->val.branch, "no")) { + if (term->val.str && strcmp(term->val.str, "no")) { perf_evsel__set_sample_bit(evsel, BRANCH_STACK); - parse_branch_str(term->val.branch, + parse_branch_str(term->val.str, &attr->branch_sample_type); } else perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 1f8d2fe0b66e..b4a65201e4f7 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -36,18 +36,16 @@ struct perf_evsel_config_term { u64 period; u64 freq; bool time; - char *callgraph; - char *drv_cfg; u64 stack_user; int max_stack; bool inherit; bool overwrite; - char *branch; unsigned long max_events; bool percore; bool aux_output; u32 aux_sample_size; u64 cfg_chg; + char *str; } val; bool weak; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 011189a8aa0d..e52056fcc7f4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1201,8 +1201,7 @@ static int config_attr(struct perf_event_attr *attr, static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { -#define ADD_CONFIG_TERM(__type, __name, __val) \ -do { \ +#define ADD_CONFIG_TERM(__type) \ struct perf_evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ @@ -1211,9 +1210,19 @@ do { \ \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ - __t->val.__name = __val; \ __t->weak = term->weak; \ - list_add_tail(&__t->list, head_terms); \ + list_add_tail(&__t->list, head_terms) + +#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \ +do { \ + ADD_CONFIG_TERM(__type); \ + __t->val.__name = __val; \ +} while (0) + +#define ADD_CONFIG_TERM_STR(__type, __val) \ +do { \ + ADD_CONFIG_TERM(__type); \ + __t->val.str = __val; \ } while (0) struct parse_events_term *term; @@ -1221,53 +1230,62 @@ do { \ list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM(PERIOD, period, term->val.num); + ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM(FREQ, freq, term->val.num); + ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM(TIME, time, term->val.num); + ADD_CONFIG_TERM_VAL(TIME, time, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - ADD_CONFIG_TERM(BRANCH, branch, term->val.str); + ADD_CONFIG_TERM_STR(BRANCH, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: - ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + ADD_CONFIG_TERM_VAL(STACK_USER, stack_user, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 0 : 1); break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: - ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: - ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 0 : 1); break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: - ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); + ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_PERCORE: - ADD_CONFIG_TERM(PERCORE, percore, - term->val.num ? true : false); + ADD_CONFIG_TERM_VAL(PERCORE, percore, + term->val.num ? true : false); break; case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: - ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: - ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num); + ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, + term->val.num); break; default: break; @@ -1304,7 +1322,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, } if (bits) - ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits); #undef ADD_CONFIG_TERM return 0; -- Gitee From 696fcc4227dba221609e326f3081d614ce570e2a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Mar 2020 10:50:58 -0300 Subject: [PATCH 32/77] tools headers UAPI: Update tools's copy of linux/perf_event.h commit 6339998d22ecae5d6435dd87b4904ff6e16bfe56 upstream To get the changes in: bbfd5e4fab63 ("perf/core: Add new branch sample type for HW index of raw branch records") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h This update is a prerequisite to adding support for the HW index of raw branch records. Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra (Intel) Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200304134902.GB12612@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arukonda Rahul Signed-off-by: mohanasv2 --- tools/include/uapi/linux/perf_event.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index db439100de3a..29ec1a54bbaa 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- Gitee From 4713a208e7c14e9a5b3556b52dfb62bee850437f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:01 -0800 Subject: [PATCH 33/77] perf evsel: Support PERF_SAMPLE_BRANCH_HW_INDEX commit d3f85437ad6a55113882d730beaa75759452da8f upstream A new branch sample type PERF_SAMPLE_BRANCH_HW_INDEX has been introduced in latest kernel. Enable HW_INDEX by default in LBR call stack mode. If kernel doesn't support the sample type, switching it off. Add HW_INDEX in attr_fprintf as well. User can check whether the branch sample type is set via debug information or header. Committer testing: First collect some samples with LBR callchains, system wide, for a few seconds: # perf record --call-graph lbr -a sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.625 MB perf.data (224 samples) ] # Now lets use 'perf evlist -v' to look at the branch_sample_type: # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES|HW_INDEX # So the machine has the kernel feature, and it was correctly added to perf_event_attr.branch_sample_type, for the default 'cycles' event. If we do it in another machine, where the kernel lacks the HW_INDEX feature, we get: # perf record --call-graph lbr -a sleep 2s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.690 MB perf.data (499 samples) ] # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES # No HW_INDEX in attr.branch_sample_type. Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/util/evsel.c | 15 ++++++++++++--- tools/perf/util/evsel.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ec0af4dd87ea..ff0a5da9d99d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS; + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_HW_INDEX; } } else pr_warning("Cannot use LBR callstack with branch stack. " @@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel, if (param->record_mode == CALLCHAIN_LBR) { perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | - PERF_SAMPLE_BRANCH_CALL_STACK); + PERF_SAMPLE_BRANCH_CALL_STACK | + PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { perf_evsel__reset_sample_bit(evsel, REGS_USER); @@ -1662,6 +1664,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, evsel->core.attr.ksymbol = 0; if (perf_missing_features.bpf) evsel->core.attr.bpf_event = 0; + if (perf_missing_features.branch_hw_idx) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->core.attr.sample_id_all = 0; @@ -1773,7 +1777,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { + if (!perf_missing_features.branch_hw_idx && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; pr_debug2("Kernel has no attr.aux_output support, bailing out\n"); goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index da65be7ca536..529f3bfd8a5d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -123,6 +123,7 @@ struct perf_missing_features { bool ksymbol; bool bpf; bool aux_output; + bool branch_hw_idx; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 651203126c71..355d3458d4e6 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name -- Gitee From 46767548707e48774024b3317f009ff0c0a74cda Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:28 +0900 Subject: [PATCH 34/77] perf/core: Add PERF_RECORD_CGROUP event commit 96aaab686505c449e24d76e76507290dcc30e008 upstream To support cgroup tracking, add CGROUP event to save a link between cgroup path and id number. This is needed since cgroups can go away when userspace tries to read the cgroup info (from the id) later. The attr.cgroup bit was also added to enable cgroup tracking from userspace. This event will be generated when a new cgroup becomes active. Userspace might need to synthesize those events for existing cgroups. Committer testing: From the resulting kernel, using /sys/kernel/btf/vmlinux: $ pahole perf_event_attr | grep -w cgroup -B5 -A1 __u64 write_backward:1; /* 40:27 8 */ __u64 namespaces:1; /* 40:28 8 */ __u64 ksymbol:1; /* 40:29 8 */ __u64 bpf_event:1; /* 40:30 8 */ __u64 aux_output:1; /* 40:31 8 */ __u64 cgroup:1; /* 40:32 8 */ __u64 __reserved_1:31; /* 40:33 8 */ $ Reported-by: kbuild test robot Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo [staticize perf_event_cgroup function] Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- include/uapi/linux/perf_event.h | 13 +++- kernel/events/core.c | 111 ++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7fa9f958cd9b..b77d271b06b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -381,7 +381,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1013,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6befff36dbc0..e58c1dcd075c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -392,6 +392,7 @@ static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_bpf_events __read_mostly; +static atomic_t nr_cgroup_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4468,6 +4469,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.namespaces) atomic_dec(&nr_namespaces_events); + if (event->attr.cgroup) + atomic_dec(&nr_cgroup_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -7572,6 +7575,105 @@ void perf_event_namespaces(struct task_struct *task) NULL); } +/* + * cgroup tracking + */ +#ifdef CONFIG_CGROUP_PERF + +struct perf_cgroup_event { + char *path; + int path_size; + struct { + struct perf_event_header header; + u64 id; + char path[]; + } event_id; +}; + +static int perf_event_cgroup_match(struct perf_event *event) +{ + return event->attr.cgroup; +} + +static void perf_event_cgroup_output(struct perf_event *event, void *data) +{ + struct perf_cgroup_event *cgroup_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + u16 header_size = cgroup_event->event_id.header.size; + int ret; + + if (!perf_event_cgroup_match(event)) + return; + + perf_event_header__init_id(&cgroup_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + cgroup_event->event_id.header.size); + if (ret) + goto out; + + perf_output_put(&handle, cgroup_event->event_id); + __output_copy(&handle, cgroup_event->path, cgroup_event->path_size); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +out: + cgroup_event->event_id.header.size = header_size; +} + +static void perf_event_cgroup(struct cgroup *cgrp) +{ + struct perf_cgroup_event cgroup_event; + char path_enomem[16] = "//enomem"; + char *pathname; + size_t size; + + if (!atomic_read(&nr_cgroup_events)) + return; + + cgroup_event = (struct perf_cgroup_event){ + .event_id = { + .header = { + .type = PERF_RECORD_CGROUP, + .misc = 0, + .size = sizeof(cgroup_event.event_id), + }, + .id = cgroup_id(cgrp), + }, + }; + + pathname = kmalloc(PATH_MAX, GFP_KERNEL); + if (pathname == NULL) { + cgroup_event.path = path_enomem; + } else { + /* just to be sure to have enough space for alignment */ + cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64)); + cgroup_event.path = pathname; + } + + /* + * Since our buffer works in 8 byte units we need to align our string + * size to a multiple of 8. However, we must guarantee the tail end is + * zero'd out to avoid leaking random bits to userspace. + */ + size = strlen(cgroup_event.path) + 1; + while (!IS_ALIGNED(size, sizeof(u64))) + cgroup_event.path[size++] = '\0'; + + cgroup_event.event_id.header.size += size; + cgroup_event.path_size = size; + + perf_iterate_sb(perf_event_cgroup_output, + &cgroup_event, + NULL); + + kfree(pathname); +} + +#endif + /* * mmap tracking */ @@ -10588,6 +10690,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_comm_events); if (event->attr.namespaces) atomic_inc(&nr_namespaces_events); + if (event->attr.cgroup) + atomic_inc(&nr_cgroup_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -12589,6 +12693,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css) kfree(jc); } +static int perf_cgroup_css_online(struct cgroup_subsys_state *css) +{ + perf_event_cgroup(css->cgroup); + return 0; +} + static int __perf_cgroup_move(void *info) { struct task_struct *task = info; @@ -12610,6 +12720,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset) struct cgroup_subsys perf_event_cgrp_subsys = { .css_alloc = perf_cgroup_css_alloc, .css_free = perf_cgroup_css_free, + .css_online = perf_cgroup_css_online, .attach = perf_cgroup_attach, /* * Implicitly enable on dfl hierarchy so that perf events can -- Gitee From 629f8900c446aacae942db791ccd88c065771b7e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:29 +0900 Subject: [PATCH 35/77] perf/core: Add PERF_SAMPLE_CGROUP feature commit 6546b19f95acc986807de981402bbac6b3a94b0f upstream The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in the sample. It will add a 64-bit id to identify current cgroup and it's the file handle in the cgroup file system. Userspace should use this information with PERF_RECORD_CGROUP event to match which cgroup it belongs. I put it before PERF_SAMPLE_AUX for simplicity since it just needs a 64-bit word. But if we want bigger samples, I can work on that direction too. Committer testing: $ pahole perf_sample_data | grep -w cgroup -B5 -A5 /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ struct perf_regs regs_intr; /* 312 16 */ /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */ u64 stack_user_size; /* 328 8 */ u64 phys_addr; /* 336 8 */ u64 cgroup; /* 344 8 */ /* size: 384, cachelines: 6, members: 22 */ /* padding: 32 */ }; $ Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 3 ++- init/Kconfig | 3 ++- kernel/events/core.c | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09e0cca1eaaf..3f4f867a26d1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1027,6 +1027,7 @@ struct perf_sample_data { u64 stack_user_size; u64 phys_addr; + u64 cgroup; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b77d271b06b1..dc33e3051819 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; diff --git a/init/Kconfig b/init/Kconfig index 3aa73f27028f..57f06ffcf26e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1120,7 +1120,8 @@ config CGROUP_PERF help This option extends the perf per-cpu mode to restrict monitoring to threads which belong to the cgroup specified and run on the - designated cpu. + designated cpu. Or this can be used to have cgroup ID in samples + so that it can monitor performance events among cgroups. Say N if unsure. diff --git a/kernel/events/core.c b/kernel/events/core.c index e58c1dcd075c..644dcf01d171 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1760,6 +1760,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_PHYS_ADDR) size += sizeof(data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + size += sizeof(data->cgroup); + event->header_size = size; } @@ -6694,6 +6697,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_PHYS_ADDR) perf_output_put(handle, data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + perf_output_put(handle, data->cgroup); + if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); @@ -6896,6 +6902,16 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_PHYS_ADDR) data->phys_addr = perf_virt_to_phys(data->addr); +#ifdef CONFIG_CGROUP_PERF + if (sample_type & PERF_SAMPLE_CGROUP) { + struct cgroup *cgrp; + + /* protected by RCU */ + cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; + data->cgroup = cgroup_id(cgrp); + } +#endif + if (sample_type & PERF_SAMPLE_AUX) { u64 size; @@ -11060,6 +11076,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->sample_type & PERF_SAMPLE_REGS_INTR) ret = perf_reg_validate(attr->sample_regs_intr); + +#ifndef CONFIG_CGROUP_PERF + if (attr->sample_type & PERF_SAMPLE_CGROUP) + return -EINVAL; +#endif + out: return ret; -- Gitee From a37ccdbd13ff351737227aee3b4ffa765e172568 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 27 Mar 2020 11:07:17 -0300 Subject: [PATCH 36/77] tools headers UAPI: Update tools's copy of linux/perf_event.h commit 03590fb409bce428bce0b4535a488dedfbed0c00 upstream To get the changes in: 6546b19f95ac ("perf/core: Add PERF_SAMPLE_CGROUP feature") 96aaab686505 ("perf/core: Add PERF_RECORD_CGROUP event") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h This update is a prerequisite to adding support for the HW index of raw branch records. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/include/uapi/linux/perf_event.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 29ec1a54bbaa..bc8c4816ba38 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -381,7 +382,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1014,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; -- Gitee From 6ae5a661e6839668d832b4e8c7e86e949219f342 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Fri, 8 Nov 2019 15:11:28 +0530 Subject: [PATCH 37/77] perf tool: Provide an option to print perf_event_open args and return value commit ccd26741f5e6bdf2c18ea38b32e6a347c5648a97 upstream Perf record with verbose=2 already prints this information along with whole lot of other traces which requires lot of scrolling. Introduce an option to print only perf_event_open() arguments and return value. Sample o/p: $ perf --debug perf-event-open=1 record -- ls > /dev/null ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|PERIOD read_format ID disabled 1 inherit 1 exclude_kernel 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 ------------------------------------------------------------ sys_perf_event_open: pid 4308 cpu 0 group_fd -1 flags 0x8 = 4 sys_perf_event_open: pid 4308 cpu 1 group_fd -1 flags 0x8 = 5 sys_perf_event_open: pid 4308 cpu 2 group_fd -1 flags 0x8 = 6 sys_perf_event_open: pid 4308 cpu 3 group_fd -1 flags 0x8 = 8 sys_perf_event_open: pid 4308 cpu 4 group_fd -1 flags 0x8 = 9 sys_perf_event_open: pid 4308 cpu 5 group_fd -1 flags 0x8 = 10 sys_perf_event_open: pid 4308 cpu 6 group_fd -1 flags 0x8 = 11 sys_perf_event_open: pid 4308 cpu 7 group_fd -1 flags 0x8 = 12 ------------------------------------------------------------ perf_event_attr: type 1 size 112 config 0x9 watermark 1 sample_id_all 1 bpf_event 1 { wakeup_events, wakeup_watermark } 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -13 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (9 samples) ] Committer notes: Just like the 'verbose' variable this new 'debug_peo_args' needs to be added to util/python.c, since we don't link the debug.o file in the python binding, which ended up making 'perf test python' fail with: # perf test -v python 18: 'import perf' in python : --- start --- test child forked, pid 19237 Traceback (most recent call last): File "", line 1, in ImportError: /tmp/build/perf/python/perf.so: undefined symbol: debug_peo_args test child finished with -1 ---- end ---- 'import perf' in python: FAILED! # After adding that new variable to util/python.c: # perf test -v python 18: 'import perf' in python : --- start --- test child forked, pid 22364 test child finished with 0 ---- end ---- 'import perf' in python: Ok # Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191108094128.28769-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/Documentation/perf.txt | 2 ++ tools/perf/util/debug.c | 2 ++ tools/perf/util/debug.h | 9 ++++++++ tools/perf/util/evsel.c | 36 +++++++++++++++---------------- tools/perf/util/python.c | 1 + 5 files changed, 32 insertions(+), 18 deletions(-) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 401f0ed67439..3f37ded13f8c 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -24,6 +24,8 @@ OPTIONS data-convert - data convert command debug messages stderr - write debug output (option -v) to stderr in browser mode + perf-event-open - Print perf_event_open() arguments and + return value --buildid-dir:: Setup buildid cache directory. It has higher priority than diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 682146d04379..1ab270c2254c 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,6 +24,7 @@ #include int verbose; +int debug_peo_args; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; @@ -180,6 +181,7 @@ static struct debug_variable { { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, { .name = "data-convert", .ptr = &debug_data_convert }, + { .name = "perf-event-open", .ptr = &debug_peo_args }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index d25ae1c4cee9..f1734abd98dd 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -8,6 +8,7 @@ #include extern int verbose; +extern int debug_peo_args; extern bool quiet, dump_trace; extern int debug_ordered_events; extern int debug_data_convert; @@ -30,6 +31,14 @@ extern int debug_data_convert; #define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) +/* Special macro to print perf_event_open arguments/return value. */ +#define pr_debug2_peo(fmt, ...) { \ + if (debug_peo_args) \ + pr_debugN(0, pr_fmt(fmt), ##__VA_ARGS__); \ + else \ + pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__); \ +} + #define pr_time_N(n, var, t, fmt, ...) \ eprintf_time(n, var, t, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ff0a5da9d99d..f26f6fd2169c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1547,7 +1547,7 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, static void display_attr(struct perf_event_attr *attr) { - if (verbose >= 2) { + if (verbose >= 2 || debug_peo_args) { fprintf(stderr, "%.60s\n", graph_dotted_line); fprintf(stderr, "perf_event_attr:\n"); perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL); @@ -1563,7 +1563,7 @@ static int perf_event_open(struct evsel *evsel, int fd; while (1) { - pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", + pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpu, group_fd, flags); fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags); @@ -1583,9 +1583,9 @@ static int perf_event_open(struct evsel *evsel, break; } - pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP); + pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", -ENOTSUP); evsel->core.attr.precise_ip--; - pr_debug2("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip); + pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip); display_attr(&evsel->core.attr); } @@ -1706,12 +1706,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, continue; } - pr_debug2("\nsys_perf_event_open failed, error %d\n", + pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", err); goto try_fallback; } - pr_debug2(" = %d\n", fd); + pr_debug2_peo(" = %d\n", fd); if (evsel->bpf_fd >= 0) { int evt_fd = fd; @@ -1784,58 +1784,58 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, goto fallback_missing_features; } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; - pr_debug2("Kernel has no attr.aux_output support, bailing out\n"); + pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); goto out_close; } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { perf_missing_features.bpf = true; - pr_debug2("switching off bpf_event\n"); + pr_debug2_peo("switching off bpf_event\n"); goto fallback_missing_features; } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { perf_missing_features.ksymbol = true; - pr_debug2("switching off ksymbol\n"); + pr_debug2_peo("switching off ksymbol\n"); goto fallback_missing_features; } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { perf_missing_features.write_backward = true; - pr_debug2("switching off write_backward\n"); + pr_debug2_peo("switching off write_backward\n"); goto out_close; } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { perf_missing_features.clockid_wrong = true; - pr_debug2("switching off clockid\n"); + pr_debug2_peo("switching off clockid\n"); goto fallback_missing_features; } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { perf_missing_features.clockid = true; - pr_debug2("switching off use_clockid\n"); + pr_debug2_peo("switching off use_clockid\n"); goto fallback_missing_features; } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; - pr_debug2("switching off cloexec flag\n"); + pr_debug2_peo("switching off cloexec flag\n"); goto fallback_missing_features; } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { perf_missing_features.mmap2 = true; - pr_debug2("switching off mmap2\n"); + pr_debug2_peo("switching off mmap2\n"); goto fallback_missing_features; } else if (!perf_missing_features.exclude_guest && (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { perf_missing_features.exclude_guest = true; - pr_debug2("switching off exclude_guest, exclude_host\n"); + pr_debug2_peo("switching off exclude_guest, exclude_host\n"); goto fallback_missing_features; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; - pr_debug2("switching off sample_id_all\n"); + pr_debug2_peo("switching off sample_id_all\n"); goto retry_sample_id; } else if (!perf_missing_features.lbr_flags && (evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS))) { perf_missing_features.lbr_flags = true; - pr_debug2("switching off branch sample type no (cycles/flags)\n"); + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); goto fallback_missing_features; } else if (!perf_missing_features.group_read && evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && perf_evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; - pr_debug2("switching off group read\n"); + pr_debug2_peo("switching off group read\n"); goto fallback_missing_features; } out_close: diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 25118605f3f8..83212c65848b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -65,6 +65,7 @@ struct perf_env perf_env; * implementing 'verbose' and 'eprintf'. */ int verbose; +int debug_peo_args; int eprintf(int level, int var, const char *fmt, ...); -- Gitee From 9019fcc57408b99f8c0460f3543144f3708b61ea Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:34 +0900 Subject: [PATCH 38/77] perf record: Add --all-cgroups option commit 8fb4b67939e169fca68174e9ac7be79fe9a04498 upstream The --all-cgroups option is to enable cgroup profiling support. It tells kernel to record CGROUP events in the ring buffer so that perf report can identify task/cgroup association later. [root@seventh ~]# perf record --all-cgroups --namespaces /wb/cgtest [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.042 MB perf.data (558 samples) ] [root@seventh ~]# perf report --stdio -s cgroup_id,cgroup,pid # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 558 of event 'cycles' # Event count (approx.): 458017341 # # Overhead cgroup id (dev/inode) Cgroup Pid:Command # ........ ..................... .......... ............... # 33.15% 4/0xeffffffb /sub 9615:looper0 32.83% 4/0xf00002f5 /sub/cgrp2 9620:looper2 32.79% 4/0xf00002f4 /sub/cgrp1 9619:looper1 0.35% 4/0xf00002f5 /sub/cgrp2 9618:cgtest 0.34% 4/0xf00002f4 /sub/cgrp1 9617:cgtest 0.32% 4/0xeffffffb / 9615:looper0 0.11% 4/0xeffffffb /sub 9617:cgtest 0.10% 4/0xeffffffb /sub 9618:cgtest # # (Tip: Sample related events with: perf record -e '{cycles,instructions}:S') # [root@seventh ~]# Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-8-namhyung@kernel.org Link: http://lore.kernel.org/lkml/20200402015249.3800462-1-namhyung@kernel.org [ Extracted the HAVE_FILE_HANDLE from the followup patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/Documentation/perf-record.txt | 5 ++++- tools/perf/builtin-record.c | 11 +++++++++++ tools/perf/util/evsel.c | 11 ++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/record.h | 1 + 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 414a9f8b658d..63d9ac3995c2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -389,7 +389,10 @@ displayed with the weight and local_weight sort keys. This currently works for abort events and some memory events in precise mode on modern Intel CPUs. --namespaces:: -Record events of type PERF_RECORD_NAMESPACES. +Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key. + +--all-cgroups:: +Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key. --transaction:: Record transaction flags for transaction related events. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f34c450a9743..3ab5be1b7c52 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1374,6 +1374,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (rec->opts.record_namespaces) tool->namespace_events = true; + if (rec->opts.record_cgroup) { +#ifdef HAVE_FILE_HANDLE + tool->cgroup_events = true; +#else + pr_err("cgroup tracking is not supported\n"); + return -1; +#endif + } + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -2248,6 +2257,8 @@ static struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, "Record namespaces events"), + OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, + "Record cgroup events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f26f6fd2169c..04938694da6e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1104,6 +1104,11 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_namespaces) attr->namespaces = track; + if (opts->record_cgroup) { + attr->cgroup = track && !perf_missing_features.cgroup; + perf_evsel__set_sample_bit(evsel, CGROUP); + } + if (opts->record_switch_events) attr->context_switch = track; @@ -1777,7 +1782,11 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.branch_hw_idx && + if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.branch_hw_idx && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { perf_missing_features.branch_hw_idx = true; pr_debug2("switching off branch HW index support\n"); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 529f3bfd8a5d..9d6050831eca 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -124,6 +124,7 @@ struct perf_missing_features { bool bpf; bool aux_output; bool branch_hw_idx; + bool cgroup; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 941f9ae49e18..ae3db7a272be 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -34,6 +34,7 @@ struct record_opts { bool auxtrace_snapshot_on_exit; bool auxtrace_sample_mode; bool record_namespaces; + bool record_cgroup; bool record_switch_events; bool all_kernel; bool all_user; -- Gitee From 417dd10558c237505fda67b9777486f8dfc37562 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Apr 2020 16:12:15 -0300 Subject: [PATCH 39/77] perf evsel: Rename *perf_evsel__*set_sample_*() to *evsel__*set_sample_*() commit 862b2f8fbc5b3a13d096b06560b6408f93388cf9 upstream As they are not 'struct evsel' methods, not part of tools/lib/perf/, aka libperf, to whom the perf_ prefix belongs. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/arch/arm/util/cs-etm.c | 4 +- tools/perf/arch/arm64/util/arm-spe.c | 12 ++-- tools/perf/arch/x86/util/intel-bts.c | 2 +- tools/perf/arch/x86/util/intel-pt.c | 20 +++---- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-kvm.c | 18 +++--- tools/perf/tests/hists_cumulate.c | 8 +-- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/perf-record.c | 6 +- tools/perf/tests/switch-tracking.c | 10 ++-- tools/perf/util/auxtrace.c | 4 +- tools/perf/util/evlist.c | 4 +- tools/perf/util/evsel.c | 88 ++++++++++++++-------------- tools/perf/util/evsel.h | 17 +++--- tools/perf/util/record.c | 2 +- tools/perf/util/sideband_evlist.c | 2 +- 17 files changed, 100 insertions(+), 103 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index b8df8054dd5e..8cefba31143a 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -402,7 +402,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * when a context switch happened. */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID | ETM_OPT_TS); @@ -426,7 +426,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); } out: diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 27653be24447..e3593063b3d1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, */ perf_evlist__to_front(evlist, arm_spe_evsel); - perf_evsel__set_sample_bit(arm_spe_evsel, CPU); - perf_evsel__set_sample_bit(arm_spe_evsel, TIME); - perf_evsel__set_sample_bit(arm_spe_evsel, TID); + evsel__set_sample_bit(arm_spe_evsel, CPU); + evsel__set_sample_bit(arm_spe_evsel, TIME); + evsel__set_sample_bit(arm_spe_evsel, TID); /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); @@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); - perf_evsel__set_sample_bit(tracking_evsel, CPU); - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); return 0; } diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index c7cc05dd2852..138d0058a6ab 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -219,7 +219,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_bts_evsel, CPU); + evsel__set_sample_bit(intel_bts_evsel, CPU); } /* Add dummy event to keep tracking */ diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index fcc18501b9e4..4b4dffd2b4c4 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -420,8 +420,8 @@ static int intel_pt_track_switches(struct evlist *evlist) evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); evsel->core.system_wide = true; evsel->no_aux_samples = true; @@ -729,10 +729,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; - perf_evsel__set_sample_bit(switch_evsel, TID); - perf_evsel__set_sample_bit(switch_evsel, TIME); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); + evsel__set_sample_bit(switch_evsel, TID); + evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); opts->record_switch_events = false; ptr->have_sched_switch = 3; @@ -766,7 +766,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + evsel__set_sample_bit(intel_pt_evsel, CPU); } /* Add dummy event to keep tracking */ @@ -790,11 +790,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ - perf_evsel__set_sample_bit(tracking_evsel, CPU); + evsel__set_sample_bit(tracking_evsel, CPU); } - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); } /* diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8db8fc9bddef..2cfb6e5da381 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -433,7 +433,7 @@ static int __cmd_annotate(struct perf_annotate *ann) total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); /* Don't sort callchain */ - perf_evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__reset_sample_bit(pos, CALLCHAIN); perf_evsel__output_resort(pos, NULL); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 265682296836..9dc27770d848 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1052,7 +1052,7 @@ static void data_process(void) data__fprintf(); /* Don't sort callchain for perf diff */ - perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + evsel__reset_sample_bit(evsel_base, CALLCHAIN); hists__process(hists_base); } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 858da896b518..20366ba14de1 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1033,16 +1033,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_event_attr *attr = &pos->core.attr; /* make sure these *are* set */ - perf_evsel__set_sample_bit(pos, TID); - perf_evsel__set_sample_bit(pos, TIME); - perf_evsel__set_sample_bit(pos, CPU); - perf_evsel__set_sample_bit(pos, RAW); + evsel__set_sample_bit(pos, TID); + evsel__set_sample_bit(pos, TIME); + evsel__set_sample_bit(pos, CPU); + evsel__set_sample_bit(pos, RAW); /* make sure these are *not*; want as small a sample as possible */ - perf_evsel__reset_sample_bit(pos, PERIOD); - perf_evsel__reset_sample_bit(pos, IP); - perf_evsel__reset_sample_bit(pos, CALLCHAIN); - perf_evsel__reset_sample_bit(pos, ADDR); - perf_evsel__reset_sample_bit(pos, READ); + evsel__reset_sample_bit(pos, PERIOD); + evsel__reset_sample_bit(pos, IP); + evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__reset_sample_bit(pos, ADDR); + evsel__reset_sample_bit(pos, READ); attr->mmap = 0; attr->comm = 0; attr->task = 0; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 6367c8f6ca22..7a542f1c1c78 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5f4c0dbb4715..e9d530d7e7de 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -86,7 +86,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } evsels[i]->core.attr.wakeup_events = 1; - perf_evsel__set_sample_id(evsels[i], false); + evsel__set_sample_id(evsels[i], false); evlist__add(evlist, evsels[i]); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2195fc205e72..83adfd846ccd 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * Config the evsels, setting attr->comm on the first one, etc. */ evsel = evlist__first(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TID); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, TIME); perf_evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index fcb0d03dba4e..b08c8a0898d3 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ switch_evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__set_sample_bit(switch_evsel, TIME); switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; @@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - perf_evsel__set_sample_bit(cycles_evsel, CPU); - perf_evsel__set_sample_bit(cycles_evsel, TIME); + evsel__set_sample_bit(cycles_evsel, CPU); + evsel__set_sample_bit(cycles_evsel, TIME); /* Fourth event */ err = parse_events(evlist, "dummy:u", NULL); @@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index efec868c74c3..ea11526bef07 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -702,10 +702,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist, pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n"); return -EINVAL; } - perf_evsel__set_sample_bit(evsel, AUX); + evsel__set_sample_bit(evsel, AUX); opts->auxtrace_sample_mode = true; } else { - perf_evsel__reset_sample_bit(evsel, AUX); + evsel__reset_sample_bit(evsel, AUX); } } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 201cf1786f98..03a066ce1f6d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1006,7 +1006,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__set_sample_bit(evsel, bit); + __evsel__set_sample_bit(evsel, bit); } void __perf_evlist__reset_sample_bit(struct evlist *evlist, @@ -1015,7 +1015,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__reset_sample_bit(evsel, bit); + __evsel__reset_sample_bit(evsel, bit); } int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 04938694da6e..e86db917bfb9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -184,7 +184,7 @@ void perf_evsel__calc_id_pos(struct evsel *evsel) evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type); } -void __perf_evsel__set_sample_bit(struct evsel *evsel, +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (!(evsel->core.attr.sample_type & bit)) { @@ -194,7 +194,7 @@ void __perf_evsel__set_sample_bit(struct evsel *evsel, } } -void __perf_evsel__reset_sample_bit(struct evsel *evsel, +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (evsel->core.attr.sample_type & bit) { @@ -204,14 +204,14 @@ void __perf_evsel__reset_sample_bit(struct evsel *evsel, } } -void perf_evsel__set_sample_id(struct evsel *evsel, +void evsel__set_sample_id(struct evsel *evsel, bool can_sample_identifier) { if (can_sample_identifier) { - perf_evsel__reset_sample_bit(evsel, ID); - perf_evsel__set_sample_bit(evsel, IDENTIFIER); + evsel__reset_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, IDENTIFIER); } else { - perf_evsel__set_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, ID); } evsel->core.attr.read_format |= PERF_FORMAT_ID; } @@ -693,7 +693,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); attr->sample_max_stack = param->max_stack; @@ -708,7 +708,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, "to get user callchain information. " "Falling back to framepointers.\n"); } else { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | @@ -722,8 +722,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { - perf_evsel__set_sample_bit(evsel, REGS_USER); - perf_evsel__set_sample_bit(evsel, STACK_USER); + evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { attr->sample_regs_user |= DWARF_MINIMAL_REGS; pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " @@ -760,16 +760,16 @@ perf_evsel__reset_callgraph(struct evsel *evsel, { struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); if (param->record_mode == CALLCHAIN_LBR) { - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { - perf_evsel__reset_sample_bit(evsel, REGS_USER); - perf_evsel__reset_sample_bit(evsel, STACK_USER); + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, STACK_USER); } } @@ -793,32 +793,32 @@ static void apply_config_terms(struct evsel *evsel, if (!(term->weak && opts->user_interval != ULLONG_MAX)) { attr->sample_period = term->val.period; attr->freq = 0; - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_FREQ: if (!(term->weak && opts->user_freq != UINT_MAX)) { attr->sample_freq = term->val.freq; attr->freq = 1; - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); else - perf_evsel__reset_sample_bit(evsel, TIME); + evsel__reset_sample_bit(evsel, TIME); break; case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: callgraph_buf = term->val.str; break; case PERF_EVSEL__CONFIG_TERM_BRANCH: if (term->val.str && strcmp(term->val.str, "no")) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); parse_branch_str(term->val.str, &attr->branch_sample_type); } else - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); break; case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; @@ -897,8 +897,8 @@ static void apply_config_terms(struct evsel *evsel, /* set perf-event callgraph */ if (param.enabled) { if (sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, DATA_SRC); evsel->core.attr.mmap_data = track; } perf_evsel__config_callchain(evsel, opts, ¶m); @@ -965,17 +965,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, attr->inherit = !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; - perf_evsel__set_sample_bit(evsel, IP); - perf_evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, IP); + evsel__set_sample_bit(evsel, TID); if (evsel->sample_read) { - perf_evsel__set_sample_bit(evsel, READ); + evsel__set_sample_bit(evsel, READ); /* * We need ID even in case of single event, because * PERF_SAMPLE_READ process ID specific data. */ - perf_evsel__set_sample_id(evsel, false); + evsel__set_sample_id(evsel, false); /* * Apply group format only if we belong to group @@ -994,7 +994,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { @@ -1033,7 +1033,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } if (opts->sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, ADDR); attr->mmap_data = track; } @@ -1050,16 +1050,16 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_intr_regs && !evsel->no_aux_samples) { attr->sample_regs_intr = opts->sample_intr_regs; - perf_evsel__set_sample_bit(evsel, REGS_INTR); + evsel__set_sample_bit(evsel, REGS_INTR); } if (opts->sample_user_regs && !evsel->no_aux_samples) { attr->sample_regs_user |= opts->sample_user_regs; - perf_evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, REGS_USER); } if (target__has_cpu(&opts->target) || opts->sample_cpu) - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, CPU); /* * When the user explicitly disabled time don't force it here. @@ -1068,31 +1068,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, (!perf_missing_features.sample_id_all && (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || opts->sample_time_set))) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, TIME); - perf_evsel__set_sample_bit(evsel, RAW); - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, RAW); + evsel__set_sample_bit(evsel, CPU); } if (opts->sample_address) - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) - perf_evsel__set_sample_bit(evsel, PHYS_ADDR); + evsel__set_sample_bit(evsel, PHYS_ADDR); if (opts->no_buffering) { attr->watermark = 0; attr->wakeup_events = 1; } if (opts->branch_stack && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = opts->branch_stack; } if (opts->sample_weight) - perf_evsel__set_sample_bit(evsel, WEIGHT); + evsel__set_sample_bit(evsel, WEIGHT); attr->task = track; attr->mmap = track; @@ -1106,14 +1106,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_cgroup) { attr->cgroup = track && !perf_missing_features.cgroup; - perf_evsel__set_sample_bit(evsel, CGROUP); + evsel__set_sample_bit(evsel, CGROUP); } if (opts->record_switch_events) attr->context_switch = track; if (opts->sample_transaction) - perf_evsel__set_sample_bit(evsel, TRANSACTION); + evsel__set_sample_bit(evsel, TRANSACTION); if (opts->running_time) { evsel->core.attr.read_format |= @@ -1176,9 +1176,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, /* The --period option takes the precedence. */ if (opts->period_set) { if (opts->period) - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); else - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } /* @@ -1187,7 +1187,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * if BRANCH_STACK bit is set. */ if (opts->initial_delay && is_dummy_event(evsel)) - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); } int perf_evsel__set_filter(struct evsel *evsel, const char *filter) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9d6050831eca..1c9d021c0304 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -213,19 +213,16 @@ const char *perf_evsel__name(struct evsel *evsel); const char *perf_evsel__group_name(struct evsel *evsel); int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); -void __perf_evsel__set_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); -void __perf_evsel__reset_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); -#define perf_evsel__set_sample_bit(evsel, bit) \ - __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) +#define evsel__set_sample_bit(evsel, bit) \ + __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) -#define perf_evsel__reset_sample_bit(evsel, bit) \ - __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) +#define evsel__reset_sample_bit(evsel, bit) \ + __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) -void perf_evsel__set_sample_id(struct evsel *evsel, - bool use_sample_identifier); +void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); int perf_evsel__set_filter(struct evsel *evsel, const char *filter); int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 5084ad6db1b3..24e3c867d3a7 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -62,7 +62,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (sample_id) { evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + evsel__set_sample_id(evsel, use_sample_identifier); } perf_evlist__set_id_pos(evlist); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index 1d6f470d64c4..c51a8dfce08b 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -108,7 +108,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) bool can_sample_identifier = perf_can_sample_identifier(); evlist__for_each_entry(evlist, counter) - perf_evsel__set_sample_id(counter, can_sample_identifier); + evsel__set_sample_id(counter, can_sample_identifier); perf_evlist__set_id_pos(evlist); } -- Gitee From e3c3aefaf1bd0c577544204ca2e13eb735b71dd4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Oct 2020 06:57:46 -0700 Subject: [PATCH 40/77] perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE commit 8d97e71811aaafe4abf611dc24822fd6e73df1a1 upstream Current perf can report both virtual addresses and physical addresses, but not the MMU page size. Without the MMU page size information of the utilized page, users cannot decide whether to promote/demote large pages to optimize memory usage. Add a new sample type for the data MMU page size. Current perf already has a facility to collect data virtual addresses. A page walker is required to walk the pages tables and calculate the MMU page size from a given virtual address. On some platforms, e.g., X86, the page walker is invoked in an NMI handler. So the page walker must be NMI-safe and low overhead. Besides, the page walker should work for both user and kernel virtual address. The existing generic page walker, e.g., walk_page_range_novma(), is a little bit complex and doesn't guarantee the NMI-safe. The follow_page() is only for user-virtual address. Add a new function perf_get_page_size() to walk the page tables and calculate the MMU page size. In the function: - Interrupts have to be disabled to prevent any teardown of the page tables. - For user space threads, the current->mm is used for the page walker. For kernel threads and the like, the current->mm is NULL. The init_mm is used for the page walker. The active_mm is not used here, because it can be NULL. Quote from Peter Zijlstra, "context_switch() can set prev->active_mm to NULL when it transfers it to @next. It does this before @current is updated. So an NMI that comes in between this active_mm swizzling and updating @current will see !active_mm." - The MMU page size is calculated from the page table level. The method should work for all architectures, but it has only been verified on X86. Should there be some architectures, which support perf, where the method doesn't work, it can be fixed later separately. Reporting the wrong page size would not be fatal for the architecture. Some under discussion features may impact the method in the future. Quote from Dave Hansen, "There are lots of weird things folks are trying to do with the page tables, like Address Space Isolation. For instance, if you get a perf NMI when running userspace, current->mm->pgd is *different* than the PGD that was in use when userspace was running. It's close enough today, but it might not stay that way." If the case happens later, lots of consecutive page walk errors will happen. The worst case is that lots of page-size '0' are returned, which would not be fatal. In the perf tool, a check is implemented to detect this case. Once it happens, a kernel patch could be implemented accordingly then. Suggested-by: Peter Zijlstra Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-2-kan.liang@linux.intel.com Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 4 +- kernel/events/core.c | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3f4f867a26d1..6ed8f63260cb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1028,6 +1028,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; + u64 data_page_size; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index dc33e3051819..c413bca7971f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -143,8 +143,9 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 23, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -878,6 +879,7 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/kernel/events/core.c b/kernel/events/core.c index 644dcf01d171..3e30058c96ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "internal.h" @@ -1763,6 +1764,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_CGROUP) size += sizeof(data->cgroup); + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + size += sizeof(data->data_page_size); + event->header_size = size; } @@ -6700,6 +6704,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_CGROUP) perf_output_put(handle, data->cgroup); + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + perf_output_put(handle, data->data_page_size); + if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); @@ -6757,6 +6764,94 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +#ifdef CONFIG_MMU + +/* + * Return the MMU page size of a given virtual address + */ +static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd)) + return 0; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return 0; + + if (p4d_leaf(*p4d)) + return 1ULL << P4D_SHIFT; + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return 0; + + if (pud_leaf(*pud)) + return 1ULL << PUD_SHIFT; + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return 0; + + if (pmd_leaf(*pmd)) + return 1ULL << PMD_SHIFT; + + pte = pte_offset_map(pmd, addr); + if (!pte_present(*pte)) { + pte_unmap(pte); + return 0; + } + + pte_unmap(pte); + return PAGE_SIZE; +} + +#else + +static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +#endif + +static u64 perf_get_page_size(unsigned long addr) +{ + struct mm_struct *mm; + unsigned long flags; + u64 size; + + if (!addr) + return 0; + + /* + * Software page-table walkers must disable IRQs, + * which prevents any tear down of the page tables. + */ + local_irq_save(flags); + + mm = current->mm; + if (!mm) { + /* + * For kernel threads and the like, use init_mm so that + * we can find kernel memory. + */ + mm = &init_mm; + } + + size = __perf_get_page_size(mm, addr); + + local_irq_restore(flags); + + return size; +} + static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; struct perf_callchain_entry * @@ -6912,6 +7007,14 @@ void perf_prepare_sample(struct perf_event_header *header, } #endif + /* + * PERF_DATA_PAGE_SIZE requires PERF_SAMPLE_ADDR. If the user doesn't + * require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr, + * but the value will not dump to the userspace. + */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + data->data_page_size = perf_get_page_size(data->addr); + if (sample_type & PERF_SAMPLE_AUX) { u64 size; -- Gitee From 8fc6622a3664d2e9b614cca79b0e77fc291035ce Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:52 -0800 Subject: [PATCH 41/77] tools headers UAPI: Update tools's copy of linux/perf_event.h commit 47d982202f8cfaac6f208c9109fa15cb6a0181f7 upstream To get the changes in: commit 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE") commit 995f088efebe ("perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE") This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/include/uapi/linux/perf_event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bc8c4816ba38..74bf68b3f16b 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,8 +143,10 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -878,6 +880,8 @@ enum perf_event_type { * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX + * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, -- Gitee From 92bef004f8670192086e50c265bcaa580d5d6248 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:30 +0900 Subject: [PATCH 42/77] perf tools: Basic support for CGROUP event commit ba78c1c5461c2fc2f57b777e971b3a9ec0df5666 upstream Implement basic functionality to support cgroup tracking. Each cgroup can be identified by inode number which can be read from userspace too. The actual cgroup processing will come in the later patch. [Backport changes] 1. In the upstream commit, the event.h file is located at tools/lib/perf/include/perf/event.h, whereas in the current kernel it resides at tools/perf/lib/include/perf/event.h. We skipped a patch that moved these files (and their content) from tools/perf/lib/* to tools/lib/perf/* because applying it caused significant conflicts and additional deviations. Since that patch only involved file moves without any functional changes, there is no change in code part except the path. So, the changes from upstream were incorporate directly into the files under tools/perf/lib/include/perf/event.h. 2.The patch also shows differences due to a change in file order between the upstream commit and the backported patch. In the upstream commit, the file order is 1.tools/lib/perf/include/perf/event.h 2.tools/perf/builtin-diff.c 3.tools/perf/builtin-report.c 4.... whereas in backported patch, the file order is 1.tools/perf/builtin-diff.c 2.tools/perf/builtin-report.c 3.tools/perf/lib/include/perf/event.h 4.... All upstream changes have been correctly backported to the respective files. However, the diff tool compares files on a line-by-line basis, thus, the changed order causes it to report deviations even though there are no actual code differences in the files builtin-diff.c and builtin-report.c. Reported-by: kernel test robot Signed-off-by: Namhyung Kim Cc: Adrian Hunter [ fix perf test failure on sampling parsing ] Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/builtin-diff.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/lib/include/perf/event.h | 7 +++++++ tools/perf/tests/sample-parsing.c | 6 +++++- tools/perf/util/event.c | 18 ++++++++++++++++++ tools/perf/util/event.h | 6 ++++++ tools/perf/util/evsel.c | 6 ++++++ tools/perf/util/machine.c | 12 ++++++++++++ tools/perf/util/machine.h | 3 +++ tools/perf/util/perf_event_attr_fprintf.c | 2 ++ tools/perf/util/session.c | 4 ++++ tools/perf/util/synthetic-events.c | 8 ++++++++ tools/perf/util/tool.h | 1 + 13 files changed, 74 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9dc27770d848..48ce1f99c1ed 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -448,6 +448,7 @@ static struct perf_diff pdiff = { .fork = perf_event__process_fork, .lost = perf_event__process_lost, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dc228bdf2bbc..f5c5e0cdfcb3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1050,6 +1050,7 @@ int cmd_report(int argc, const char **argv) .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h index 18106899cb4e..69b44d2cc0f5 100644 --- a/tools/perf/lib/include/perf/event.h +++ b/tools/perf/lib/include/perf/event.h @@ -105,6 +105,12 @@ struct perf_record_bpf_event { __u8 tag[BPF_TAG_SIZE]; // prog tag }; +struct perf_record_cgroup { + struct perf_event_header header; + __u64 id; + char path[PATH_MAX]; +}; + struct perf_record_sample { struct perf_event_header header; __u64 array[]; @@ -352,6 +358,7 @@ union perf_event { struct perf_record_mmap2 mmap2; struct perf_record_comm comm; struct perf_record_namespaces namespaces; + struct perf_record_cgroup cgroup; struct perf_record_fork fork; struct perf_record_lost lost; struct perf_record_lost_samples lost_samples; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2a01dd1d725c..12b8278a0e35 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -150,6 +150,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_PHYS_ADDR) COMP(phys_addr); + if (type & PERF_SAMPLE_CGROUP) + COMP(cgroup); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -228,6 +231,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .regs = regs, }, .phys_addr = 113, + .cgroup = 114, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, @@ -331,7 +335,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bfaa9afdb8b4..49b809419a12 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -54,6 +54,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", + [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) return ret; } +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n", + event->cgroup.id, event->cgroup.path); +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, return machine__process_namespaces_event(machine, event, sample); } +int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_cgroup_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_NAMESPACES: ret += perf_event__fprintf_namespaces(event, fp); break; + case PERF_RECORD_CGROUP: + ret += perf_event__fprintf_cgroup(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 85223159737c..0ad3ba22817d 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 cgroup; u32 flags; u16 insn_len; u8 cpumode; @@ -321,6 +322,10 @@ int perf_event__process_namespaces(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_cgroup(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -376,6 +381,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e86db917bfb9..cfa91984bb29 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2245,6 +2245,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->cgroup = 0; + if (type & PERF_SAMPLE_CGROUP) { + data->cgroup = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8c3addc2e9e1..c02ec8fcce55 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -650,6 +650,16 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused, return err; } +int machine__process_cgroup_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cgroup(event, stdout); + + return 0; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -1885,6 +1895,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_mmap_event(machine, event, sample); break; case PERF_RECORD_NAMESPACES: ret = machine__process_namespaces_event(machine, event, sample); break; + case PERF_RECORD_CGROUP: + ret = machine__process_cgroup_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 18e13c0ccd6a..4f734337661a 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -127,6 +127,9 @@ int machine__process_switch_event(struct machine *machine, int machine__process_namespaces_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_cgroup_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 355d3458d4e6..b94fa07f5d32 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), + bit_name(CGROUP), { .name = NULL, } }; #undef bit_name @@ -132,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(ksymbol, p_unsigned); PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRf(aux_output, p_unsigned); + PRINT_ATTRf(cgroup, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 17caadb33e6c..f163e408353a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -467,6 +467,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->comm = process_event_stub; if (tool->namespaces == NULL) tool->namespaces = process_event_stub; + if (tool->cgroup == NULL) + tool->cgroup = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) @@ -1431,6 +1433,8 @@ static int machines__deliver_event(struct machines *machines, return tool->comm(tool, event, sample, machine); case PERF_RECORD_NAMESPACES: return tool->namespaces(tool, event, sample, machine); + case PERF_RECORD_CGROUP: + return tool->cgroup(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index e357f2b268de..7f967c274b04 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1228,6 +1228,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_PHYS_ADDR) result += sizeof(u64); + if (type & PERF_SAMPLE_CGROUP) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1401,6 +1404,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CGROUP) { + *array = sample->cgroup; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2abbf668b8de..472ef5eb4068 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -46,6 +46,7 @@ struct perf_tool { mmap2, comm, namespaces, + cgroup, fork, exit, lost, -- Gitee From cf87e91672dc7eba01cac4a75157c0f9a5f3386a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 09:27:53 -0800 Subject: [PATCH 43/77] perf record: Support new sample type for data page size commit 542b88fd12769bf5be307b11ca0f94a6140bba82 upstream Support new sample type PERF_SAMPLE_DATA_PAGE_SIZE for page size. Add new option --data-page-size to record sample data page size. Signed-off-by: Kan Liang Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201130172803.2676-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 9 +++++++++ tools/perf/util/perf_event_attr_fprintf.c | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 8 ++++++++ 7 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 63d9ac3995c2..91b4ffe3b914 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -287,6 +287,9 @@ OPTIONS --phys-data:: Record the sample physical addresses. +--data-page-size:: + Record the sampled data address data page size. + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3ab5be1b7c52..33de5d56e91c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2203,6 +2203,8 @@ static struct option __record_options[] = { OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, "Record the sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, + "Record the sampled data address data page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 0ad3ba22817d..01b0cb05db8f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -135,6 +135,7 @@ struct perf_sample { u32 raw_size; u64 data_src; u64 phys_addr; + u64 data_page_size; u64 cgroup; u32 flags; u16 insn_len; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cfa91984bb29..52af2a1a978c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1109,6 +1109,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CGROUP); } + if (opts->sample_data_page_size) + evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -2251,6 +2254,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->data_page_size = 0; + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + data->data_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index b94fa07f5d32..68188c7c188a 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index ae3db7a272be..99034d644322 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -22,6 +22,7 @@ struct record_opts { bool raw_samples; bool sample_address; bool sample_phys_addr; + bool sample_data_page_size; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 7f967c274b04..41df28804296 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1231,6 +1231,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_CGROUP) result += sizeof(u64); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1409,6 +1412,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) { + *array = sample->data_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; -- Gitee From 03a5cd1f9ea862144bcdadff9942da09d479e59c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Dec 2020 14:04:05 -0300 Subject: [PATCH 44/77] perf evsel: Emit warning about kernel not supporting the data page size sample_type bit commit 456ef4c11c06f0b8c53acaf796d77d2033f079f2 upstream Before we had this unhelpful message: $ perf record --data-page-size sleep 1 Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles:u). /bin/dmesg | grep -i perf may provide additional information. $ Add support to the perf_missing_features variable to remember what caused evsel__open() to fail and then use that information in evsel__open_strerror(). $ perf record --data-page-size sleep 1 Error: Asking for the data page size isn't supported by this kernel. $ Cc: Kan Liang Cc: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201207170759.GB129853@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/util/evsel.c | 9 ++++++++- tools/perf/util/evsel.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 52af2a1a978c..8c92521a7e42 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1785,7 +1785,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { + if (!perf_missing_features.data_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { perf_missing_features.cgroup = true; pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); goto out_close; @@ -2560,6 +2565,8 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) + return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel."); if (perf_missing_features.clockid) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1c9d021c0304..4f42464daac4 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -125,6 +125,7 @@ struct perf_missing_features { bool aux_output; bool branch_hw_idx; bool cgroup; + bool data_page_size; }; extern struct perf_missing_features perf_missing_features; -- Gitee From dc2c61f7f4bc207ec253f29aa1b19b647b36c11e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:49 -0800 Subject: [PATCH 45/77] perf record: Add support for PERF_SAMPLE_CODE_PAGE_SIZE commit c1de7f3d84ca324c7cda85c3ce27b11741af2124 upstream Adds the infrastructure to sample the code address page size. Introduce a new --code-page-size option for perf record. Signed-off-by: Kan Liang Originally-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 18 +++++++++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/perf_event_attr_fprintf.c | 2 +- tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 8 ++++++++ 8 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 91b4ffe3b914..5a666750bd4b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -290,6 +290,9 @@ OPTIONS --data-page-size:: Record the sampled data address data page size. +--code-page-size:: + Record the sampled code address (ip) page size + -T:: --timestamp:: Record the sample timestamps. Use it with 'perf report -D' to see the diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 33de5d56e91c..1d9c1dc20789 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2205,6 +2205,8 @@ static struct option __record_options[] = { "Record the sample physical addresses"), OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, "Record the sampled data address data page size"), + OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, + "Record the sampled code address (ip) page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 01b0cb05db8f..c8d31205a3dd 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -136,6 +136,7 @@ struct perf_sample { u64 data_src; u64 phys_addr; u64 data_page_size; + u64 code_page_size; u64 cgroup; u32 flags; u16 insn_len; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8c92521a7e42..0f08f9577d4c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1112,6 +1112,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->sample_data_page_size) evsel__set_sample_bit(evsel, DATA_PAGE_SIZE); + if (opts->sample_code_page_size) + evsel__set_sample_bit(evsel, CODE_PAGE_SIZE); + if (opts->record_switch_events) attr->context_switch = track; @@ -1785,7 +1788,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.data_page_size && + if (!perf_missing_features.code_page_size && + (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); + goto out_close; + } else if (!perf_missing_features.data_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { perf_missing_features.data_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); @@ -2265,6 +2273,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } + data->code_page_size = 0; + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + data->code_page_size = *array; + array++; + } + if (type & PERF_SAMPLE_AUX) { OVERFLOW_CHECK_u64(array); sz = *array++; @@ -2565,6 +2579,8 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, "We found oprofile daemon running, please stop it and try again."); break; case EINVAL: + if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size) + return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel."); if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size) return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel."); if (evsel->core.attr.write_backward && perf_missing_features.write_backward) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4f42464daac4..4d7f46c785a1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -126,6 +126,7 @@ struct perf_missing_features { bool branch_hw_idx; bool cgroup; bool data_page_size; + bool code_page_size; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 68188c7c188a..a97a95f3b1be 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), - bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), + bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 99034d644322..e6a552a7882e 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -23,6 +23,7 @@ struct record_opts { bool sample_address; bool sample_phys_addr; bool sample_data_page_size; + bool sample_code_page_size; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 41df28804296..f78edda3986e 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1234,6 +1234,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) result += sizeof(u64); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + result += sizeof(u64); + if (type & PERF_SAMPLE_AUX) { result += sizeof(u64); result += sample->aux_sample.size; @@ -1417,6 +1420,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo array++; } + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) { + *array = sample->code_page_size; + array++; + } + if (type & PERF_SAMPLE_AUX) { sz = sample->aux_sample.size; *array++ = sz; -- Gitee From 9fd9e932b14924a6bd47e3096de29b2d3ec1fc42 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 1 Oct 2020 06:57:49 -0700 Subject: [PATCH 46/77] perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE commit 995f088efebe1eba0282a6ffa12411b37f8990c2 upstream When studying code layout, it is useful to capture the page size of the sampled code address. Add a new sample type for code page size. The new sample type requires collecting the ip. The code page size can be calculated from the NMI-safe perf_get_page_size(). For large PEBS, it's very unlikely that the mapping is gone for the earlier PEBS records. Enable the feature for the large PEBS. The worst case is that page-size '0' is returned. Signed-off-by: Kan Liang Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-5-kan.liang@linux.intel.com Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- arch/x86/events/perf_event.h | 2 +- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 4 +++- kernel/events/core.c | 11 ++++++++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 232db8465cfe..9a377bd754f9 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -134,7 +134,7 @@ struct amd_nb { PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ - PERF_SAMPLE_PERIOD) + PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE) #define PEBS_GP_REGS \ ((1ULL << PERF_REG_X86_AX) | \ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6ed8f63260cb..78c8abdad2e3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1029,6 +1029,7 @@ struct perf_sample_data { u64 phys_addr; u64 cgroup; u64 data_page_size; + u64 code_page_size; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c413bca7971f..625f5ba89283 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -144,8 +144,9 @@ enum perf_event_sample_format { PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, - PERF_SAMPLE_MAX = 1U << 23, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; @@ -880,6 +881,7 @@ enum perf_event_type { * { u64 size; * char data[size]; } && PERF_SAMPLE_AUX * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE + * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/kernel/events/core.c b/kernel/events/core.c index 3e30058c96ce..b7b3a9a361dd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1767,6 +1767,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) size += sizeof(data->data_page_size); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + size += sizeof(data->code_page_size); + event->header_size = size; } @@ -6707,6 +6710,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) perf_output_put(handle, data->data_page_size); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + perf_output_put(handle, data->code_page_size); + if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); @@ -6887,7 +6893,7 @@ void perf_prepare_sample(struct perf_event_header *header, __perf_event_header__init_id(header, data, event); - if (sample_type & PERF_SAMPLE_IP) + if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE)) data->ip = perf_instruction_pointer(regs); if (sample_type & PERF_SAMPLE_CALLCHAIN) { @@ -7015,6 +7021,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) data->data_page_size = perf_get_page_size(data->addr); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + data->code_page_size = perf_get_page_size(data->ip); + if (sample_type & PERF_SAMPLE_AUX) { u64 size; -- Gitee From dd3f4bec14b5f6771038bcfc3e9d812dc4ef6d97 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Oct 2020 06:57:47 -0700 Subject: [PATCH 47/77] perf/x86/intel: Support PERF_SAMPLE_DATA_PAGE_SIZE commit 76a5433f95f32d8a17c9f836be2084ed947c466b upstream The new sample type, PERF_SAMPLE_DATA_PAGE_SIZE, requires the virtual address. Update the data->addr if the sample type is set. The large PEBS is disabled with the sample type, because perf doesn't support munmap tracking yet. The PEBS buffer for large PEBS cannot be flushed for each munmap. Wrong page size may be calculated. The large PEBS can be enabled later separately when munmap tracking is supported. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201001135749.2804-3-kan.liang@linux.intel.com Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- arch/x86/events/intel/ds.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7c7319154e1b..09b0c637833c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -963,7 +963,8 @@ static void adaptive_pebs_record_size_update(void) #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ - PERF_SAMPLE_TRANSACTION) + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_DATA_PAGE_SIZE) static u64 pebs_update_adaptive_cfg(struct perf_event *event) { @@ -1339,6 +1340,10 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } +#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_DATA_PAGE_SIZE) + static void setup_pebs_fixed_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -1453,7 +1458,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, } - if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && + if ((sample_type & PERF_SAMPLE_ADDR_TYPE) && x86_pmu.intel_cap.pebs_format >= 1) data->addr = pebs->dla; @@ -1581,7 +1586,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) data->data_src.val = get_data_src(event, meminfo->aux); - if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) + if (sample_type & PERF_SAMPLE_ADDR_TYPE) data->addr = meminfo->address; if (sample_type & PERF_SAMPLE_TRANSACTION) -- Gitee From 15937ac19803b1a2af2742bf2faec70c547398c8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 28 Jan 2021 14:40:07 -0800 Subject: [PATCH 48/77] perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT commit 2a6c6b7d7ad346f0679d0963cb19b3f0ea7ef32c upstream Current PERF_SAMPLE_WEIGHT sample type is very useful to expresses the cost of an action represented by the sample. This allows the profiler to scale the samples to be more informative to the programmer. It could also help to locate a hotspot, e.g., when profiling by memory latencies, the expensive load appear higher up in the histograms. But current PERF_SAMPLE_WEIGHT sample type is solely determined by one factor. This could be a problem, if users want two or more factors to contribute to the weight. For example, Golden Cove core PMU can provide both the instruction latency and the cache Latency information as factors for the memory profiling. For current X86 platforms, although meminfo::latency is defined as a u64, only the lower 32 bits include the valid data in practice (No memory access could last than 4G cycles). The higher 32 bits can be used to store new factors. Add a new sample type, PERF_SAMPLE_WEIGHT_STRUCT, to indicate the new sample weight structure. It shares the same space as the PERF_SAMPLE_WEIGHT sample type. Users can apply either the PERF_SAMPLE_WEIGHT sample type or the PERF_SAMPLE_WEIGHT_STRUCT sample type to retrieve the sample weight, but they cannot apply both sample types simultaneously. Currently, only X86 and PowerPC use the PERF_SAMPLE_WEIGHT sample type. - For PowerPC, there is nothing changed for the PERF_SAMPLE_WEIGHT sample type. There is no effect for the new PERF_SAMPLE_WEIGHT_STRUCT sample type. PowerPC can re-struct the weight field similarly later. - For X86, the same value will be dumped for the PERF_SAMPLE_WEIGHT sample type or the PERF_SAMPLE_WEIGHT_STRUCT sample type for now. The following patches will apply the new factors for the PERF_SAMPLE_WEIGHT_STRUCT sample type. The field in the union perf_sample_weight should be shared among different architectures. A generic name is required, but it's hard to abstract a name that applies to all architectures. For example, on X86, the fields are to store all kinds of latency. While on PowerPC, it stores MMCRA[TECX/TECM], which should not be latency. So a general name prefix 'var$NUM' is used here. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1611873611-156687-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- arch/powerpc/perf/core-book3s.c | 2 +- arch/x86/events/intel/ds.c | 17 ++++++------- include/linux/perf_event.h | 4 ++-- include/uapi/linux/perf_event.h | 42 +++++++++++++++++++++++++++++++-- kernel/events/core.c | 11 +++++---- 5 files changed, 59 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 58de6cec907b..f5f973077c28 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2136,7 +2136,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight); + ppmu->get_mem_weight(&data.weight.full); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 09b0c637833c..0fc8ce368e88 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -962,7 +962,8 @@ static void adaptive_pebs_record_size_update(void) } #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ - PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_WEIGHT_TYPE | \ PERF_SAMPLE_TRANSACTION | \ PERF_SAMPLE_DATA_PAGE_SIZE) @@ -989,7 +990,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && (attr->sample_regs_intr & PEBS_GP_REGS); - tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) && + tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == x86_pmu.rtm_abort_event); @@ -1371,8 +1372,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * Use latency for weight (only avail with PEBS-LL) */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT)) - data->weight = pebs->lat; + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) + data->weight.full = pebs->lat; /* * data.data_src encodes the data source @@ -1464,8 +1465,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. */ - if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll) - data->weight = intel_get_tsx_weight(pebs->tsx_tuning); + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) + data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); if (sample_type & PERF_SAMPLE_TRANSACTION) data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, @@ -1579,8 +1580,8 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } if (format_size & PEBS_DATACFG_MEMINFO) { - if (sample_type & PERF_SAMPLE_WEIGHT) - data->weight = meminfo->latency ?: + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + data->weight.full = meminfo->latency ?: intel_get_tsx_weight(meminfo->tsx_tuning); if (sample_type & PERF_SAMPLE_DATA_SRC) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 78c8abdad2e3..2aa48df4e89f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -992,7 +992,7 @@ struct perf_sample_data { struct perf_raw_record *raw; struct perf_branch_stack *br_stack; u64 period; - u64 weight; + union perf_sample_weight weight; u64 txn; union perf_mem_data_src data_src; @@ -1047,7 +1047,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->raw = NULL; data->br_stack = NULL; data->period = period; - data->weight = 0; + data->weight.full = 0; data->data_src.val = PERF_MEM_NA; data->txn = 0; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 625f5ba89283..81c8bfa18e90 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -145,12 +145,14 @@ enum perf_event_sample_format { PERF_SAMPLE_CGROUP = 1U << 21, PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -872,7 +874,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -1207,4 +1226,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index b7b3a9a361dd..418cbf41c7fd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1746,8 +1746,8 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_PERIOD) size += sizeof(data->period); - if (sample_type & PERF_SAMPLE_WEIGHT) - size += sizeof(data->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + size += sizeof(data->weight.full); if (sample_type & PERF_SAMPLE_READ) size += event->read_size; @@ -6675,8 +6675,8 @@ void perf_output_sample(struct perf_output_handle *handle, data->regs_user.regs); } - if (sample_type & PERF_SAMPLE_WEIGHT) - perf_output_put(handle, data->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + perf_output_put(handle, data->weight.full); if (sample_type & PERF_SAMPLE_DATA_SRC) perf_output_put(handle, data->data_src.val); @@ -11193,6 +11193,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->sample_type & PERF_SAMPLE_CGROUP) return -EINVAL; #endif + if ((attr->sample_type & PERF_SAMPLE_WEIGHT) && + (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) + return -EINVAL; out: return ret; -- Gitee From f889f61d6f37830e994409ab3ec09f37e22f10d0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Feb 2021 12:09:05 -0800 Subject: [PATCH 49/77] tools headers uapi: Update tools's copy of linux/perf_event.h commit 81898ef1303d8fb5a3256b09b3140b4eee83dad8 upstream To get the changes in these csets: 2a6c6b7d7ad346f0 ("perf/core: Add PERF_SAMPLE_WEIGHT_STRUCT") 61b985e3e775a3a7 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") This cures the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Committer notes: Picked by hand as I had already merged the MMAP buildid patch that also touches perf_event.h and is also only in {acme,tip}/perf/core, not yet upstream. Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/1612296553-21962-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/include/uapi/linux/perf_event.h | 54 +++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 74bf68b3f16b..d4d99f945d13 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -145,12 +145,14 @@ enum perf_event_sample_format { PERF_SAMPLE_CGROUP = 1U << 21, PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -872,7 +874,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -1086,14 +1105,16 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ - mem_rsvd:24; + mem_blk:3, /* access blocked */ + mem_rsvd:21; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:24, + __u64 mem_rsvd:21, + mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1176,6 +1197,12 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1207,4 +1234,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ -- Gitee From 7ba16fb99f34b5c0d1a847c6a845bb5e77aa611a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 2 Feb 2021 12:09:09 -0800 Subject: [PATCH 50/77] perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT commit ea8d0ed6eae37b01953a29bca98112d9e2507a84 upstream The new sample type, PERF_SAMPLE_WEIGHT_STRUCT, is an alternative of the PERF_SAMPLE_WEIGHT sample type. Users can apply either the PERF_SAMPLE_WEIGHT sample type or the PERF_SAMPLE_WEIGHT_STRUCT sample type to retrieve the sample weight, but they cannot apply both sample types simultaneously. The new sample type shares the same space as the PERF_SAMPLE_WEIGHT sample type. The lower 32 bits are exactly the same for both sample type. The higher 32 bits may be different for different architecture. Add arch specific arch_evsel__set_sample_weight() to set the new sample type for X86. Only store the lower 32 bits for the sample->weight if the new sample type is applied. In practice, no memory access could last than 4G cycles. No data will be lost. If the kernel doesn't support the new sample type. Fall back to the PERF_SAMPLE_WEIGHT sample type. There is no impact for other architectures. Committer notes: Fixup related to PERF_SAMPLE_CODE_PAGE_SIZE, present in acme/perf/core but not upstream yet. Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/1612296553-21962-6-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/arch/x86/util/Build | 1 + tools/perf/arch/x86/util/evsel.c | 8 +++++++ tools/perf/util/evsel.c | 28 +++++++++++++++++++---- tools/perf/util/evsel.h | 3 +++ tools/perf/util/intel-pt.c | 22 +++++++++++++++--- tools/perf/util/perf_event_attr_fprintf.c | 1 + tools/perf/util/session.c | 2 +- tools/perf/util/synthetic-events.c | 6 +++-- 8 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 tools/perf/arch/x86/util/evsel.c diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 47f9c56e744f..1504b356f827 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -6,6 +6,7 @@ perf-y += perf_regs.o perf-y += group.o perf-y += machine.o perf-y += event.o +perf-y += evsel.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c new file mode 100644 index 000000000000..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/x86/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0f08f9577d4c..a1425c23ef06 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -925,6 +925,11 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel return found_term; } +void __weak arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT); +} + /* * The enable_on_exec/disabled value strategy: * @@ -1092,7 +1097,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } if (opts->sample_weight) - evsel__set_sample_bit(evsel, WEIGHT); + arch_evsel__set_sample_weight(evsel); attr->task = track; attr->mmap = track; @@ -1654,6 +1659,10 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.weight_struct) { + evsel__set_sample_bit(evsel, WEIGHT); + evsel__reset_sample_bit(evsel, WEIGHT_STRUCT); + } if (perf_missing_features.clockid_wrong) evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */ if (perf_missing_features.clockid) { @@ -1788,7 +1797,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.code_page_size && + if (!perf_missing_features.weight_struct && + (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.code_page_size && (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { perf_missing_features.code_page_size = true; pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); @@ -2220,9 +2234,15 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, } } - if (type & PERF_SAMPLE_WEIGHT) { + if (type & PERF_SAMPLE_WEIGHT_TYPE) { + union perf_sample_weight weight; + OVERFLOW_CHECK_u64(array); - data->weight = *array; + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else + data->weight = weight.var1_dw; array++; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4d7f46c785a1..9b71dcd74dee 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -127,6 +127,7 @@ struct perf_missing_features { bool cgroup; bool data_page_size; bool code_page_size; + bool weight_struct; }; extern struct perf_missing_features perf_missing_features; @@ -230,6 +231,8 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter); int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter); +void arch_evsel__set_sample_weight(struct evsel *evsel); + int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a76d9886dbf2..bdb4b2586a7d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1803,13 +1803,29 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) sample.addr = items->mem_access_address; - if (sample_type & PERF_SAMPLE_WEIGHT) { + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { /* * Refer kernel's setup_pebs_adaptive_sample_data() and * intel_hsw_weight(). */ - if (items->has_mem_access_latency) - sample.weight = items->mem_access_latency; + if (items->has_mem_access_latency) { + u64 weight = items->mem_access_latency >> 32; + + /* + * Starts from SPR, the mem access latency field + * contains both cache latency [47:32] and instruction + * latency [15:0]. The cache latency is the same as the + * mem access latency on previous platforms. + * + * In practice, no memory access could last than 4G + * cycles. Use latency >> 32 to distinguish the + * different format of the mem access latency field. + */ + if (weight > 0) + sample.weight = weight & 0xffff; + else + sample.weight = items->mem_access_latency; + } if (!sample.weight && items->has_tsx_aux_info) { /* Cycles last block */ sample.weight = (u32)items->tsx_aux_info; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index a97a95f3b1be..533377743dbc 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -36,6 +36,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX), bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE), + bit_name(WEIGHT_STRUCT), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f163e408353a..e8a4516070b3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1250,7 +1250,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); - if (sample_type & PERF_SAMPLE_WEIGHT) + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) printf("... weight: %" PRIu64 "\n", sample->weight); if (sample_type & PERF_SAMPLE_DATA_SRC) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index f78edda3986e..2c0f7ec3f8b7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1206,7 +1206,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } } - if (type & PERF_SAMPLE_WEIGHT) + if (type & PERF_SAMPLE_WEIGHT_TYPE) result += sizeof(u64); if (type & PERF_SAMPLE_DATA_SRC) @@ -1379,8 +1379,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } } - if (type & PERF_SAMPLE_WEIGHT) { + if (type & PERF_SAMPLE_WEIGHT_TYPE) { *array = sample->weight; + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + *array &= 0xffffffff; array++; } -- Gitee From c50ec34fedf19fd6755319cd37162a57c48ede19 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 May 2020 13:27:04 -0300 Subject: [PATCH 51/77] perf evsel: Rename perf_evsel__new*() to evsel__new*() commit 8f6725a2c95c8b8719a95e72d68698b68401980f upstream As these are 'struct evsel' methods, not part of tools/lib/perf/, aka libperf, to whom the perf_ prefix belongs. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/builtin-trace.c | 10 +++++----- tools/perf/tests/evsel-tp-sched.c | 8 ++++---- tools/perf/tests/mmap-basic.c | 4 ++-- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 4 ++-- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/util/evlist.c | 8 ++++---- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 12 ++++++------ tools/perf/util/parse-events.c | 5 ++--- tools/perf/util/sideband_evlist.c | 2 +- 12 files changed, 32 insertions(+), 33 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d196b287a801..6e2d13c5c950 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -394,11 +394,11 @@ static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler) { - struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); + struct evsel *evsel = evsel__newtp("raw_syscalls", direction); /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ if (IS_ERR(evsel)) - evsel = perf_evsel__newtp("syscalls", direction); + evsel = evsel__newtp("syscalls", direction); if (IS_ERR(evsel)) return NULL; @@ -2706,7 +2706,7 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) return found; } -static struct evsel *perf_evsel__new_pgfault(u64 config) +static struct evsel *evsel__new_pgfault(u64 config) { struct evsel *evsel; struct perf_event_attr attr = { @@ -3333,7 +3333,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if ((trace->trace_pgfaults & TRACE_PFMAJ)) { - pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); + pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); if (pgfault_maj == NULL) goto out_error_mem; perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); @@ -3341,7 +3341,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if ((trace->trace_pgfaults & TRACE_PFMIN)) { - pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); + pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); if (pgfault_min == NULL) goto out_error_mem; perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 261e6eaaee99..3805e5a74e17 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -35,11 +35,11 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name, int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) { - struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); + struct evsel *evsel = evsel__newtp("sched", "sched_switch"); int ret = 0; if (IS_ERR(evsel)) { - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } @@ -66,10 +66,10 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes evsel__delete(evsel); - evsel = perf_evsel__newtp("sched", "sched_wakeup"); + evsel = evsel__newtp("sched", "sched_wakeup"); if (IS_ERR(evsel)) { - pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel)); + pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); return -1; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index e9d530d7e7de..3fcd6e56ecb3 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -79,9 +79,9 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse char name[64]; snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); - evsels[i] = perf_evsel__newtp("syscalls", name); + evsels[i] = evsel__newtp("syscalls", name); if (IS_ERR(evsels[i])) { - pr_debug("perf_evsel__new(%s)\n", name); + pr_debug("evsel__new(%s)\n", name); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 93c176523e38..e6013b99d2c7 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -44,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int CPU_ZERO(&cpu_set); - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index c6b2d7aab608..d6b3ddf0c9ff 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -46,9 +46,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { - pr_debug("%s: perf_evsel__newtp\n", __func__); + pr_debug("%s: evsel__newtp\n", __func__); goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 5ebffae18605..7e9e57a1275b 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -27,7 +27,7 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m return -1; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); + evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index bfb9986093d8..4b9b731977c8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -56,7 +56,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) evsel = evsel__new(&attr); if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + pr_debug("evsel__new\n"); goto out_delete_evlist; } evlist__add(evlist, evsel); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 03a066ce1f6d..92a94b52e51c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -213,7 +213,7 @@ void perf_evlist__set_leader(struct evlist *evlist) int __perf_evlist__add_default(struct evlist *evlist, bool precise) { - struct evsel *evsel = perf_evsel__new_cycles(precise); + struct evsel *evsel = evsel__new_cycles(precise); if (evsel == NULL) return -ENOMEM; @@ -229,7 +229,7 @@ int perf_evlist__add_dummy(struct evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries); + struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); if (evsel == NULL) return -ENOMEM; @@ -246,7 +246,7 @@ static int evlist__add_attrs(struct evlist *evlist, size_t i; for (i = 0; i < nr_attrs; i++) { - evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i); + evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); if (evsel == NULL) goto out_delete_partial_list; list_add_tail(&evsel->core.node, &head); @@ -305,7 +305,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, int perf_evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { - struct evsel *evsel = perf_evsel__newtp(sys, name); + struct evsel *evsel = evsel__newtp(sys, name); if (IS_ERR(evsel)) return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a1425c23ef06..cdd8738e9cdf 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -259,7 +259,7 @@ void evsel__init(struct evsel *evsel, evsel->pmu_name = NULL; } -struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) +struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); @@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *perf_evsel__new_cycles(bool precise) +struct evsel *evsel__new_cycles(bool precise) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, @@ -334,7 +334,7 @@ struct evsel *perf_evsel__new_cycles(bool precise) /* * Returns pointer with encoded error via interface. */ -struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) { struct evsel *evsel = zalloc(perf_evsel__object.size); int err = -ENOMEM; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9b71dcd74dee..4aa78039cef7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -160,24 +160,24 @@ int perf_evsel__object_config(size_t object_size, struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel); bool perf_evsel__is_aux_event(struct evsel *evsel); -struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); +struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); static inline struct evsel *evsel__new(struct perf_event_attr *attr) { - return perf_evsel__new_idx(attr, 0); + return evsel__new_idx(attr, 0); } -struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx); +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); /* * Returns pointer with encoded error via interface. */ -static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name) +static inline struct evsel *evsel__newtp(const char *sys, const char *name) { - return perf_evsel__newtp_idx(sys, name, 0); + return evsel__newtp_idx(sys, name, 0); } -struct evsel *perf_evsel__new_cycles(bool precise); +struct evsel *evsel__new_cycles(bool precise); struct tep_event *event_format__new(const char *sys, const char *name); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e52056fcc7f4..378700519613 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -343,7 +343,7 @@ __add_event(struct list_head *list, int *idx, event_attr_init(attr); - evsel = perf_evsel__new_idx(attr, *idx); + evsel = evsel__new_idx(attr, *idx); if (!evsel) return NULL; @@ -526,9 +526,8 @@ static int add_tracepoint(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { - struct evsel *evsel; + struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++); - evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++); if (IS_ERR(evsel)) { tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); return PTR_ERR(evsel); diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c index c51a8dfce08b..4d01b4f80b3c 100644 --- a/tools/perf/util/sideband_evlist.c +++ b/tools/perf/util/sideband_evlist.c @@ -22,7 +22,7 @@ int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *att attr->sample_id_all = 1; } - evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); + evsel = evsel__new_idx(attr, evlist->core.nr_entries); if (!evsel) return -1; -- Gitee From b23b41b23e2701bc135d6681bcf029305a483d4f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Apr 2020 10:36:15 -0700 Subject: [PATCH 52/77] perf record: Add dummy event during system wide synthesis commit 0a892c1c947230f9373c9a3c94df1babe989861f upstream During the processing of /proc during event synthesis new processes may start. Add a dummy event if /proc is to be processed, to capture mmaps for starting processes. This reuses the existing logic for initial-delay. v3 fixes the attr test of test-record-C0 v2 fixes the dummy event configuration and a branch stack issue. Suggested-by: Stephane Eranian Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200422173615.59436-1-irogers@google.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 19 +++++++--- tools/perf/tests/attr/system-wide-dummy | 50 +++++++++++++++++++++++++ tools/perf/tests/attr/test-record-C0 | 12 +++++- 3 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 tools/perf/tests/attr/system-wide-dummy diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1d9c1dc20789..e2994c3a8f2c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -757,19 +757,28 @@ static int record__open(struct record *rec) int rc = 0; /* - * For initial_delay we need to add a dummy event so that we can track - * PERF_RECORD_MMAP while we wait for the initial delay to enable the - * real events, the ones asked by the user. + * For initial_delay or system wide, we need to add a dummy event so + * that we can track PERF_RECORD_MMAP to cover the delay of waiting or + * event synthesis. */ - if (opts->initial_delay) { + if (opts->initial_delay || target__has_cpu(&opts->target)) { if (perf_evlist__add_dummy(evlist)) return -ENOMEM; + /* Disable tracking of mmaps on lead event. */ pos = evlist__first(evlist); pos->tracking = 0; + /* Set up dummy event. */ pos = evlist__last(evlist); pos->tracking = 1; - pos->core.attr.enable_on_exec = 1; + /* + * Enable the dummy event when the process is forked for + * initial_delay, immediately for system wide. + */ + if (opts->initial_delay) + pos->core.attr.enable_on_exec = 1; + else + pos->immediate = 1; } perf_evlist__config(evlist, opts, &callchain_param); diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy new file mode 100644 index 000000000000..eba723cc0d38 --- /dev/null +++ b/tools/perf/tests/attr/system-wide-dummy @@ -0,0 +1,50 @@ +# Event added by system-wide or CPU perf-record to handle the race of +# processes starting while /proc is processed. +[event] +fd=1 +group_fd=-1 +cpu=* +pid=-1 +flags=8 +type=1 +size=120 +config=9 +sample_period=4000 +sample_type=455 +read_format=4 +# Event will be enabled right away. +disabled=0 +inherit=1 +pinned=0 +exclusive=0 +exclude_user=0 +exclude_kernel=0 +exclude_hv=0 +exclude_idle=0 +mmap=1 +comm=1 +freq=1 +inherit_stat=0 +enable_on_exec=0 +task=1 +watermark=0 +precise_ip=0 +mmap_data=0 +sample_id_all=1 +exclude_host=0 +exclude_guest=0 +exclude_callchain_kernel=0 +exclude_callchain_user=0 +mmap2=1 +comm_exec=1 +context_switch=0 +write_backward=0 +namespaces=0 +use_clockid=0 +wakeup_events=0 +bp_type=0 +config1=0 +config2=0 +branch_sample_type=0 +sample_regs_user=0 +sample_stack_user=0 diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0 index 93818054ae20..317730b906dd 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/attr/test-record-C0 @@ -9,6 +9,14 @@ cpu=0 # no enable on exec for CPU attached enable_on_exec=0 -# PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD +# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | +# PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD # + PERF_SAMPLE_CPU added by -C 0 -sample_type=391 +sample_type=455 + +# Dummy event handles mmaps, comm and task. +mmap=0 +comm=0 +task=0 + +[event:system-wide-dummy] -- Gitee From 6954039342a91e6fcdb180f0a6274e3e92f2585d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Jun 2020 09:16:20 -0300 Subject: [PATCH 53/77] perf evlist: Fix the class prefix for 'struct evlist' 'add' evsel methods commit e251abee87cf9c49a2ec1b143bd71f92b71557c1 upstream To differentiate from libperf's 'struct perf_evlist' methods. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-stat.c | 16 +++++++--------- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 3 +-- tools/perf/util/evlist.c | 17 +++++++---------- tools/perf/util/evlist.h | 17 ++++++++--------- 7 files changed, 27 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 20366ba14de1..a7ee11cd57c0 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1320,7 +1320,7 @@ static struct evlist *kvm_live_event_list(void) *name = '\0'; name++; - if (perf_evlist__add_newtp(evlist, sys, name, NULL)) { + if (evlist__add_newtp(evlist, sys, name, NULL)) { pr_err("Failed to add %s tracepoint to the list\n", *events_tp); free(tp); goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e2994c3a8f2c..047b30ca33b4 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -762,7 +762,7 @@ static int record__open(struct record *rec) * event synthesis. */ if (opts->initial_delay || target__has_cpu(&opts->target)) { - if (perf_evlist__add_dummy(evlist)) + if (evlist__add_dummy(evlist)) return -ENOMEM; /* Disable tracking of mmaps on lead event. */ @@ -2466,7 +2466,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0 && - __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { + __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { pr_err("Not enough memory for event selector list\n"); goto out; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ea183922c4ef..b5a82969dab4 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1362,19 +1362,17 @@ static int add_default_attributes(void) if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; - if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { - if (perf_evlist__add_default_attrs(evsel_list, - frontend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) return -1; } if (pmu_have_event("cpu", "stalled-cycles-backend")) { - if (perf_evlist__add_default_attrs(evsel_list, - backend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) return -1; } - if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1384,21 +1382,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } static const char * const stat_record_usage[] = { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 8bd526b8c143..0fec22232daf 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1568,7 +1568,7 @@ int cmd_top(int argc, const char **argv) usage_with_options(top_usage, options); if (!top.evlist->core.nr_entries && - perf_evlist__add_default(top.evlist) < 0) { + evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); goto out_delete_evlist; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6e2d13c5c950..1f40c683bba8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3349,8 +3349,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) + evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; /* diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 92a94b52e51c..bda41564d406 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -79,7 +79,7 @@ struct evlist *perf_evlist__new_default(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_default(evlist)) { + if (evlist && evlist__add_default(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -91,7 +91,7 @@ struct evlist *perf_evlist__new_dummy(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_dummy(evlist)) { + if (evlist && evlist__add_dummy(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -211,7 +211,7 @@ void perf_evlist__set_leader(struct evlist *evlist) } } -int __perf_evlist__add_default(struct evlist *evlist, bool precise) +int __evlist__add_default(struct evlist *evlist, bool precise) { struct evsel *evsel = evsel__new_cycles(precise); @@ -222,7 +222,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise) return 0; } -int perf_evlist__add_dummy(struct evlist *evlist) +int evlist__add_dummy(struct evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, @@ -238,8 +238,7 @@ int perf_evlist__add_dummy(struct evlist *evlist) return 0; } -static int evlist__add_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; LIST_HEAD(head); @@ -262,8 +261,7 @@ static int evlist__add_attrs(struct evlist *evlist, return -1; } -int __perf_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { size_t i; @@ -302,8 +300,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, return NULL; } -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler) +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { struct evsel *evsel = evsel__newtp(sys, name); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b791f6473097..e98644a65498 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -92,20 +92,20 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int __perf_evlist__add_default(struct evlist *evlist, bool precise); +int __evlist__add_default(struct evlist *evlist, bool precise); -static inline int perf_evlist__add_default(struct evlist *evlist) +static inline int evlist__add_default(struct evlist *evlist) { - return __perf_evlist__add_default(evlist, true); + return __evlist__add_default(evlist, true); } -int __perf_evlist__add_default_attrs(struct evlist *evlist, +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); -#define perf_evlist__add_default_attrs(evlist, array) \ - __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +#define evlist__add_default_attrs(evlist, array) \ + __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) -int perf_evlist__add_dummy(struct evlist *evlist); +int evlist__add_dummy(struct evlist *evlist); int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, @@ -116,8 +116,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler); +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); void __perf_evlist__set_sample_bit(struct evlist *evlist, enum perf_event_sample_format bit); -- Gitee From b2ce37159ff4ef90bb1344a355c8d870bf9b0627 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 4 Dec 2020 19:10:09 +0800 Subject: [PATCH 54/77] perf pmu: Add pmu_id() commit 51d548471510843e56d9f427aa6473ca0981c4a4 upstream Add a function to read the PMU id sysfs entry. This is only done for uncore PMUs where this would possibly be relevant. Signed-off-by: John Garry Acked-by: Kajol Jain Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1607080216-36968-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/pmu.c | 18 ++++++++++++++++++ tools/perf/util/pmu.h | 1 + 2 files changed, 19 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 5f1ece607471..95a4c824b0f3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -593,6 +593,7 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path) * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ +#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" @@ -634,6 +635,21 @@ static bool pmu_is_uncore(const char *name) return !!cpus; } +static char *pmu_id(const char *name) +{ + char path[PATH_MAX], *str; + size_t len; + + snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name); + + if (sysfs__read_str(path, &str, &len) < 0) + return NULL; + + str[len - 1] = 0; /* remove line feed */ + + return str; +} + /* * PMU CORE devices have different name other than cpu in sysfs on some * platforms. @@ -843,6 +859,8 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->name = strdup(name); pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + if (pmu->is_uncore) + pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d5e87aa9fd5b..9f843c762243 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -23,6 +23,7 @@ struct perf_event_attr; struct perf_pmu { char *name; + char *id; __u32 type; bool selectable; bool is_uncore; -- Gitee From df415bd4d2688cfd78ca831c1542077b50708bd7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:11 -0800 Subject: [PATCH 55/77] perf pmu: Use file system cache to optimize sysfs access commit d96645821e940bddff3fc5290656f83bf70d4c92 upstream pmu.c does a lot of redundant /sys accesses while parsing aliases and probing for PMUs. On large systems with a lot of PMUs this can get expensive (>2s): % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 27.25 1.227847 8 160888 16976 openat 26.42 1.190481 7 164224 164077 stat Add a cache to remember if specific file names exist or don't exist, which eliminates most of this overhead. Also optimize some stat() calls to be slightly cheaper access() Resulting in: 0.18 0.004166 2 1851 305 open 0.08 0.001970 2 829 622 access Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/Build | 1 + tools/perf/util/fncache.c | 63 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/fncache.h | 7 +++++ tools/perf/util/pmu.c | 34 +++++++-------------- tools/perf/util/srccode.c | 9 +----- 5 files changed, 83 insertions(+), 31 deletions(-) create mode 100644 tools/perf/util/fncache.c create mode 100644 tools/perf/util/fncache.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index cf6b6be27e86..3df3a5089d8e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -49,6 +49,7 @@ perf-y += header.o perf-y += callchain.o perf-y += values.o perf-y += debug.o +perf-y += fncache.o perf-y += machine.o perf-y += map.o perf-y += pstack.o diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c new file mode 100644 index 000000000000..6225cbc52310 --- /dev/null +++ b/tools/perf/util/fncache.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Manage a cache of file names' existence */ +#include +#include +#include +#include +#include "fncache.h" + +struct fncache { + struct hlist_node nd; + bool res; + char name[]; +}; + +#define FNHSIZE 61 + +static struct hlist_head fncache_hash[FNHSIZE]; + +unsigned shash(const unsigned char *s) +{ + unsigned h = 0; + while (*s) + h = 65599 * h + *s++; + return h ^ (h >> 16); +} + +static bool lookup_fncache(const char *name, bool *res) +{ + int h = shash((const unsigned char *)name) % FNHSIZE; + struct fncache *n; + + hlist_for_each_entry(n, &fncache_hash[h], nd) { + if (!strcmp(n->name, name)) { + *res = n->res; + return true; + } + } + return false; +} + +static void update_fncache(const char *name, bool res) +{ + struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); + int h = shash((const unsigned char *)name) % FNHSIZE; + + if (!n) + return; + strcpy(n->name, name); + n->res = res; + hlist_add_head(&n->nd, &fncache_hash[h]); +} + +/* No LRU, only use when bounded in some other way. */ +bool file_available(const char *name) +{ + bool res; + + if (lookup_fncache(name, &res)) + return res; + res = access(name, R_OK) == 0; + update_fncache(name, res); + return res; +} diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h new file mode 100644 index 000000000000..fe020beaefb1 --- /dev/null +++ b/tools/perf/util/fncache.h @@ -0,0 +1,7 @@ +#ifndef _FCACHE_H +#define _FCACHE_H 1 + +unsigned shash(const unsigned char *s); +bool file_available(const char *name); + +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 95a4c824b0f3..594d8f788bb0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -25,6 +25,7 @@ #include "pmu-events/pmu-events.h" #include "string2.h" #include "strbuf.h" +#include "fncache.h" struct perf_pmu_format { char *name; @@ -83,7 +84,6 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) */ static int pmu_format(const char *name, struct list_head *format) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -93,8 +93,8 @@ static int pmu_format(const char *name, struct list_head *format) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (stat(path, &st) < 0) - return 0; /* no error if format does not exist */ + if (!file_available(path)) + return 0; if (perf_pmu__format_parse(path, format)) return -1; @@ -471,7 +471,6 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) */ static int pmu_aliases(const char *name, struct list_head *head) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -481,8 +480,8 @@ static int pmu_aliases(const char *name, struct list_head *head) snprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events", sysfs, name); - if (stat(path, &st) < 0) - return 0; /* no error if 'events' does not exist */ + if (!file_available(path)) + return 0; if (pmu_aliases_parse(path, head)) return -1; @@ -521,7 +520,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, */ static int pmu_type(const char *name, __u32 *type) { - struct stat st; char path[PATH_MAX]; FILE *file; int ret = 0; @@ -533,7 +531,7 @@ static int pmu_type(const char *name, __u32 *type) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - if (stat(path, &st) < 0) + if (access(path, R_OK) < 0) return -1; file = fopen(path, "r"); @@ -625,14 +623,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name) static bool pmu_is_uncore(const char *name) { char path[PATH_MAX]; - struct perf_cpu_map *cpus; - const char *sysfs = sysfs__mountpoint(); + const char *sysfs; + sysfs = sysfs__mountpoint(); snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); - cpus = __pmu_cpumask(path); - perf_cpu_map__put(cpus); - - return !!cpus; + return file_available(path); } static char *pmu_id(const char *name) @@ -657,7 +652,6 @@ static char *pmu_id(const char *name) */ static int is_arm_pmu_core(const char *name) { - struct stat st; char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -667,10 +661,7 @@ static int is_arm_pmu_core(const char *name) /* Look for cpu sysfs (specific to arm) */ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", sysfs, name); - if (stat(path, &st) == 0) - return 1; - - return 0; + return file_available(path); } static char *perf_pmu__getcpuid(struct perf_pmu *pmu) @@ -1584,7 +1575,6 @@ bool pmu_have_event(const char *pname, const char *name) static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { - struct stat st; char path[PATH_MAX]; const char *sysfs; @@ -1594,10 +1584,8 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - - if (stat(path, &st) < 0) + if (!file_available(path)) return NULL; - return fopen(path, "r"); } diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index d84ed8b6caaa..c29edaaca863 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,6 +16,7 @@ #include "srccode.h" #include "debug.h" #include // page_size +#include "fncache.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list); static long map_total_sz; static int num_srcfiles; -static unsigned shash(unsigned char *s) -{ - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); -} - static int countlines(char *map, int maplen) { int numl; -- Gitee From 8bea7659aa5556b7faa98a67797134de32248b8b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:18 +0800 Subject: [PATCH 56/77] perf pmu: Save detected hybrid pmus to a global pmu list commit 444624307c4e06d35de12df1cfe08a4964ac086f upstream We identify the cpu_core pmu and cpu_atom pmu by explicitly checking following files: For cpu_core, checks: "/sys/bus/event_source/devices/cpu_core/cpus" For cpu_atom, checks: "/sys/bus/event_source/devices/cpu_atom/cpus" If the 'cpus' file exists and it has data, the pmu exists. But in order not to hardcode the "cpu_core" and "cpu_atom", and make the code in a generic way. So if the path "/sys/bus/event_source/devices/cpu_xxx/cpus" exists, the hybrid pmu exists. All the detected hybrid pmus are linked to a global list 'perf_pmu__hybrid_pmus' and then next we just need to iterate the list to get all hybrid pmu by using perf_pmu__for_each_hybrid_pmu. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/Build | 1 + tools/perf/util/pmu-hybrid.c | 49 ++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu-hybrid.h | 18 +++++++++++++ tools/perf/util/pmu.c | 9 ++++++- tools/perf/util/pmu.h | 4 +++ 5 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/pmu-hybrid.c create mode 100644 tools/perf/util/pmu-hybrid.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 3df3a5089d8e..90c68a6aed1d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -68,6 +68,7 @@ perf-y += parse-events-bison.o perf-y += pmu.o perf-y += pmu-flex.o perf-y += pmu-bison.o +perf-y += pmu-hybrid.o perf-y += trace-event-read.o perf-y += trace-event-info.o perf-y += trace-event-scripting.o diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c new file mode 100644 index 000000000000..8ed0e6e1776d --- /dev/null +++ b/tools/perf/util/pmu-hybrid.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fncache.h" +#include "pmu-hybrid.h" + +LIST_HEAD(perf_pmu__hybrid_pmus); + +bool perf_pmu__hybrid_mounted(const char *name) +{ + char path[PATH_MAX]; + const char *sysfs; + FILE *file; + int n, cpu; + + if (strncmp(name, "cpu_", 4)) + return false; + + sysfs = sysfs__mountpoint(); + if (!sysfs) + return false; + + snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name); + if (!file_available(path)) + return false; + + file = fopen(path, "r"); + if (!file) + return false; + + n = fscanf(file, "%u", &cpu); + fclose(file); + if (n <= 0) + return false; + + return true; +} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h new file mode 100644 index 000000000000..35bed3714438 --- /dev/null +++ b/tools/perf/util/pmu-hybrid.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PMU_HYBRID_H +#define __PMU_HYBRID_H + +#include +#include +#include +#include +#include "pmu.h" + +extern struct list_head perf_pmu__hybrid_pmus; + +#define perf_pmu__for_each_hybrid_pmu(pmu) \ + list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list) + +bool perf_pmu__hybrid_mounted(const char *name); + +#endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 594d8f788bb0..d33c2cd6f17e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -26,6 +26,7 @@ #include "string2.h" #include "strbuf.h" #include "fncache.h" +#include "pmu-hybrid.h" struct perf_pmu_format { char *name; @@ -593,7 +594,6 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path) */ #define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" -#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" static struct perf_cpu_map *pmu_cpumask(const char *name) { @@ -625,6 +625,9 @@ static bool pmu_is_uncore(const char *name) char path[PATH_MAX]; const char *sysfs; + if (perf_pmu__hybrid_mounted(name)) + return false; + sysfs = sysfs__mountpoint(); snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); return file_available(path); @@ -852,6 +855,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); + pmu->is_hybrid = perf_pmu__hybrid_mounted(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); @@ -861,6 +865,9 @@ static struct perf_pmu *pmu_lookup(const char *name) list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); + if (pmu->is_hybrid) + list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); + pmu->default_config = perf_pmu__get_default_config(pmu); return pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 9f843c762243..93a90033e43d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "parse-events.h" @@ -18,6 +19,7 @@ enum { #define PERF_PMU_FORMAT_BITS 64 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" +#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" struct perf_event_attr; @@ -27,6 +29,7 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; + bool is_hybrid; bool auxtrace; int max_precise; struct perf_event_attr *default_config; @@ -34,6 +37,7 @@ struct perf_pmu { struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ struct list_head list; /* ELEM */ + struct list_head hybrid_list; }; struct perf_pmu_info { -- Gitee From b938636cf63a02dfa4eb08d2e7d4b7d409fbbe51 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 19 Mar 2020 13:25:01 -0700 Subject: [PATCH 57/77] perf pmu: Add support for PMU capabilities commit 9fbc61f832ebf432326a90e28184dade05ee34a8 upstream The PMU capabilities information, which is located at /sys/bus/event_source/devices//caps, is required by perf tool. For example, the max LBR information is required to stitch LBR call stack. Add perf_pmu__caps_parse() to parse the PMU capabilities information. The information is stored in a list. The following patch will store the capabilities information in perf header. Committer notes: Here's an example of such directories and its files in an i5 7th gen machine: [root@seventh ~]# ls -lad /sys/bus/event_source/devices/*/caps drwxr-xr-x. 2 root root 0 Apr 14 13:33 /sys/bus/event_source/devices/cpu/caps drwxr-xr-x. 2 root root 0 Apr 14 13:33 /sys/bus/event_source/devices/intel_pt/caps [root@seventh ~]# ls -la /sys/bus/event_source/devices/intel_pt/caps total 0 drwxr-xr-x. 2 root root 0 Apr 14 13:33 . drwxr-xr-x. 5 root root 0 Apr 14 13:12 .. -r--r--r--. 1 root root 4096 Apr 16 13:10 cr3_filtering -r--r--r--. 1 root root 4096 Apr 16 11:42 cycle_thresholds -r--r--r--. 1 root root 4096 Apr 16 13:10 ip_filtering -r--r--r--. 1 root root 4096 Apr 16 13:10 max_subleaf -r--r--r--. 1 root root 4096 Apr 14 13:33 mtc -r--r--r--. 1 root root 4096 Apr 14 13:33 mtc_periods -r--r--r--. 1 root root 4096 Apr 16 13:10 num_address_ranges -r--r--r--. 1 root root 4096 Apr 16 13:10 output_subsys -r--r--r--. 1 root root 4096 Apr 16 13:10 payloads_lip -r--r--r--. 1 root root 4096 Apr 16 13:10 power_event_trace -r--r--r--. 1 root root 4096 Apr 14 13:33 psb_cyc -r--r--r--. 1 root root 4096 Apr 14 13:33 psb_periods -r--r--r--. 1 root root 4096 Apr 16 13:10 ptwrite -r--r--r--. 1 root root 4096 Apr 16 13:10 single_range_output -r--r--r--. 1 root root 4096 Apr 16 12:03 topa_multiple_entries -r--r--r--. 1 root root 4096 Apr 16 13:10 topa_output [root@seventh ~]# cat /sys/bus/event_source/devices/intel_pt/caps/topa_output 1 [root@seventh ~]# cat /sys/bus/event_source/devices/intel_pt/caps/topa_multiple_entries 1 [root@seventh ~]# cat /sys/bus/event_source/devices/intel_pt/caps/mtc 1 [root@seventh ~]# cat /sys/bus/event_source/devices/intel_pt/caps/power_event_trace 0 [root@seventh ~]# [root@seventh ~]# ls -la /sys/bus/event_source/devices/cpu/caps/ total 0 drwxr-xr-x. 2 root root 0 Apr 14 13:33 . drwxr-xr-x. 6 root root 0 Apr 14 13:12 .. -r--r--r--. 1 root root 4096 Apr 16 13:10 branches -r--r--r--. 1 root root 4096 Apr 14 13:33 max_precise -r--r--r--. 1 root root 4096 Apr 16 13:10 pmu_name [root@seventh ~]# cat /sys/bus/event_source/devices/cpu/caps/max_precise 3 [root@seventh ~]# cat /sys/bus/event_source/devices/cpu/caps/branches 32 [root@seventh ~]# cat /sys/bus/event_source/devices/cpu/caps/pmu_name skylake [root@seventh ~]# Wow, first time I've heard about /sys/bus/event_source/devices/cpu/caps/max_precise, I think I'll use it! :-) Signed-off-by: Kan Liang Reviewed-by: Andi Kleen Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexey Budankov Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200319202517.23423-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/pmu.c | 82 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 9 +++++ 2 files changed, 91 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d33c2cd6f17e..a68b79de5268 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -861,6 +861,7 @@ static struct perf_pmu *pmu_lookup(const char *name) INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); + INIT_LIST_HEAD(&pmu->caps); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); @@ -1612,3 +1613,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, va_end(args); return ret; } + +static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) +{ + struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); + + if (!caps) + return -ENOMEM; + + caps->name = strdup(name); + if (!caps->name) + goto free_caps; + caps->value = strndup(value, strlen(value) - 1); + if (!caps->value) + goto free_name; + list_add_tail(&caps->list, list); + return 0; + +free_name: + zfree(caps->name); +free_caps: + free(caps); + + return -ENOMEM; +} + +/* + * Reading/parsing the given pmu capabilities, which should be located at: + * /sys/bus/event_source/devices//caps as sysfs group attributes. + * Return the number of capabilities + */ +int perf_pmu__caps_parse(struct perf_pmu *pmu) +{ + struct stat st; + char caps_path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + DIR *caps_dir; + struct dirent *evt_ent; + int nr_caps = 0; + + if (!sysfs) + return -1; + + snprintf(caps_path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); + + if (stat(caps_path, &st) < 0) + return 0; /* no error if caps does not exist */ + + caps_dir = opendir(caps_path); + if (!caps_dir) + return -EINVAL; + + while ((evt_ent = readdir(caps_dir)) != NULL) { + char path[PATH_MAX + NAME_MAX + 1]; + char *name = evt_ent->d_name; + char value[128]; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", caps_path, name); + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(value, sizeof(value), file) || + (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) { + fclose(file); + continue; + } + + nr_caps++; + fclose(file); + } + + closedir(caps_dir); + + return nr_caps; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 93a90033e43d..d13293836761 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -23,6 +23,12 @@ enum { struct perf_event_attr; +struct perf_pmu_caps { + char *name; + char *value; + struct list_head list; +}; + struct perf_pmu { char *name; char *id; @@ -36,6 +42,7 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; }; @@ -106,4 +113,6 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +int perf_pmu__caps_parse(struct perf_pmu *pmu); + #endif /* __PMU_H */ -- Gitee From 4a26e5b9ebd79d7c55bc366abd8eb9de29b930fe Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Mar 2021 13:11:38 +0800 Subject: [PATCH 58/77] perf pmu: Validate raw event with sysfs exported format bits commit e40647762fb5881360874e08e03e972d58d63c42 upstream A raw PMU event (eventsel+umask) in the form of rNNN is supported by perf but lacks of checking for the validity of raw encoding. For example, bit 16 and bit 17 are not valid on KBL but perf doesn't report warning when encoding with these bits. Before: # ./perf stat -e cpu/r031234/ -a -- sleep 1 Performance counter stats for 'system wide': 0 cpu/r031234/ 1.003798924 seconds time elapsed It may silently measure the wrong event! The kernel supported bits have been exported through /sys/devices//format/. Perf collects the information to 'struct perf_pmu_format' and links it to 'pmu->format' list. The 'struct perf_pmu_format' has a bitmap which records the valid bits for this format. For example, root@kbl-ppc:/sys/devices/cpu/format# cat umask config:8-15 The valid bits (bit8-bit15) are recorded in bitmap of format 'umask'. We collect total valid bits of all formats, save to a local variable 'masks' and reverse it. Now '~masks' represents total invalid bits. bits = config & ~masks; The set bits in 'bits' indicate the invalid bits used in config. Finally we use bitmap_scnprintf to report the invalid bits. Some architectures may not export supported bits through sysfs, so if masks is 0, perf_pmu__warn_invalid_config directly returns. After: Single event without name: # ./perf stat -e cpu/r031234/ -a -- sleep 1 WARNING: event 'N/A' not valid (bits 16-17 of config '31234' not supported by kernel)! Performance counter stats for 'system wide': 0 cpu/r031234/ 1.001597373 seconds time elapsed Multiple events with names: # ./perf stat -e cpu/rf01234,name=aaa/,cpu/r031234,name=bbb/ -a -- sleep 1 WARNING: event 'aaa' not valid (bits 20,22 of config 'f01234' not supported by kernel)! WARNING: event 'bbb' not valid (bits 16-17 of config '31234' not supported by kernel)! Performance counter stats for 'system wide': 0 aaa 0 bbb 1.001573787 seconds time elapsed Warnings are reported for invalid bits. Co-developed-by: Jiri Olsa Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210310051138.12154-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/parse-events.c | 3 +++ tools/perf/util/pmu.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/util/pmu.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 378700519613..27fd5501e053 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -342,6 +342,9 @@ __add_event(struct list_head *list, int *idx, cpu_list ? perf_cpu_map__new(cpu_list) : NULL; event_attr_init(attr); + if (pmu && attr->type == PERF_TYPE_RAW) + perf_pmu__warn_invalid_config(pmu, attr->config, name); + evsel = evsel__new_idx(attr, *idx); if (!evsel) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a68b79de5268..209c493bb325 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1694,3 +1694,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) return nr_caps; } + +void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + char *name) +{ + struct perf_pmu_format *format; + __u64 masks = 0, bits; + char buf[100]; + unsigned int i; + + list_for_each_entry(format, &pmu->format, list) { + if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG) + continue; + + for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS) + masks |= 1ULL << i; + } + + /* + * Kernel doesn't export any valid format bits. + */ + if (masks == 0) + return; + + bits = config & ~masks; + if (bits == 0) + return; + + bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf)); + + pr_warning("WARNING: event '%s' not valid (bits %s of config " + "'%llx' not supported by kernel)!\n", + name ?: "N/A", buf, config); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d13293836761..4866cddc6fcb 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -115,4 +115,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); +void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, + char *name); + #endif /* __PMU_H */ -- Gitee From d78abbdd6f4e8ef732fc54814eba5fcc70814fa7 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:19 +0800 Subject: [PATCH 59/77] perf pmu: Add hybrid helper functions commit c5a26ea490a16798d973e6fa352c6b8375646bc4 upstream The functions perf_pmu__is_hybrid and perf_pmu__find_hybrid_pmu can be used to identify the hybrid platform and return the found hybrid cpu pmu. All the detected hybrid pmus have been saved in 'perf_pmu__hybrid_pmus' list. So we just need to search this list. perf_pmu__hybrid_type_to_pmu converts the user specified string to hybrid pmu name. This is used to support the '--cputype' option in next patches. perf_pmu__has_hybrid checks the existing of hybrid pmu. Note that, we have to define it in pmu.c (make pmu-hybrid.c no more symbol dependency), otherwise perf test python would be failed. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-7-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- tools/perf/util/pmu-hybrid.c | 40 ++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu-hybrid.h | 4 ++++ tools/perf/util/pmu.c | 11 ++++++++++ tools/perf/util/pmu.h | 2 ++ 4 files changed, 57 insertions(+) diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c index 8ed0e6e1776d..f51ccaac60ee 100644 --- a/tools/perf/util/pmu-hybrid.c +++ b/tools/perf/util/pmu-hybrid.c @@ -47,3 +47,43 @@ bool perf_pmu__hybrid_mounted(const char *name) return true; } + +struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) +{ + struct perf_pmu *pmu; + + if (!name) + return NULL; + + perf_pmu__for_each_hybrid_pmu(pmu) { + if (!strcmp(name, pmu->name)) + return pmu; + } + + return NULL; +} + +bool perf_pmu__is_hybrid(const char *name) +{ + return perf_pmu__find_hybrid_pmu(name) != NULL; +} + +char *perf_pmu__hybrid_type_to_pmu(const char *type) +{ + char *pmu_name = NULL; + + if (asprintf(&pmu_name, "cpu_%s", type) < 0) + return NULL; + + if (perf_pmu__is_hybrid(pmu_name)) + return pmu_name; + + /* + * pmu may be not scanned, check the sysfs. + */ + if (perf_pmu__hybrid_mounted(pmu_name)) + return pmu_name; + + free(pmu_name); + return NULL; +} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h index 35bed3714438..d0fa7bc50a76 100644 --- a/tools/perf/util/pmu-hybrid.h +++ b/tools/perf/util/pmu-hybrid.h @@ -15,4 +15,8 @@ extern struct list_head perf_pmu__hybrid_pmus; bool perf_pmu__hybrid_mounted(const char *name); +struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); +bool perf_pmu__is_hybrid(const char *name); +char *perf_pmu__hybrid_type_to_pmu(const char *type); + #endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 209c493bb325..c0f8bdb65ccc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -39,6 +39,7 @@ int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; static LIST_HEAD(pmus); +static bool hybrid_scanned; /* * Parse & process all the sysfs attributes located under @@ -1727,3 +1728,13 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, "'%llx' not supported by kernel)!\n", name ?: "N/A", buf, config); } + +bool perf_pmu__has_hybrid(void) +{ + if (!hybrid_scanned) { + hybrid_scanned = true; + perf_pmu__scan(NULL); + } + + return !list_empty(&perf_pmu__hybrid_pmus); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4866cddc6fcb..57c7bf476350 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -118,4 +118,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, char *name); +bool perf_pmu__has_hybrid(void); + #endif /* __PMU_H */ -- Gitee From 43735c0d9b16c05bf683cacda60dfaff85d637a7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 28 Jan 2021 14:40:11 -0800 Subject: [PATCH 60/77] perf/x86/intel: Support CPUID 10.ECX to disable fixed counters commit 32451614da2a9cf4296f90d3606ac77814fb519d upstream With Architectural Performance Monitoring Version 5, CPUID 10.ECX cpu leaf indicates the fixed counter enumeration. This extends the previous count to a bitmap which allows disabling even lower fixed counters. It could be used by a Hypervisor. The existing intel_ctrl variable is used to remember the bitmask of the counters. All code that reads all counters is fixed to check this extra bitmask. Suggested-by: Peter Zijlstra (Intel) Originally-by: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1611873611-156687-6-git-send-email-kan.liang@linux.intel.com Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/events/core.c | 8 +++++++- arch/x86/events/intel/core.c | 34 ++++++++++++++++++++++++---------- arch/x86/events/perf_event.h | 5 +++++ 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index aba34228231d..d36dc994a8e0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -223,6 +223,8 @@ static bool check_hw_exists(void) if (ret) goto msr_fail; for (i = 0; i < x86_pmu.num_counters_fixed; i++) { + if (fixed_counter_disabled(i)) + continue; if (val & (0x03 << i*4)) { bios_fail = 1; val_fail = val; @@ -1505,6 +1507,8 @@ void perf_event_print_debug(void) cpu, idx, prev_left); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx)) + continue; rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1947,7 +1951,9 @@ static int __init init_hw_perf_events(void) pr_info("... generic registers: %d\n", x86_pmu.num_counters); pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); + pr_info("... fixed-purpose events: %lu\n", + hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1) + << INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl)); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); /* diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 091420463569..7a0cb583ddbe 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2554,8 +2554,11 @@ static void intel_pmu_reset(void) wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { + if (fixed_counter_disabled(idx)) + continue; wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } if (ds) ds->bts_index = ds->bts_buffer_base; @@ -4907,7 +4910,7 @@ __init int intel_pmu_init(void) union cpuid10_eax eax; union cpuid10_ebx ebx; struct event_constraint *c; - unsigned int unused; + unsigned int fixed_mask; struct extra_reg *er; bool pmem = false; int version, i; @@ -4929,7 +4932,7 @@ __init int intel_pmu_init(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired hw_event or not. */ - cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full); if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) return -ENODEV; @@ -4953,12 +4956,15 @@ __init int intel_pmu_init(void) * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) { + if (version > 1 && version < 5) { int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, assume); - } + + fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1; + } else if (version >= 5) + x86_pmu.num_counters_fixed = fls(fixed_mask); if (version >= 4) x86_pmu.counter_freezing = !disable_counter_freezing; @@ -5504,8 +5510,7 @@ __init int intel_pmu_init(void) x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; } - x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + x86_pmu.intel_ctrl |= (u64)fixed_mask << INTEL_PMC_IDX_FIXED; if (x86_pmu.event_constraints) { /* @@ -5518,13 +5523,22 @@ __init int intel_pmu_init(void) * events to the generic counters. */ if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) { + /* + * Disable topdown slots and metrics events, + * if slots event is not in CPUID. + */ + if (!(INTEL_PMC_MSK_FIXED_SLOTS & x86_pmu.intel_ctrl)) + c->idxmsk64 = 0; c->weight = hweight64(c->idxmsk64); continue; } - if (c->cmask == FIXED_EVENT_FLAGS - && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + if (c->cmask == FIXED_EVENT_FLAGS) { + /* Disabled fixed counters which are not in CPUID */ + c->idxmsk64 &= x86_pmu.intel_ctrl; + + if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } c->idxmsk64 &= ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9a377bd754f9..109fa8d11a5d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1038,6 +1038,11 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +static inline bool fixed_counter_disabled(int i) +{ + return !(x86_pmu.intel_ctrl >> (i + INTEL_PMC_IDX_FIXED)); +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); -- Gitee From 4d1344229ed3dd2f445cb0989caa742a23c55861 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 12 Apr 2021 07:30:55 -0700 Subject: [PATCH 61/77] perf/x86: Factor out x86_pmu_show_pmu_cap commit e11c1a7eb302ac8f6f47c18fa662546405a5fd83 upstream The PMU capabilities are different among hybrid PMUs. Perf should dump the PMU capabilities information for each hybrid PMU. Factor out x86_pmu_show_pmu_cap() which shows the PMU capabilities information. The function will be reused later when registering a dedicated hybrid PMU. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/1618237865-33448-16-git-send-email-kan.liang@linux.intel.com Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/events/core.c | 25 ++++++++++++++++--------- arch/x86/events/perf_event.h | 3 +++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d36dc994a8e0..0388280b0004 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1890,6 +1890,20 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) static struct attribute_group x86_pmu_attr_group; static struct attribute_group x86_pmu_caps_group; +void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, + u64 intel_ctrl) +{ + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.cntval_bits); + pr_info("... generic registers: %d\n", num_counters); + pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %lu\n", + hweight64((((1ULL << num_counters_fixed) - 1) + << INTEL_PMC_IDX_FIXED) & intel_ctrl)); + pr_info("... event mask: %016Lx\n", intel_ctrl); +} + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1946,15 +1960,8 @@ static int __init init_hw_perf_events(void) pmu.attr_update = x86_pmu.attr_update; - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.cntval_bits); - pr_info("... generic registers: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %lu\n", - hweight64((((1ULL << x86_pmu.num_counters_fixed) - 1) - << INTEL_PMC_IDX_FIXED) & x86_pmu.intel_ctrl)); - pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + x86_pmu_show_pmu_cap(x86_pmu.num_counters, x86_pmu.num_counters_fixed, + x86_pmu.intel_ctrl); /* * Install callbacks. Core will call them for each online diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 109fa8d11a5d..ba5d948e8979 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -997,6 +997,9 @@ void x86_pmu_enable_event(struct perf_event *event); int x86_pmu_handle_irq(struct pt_regs *regs); +void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, + u64 intel_ctrl); + extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; -- Gitee From 4357708197d46cd08231657aa654bc9f61190648 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 3 Jul 2020 05:49:21 -0700 Subject: [PATCH 62/77] perf/core: Factor out functions to allocate/free the task_ctx_data commit ff9ff926889dd8026b4ba55266a010c27f68604f upstream The method to allocate/free the task_ctx_data is going to be changed in the following patch. Currently, the task_ctx_data is allocated/freed in several different places. To avoid repeatedly modifying the same codes in several different places, alloc_task_ctx_data() and free_task_ctx_data() are factored out to allocate/free the task_ctx_data. The modification only needs to be applied once. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1593780569-62993-16-git-send-email-kan.liang@linux.intel.com Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- kernel/events/core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 418cbf41c7fd..634f56700e7f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1183,12 +1183,22 @@ static void get_ctx(struct perf_event_context *ctx) refcount_inc(&ctx->refcount); } +static void *alloc_task_ctx_data(struct pmu *pmu) +{ + return kzalloc(pmu->task_ctx_size, GFP_KERNEL); +} + +static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data) +{ + kfree(task_ctx_data); +} + static void free_ctx(struct rcu_head *head) { struct perf_event_context *ctx; ctx = container_of(head, struct perf_event_context, rcu_head); - kfree(ctx->task_ctx_data); + free_task_ctx_data(ctx->pmu, ctx->task_ctx_data); kfree(ctx); } @@ -4316,7 +4326,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, goto errout; if (event->attach_state & PERF_ATTACH_TASK_DATA) { - task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL); + task_ctx_data = alloc_task_ctx_data(pmu); if (!task_ctx_data) { err = -ENOMEM; goto errout; @@ -4374,11 +4384,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, } } - kfree(task_ctx_data); + free_task_ctx_data(pmu, task_ctx_data); return ctx; errout: - kfree(task_ctx_data); + free_task_ctx_data(pmu, task_ctx_data); return ERR_PTR(err); } @@ -12315,8 +12325,7 @@ inherit_event(struct perf_event *parent_event, !child_ctx->task_ctx_data) { struct pmu *pmu = child_event->pmu; - child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size, - GFP_KERNEL); + child_ctx->task_ctx_data = alloc_task_ctx_data(pmu); if (!child_ctx->task_ctx_data) { free_event(child_event); return ERR_PTR(-ENOMEM); -- Gitee From a082318b16710b78a0ad0335a4b13ffd0932b9b9 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 3 Jul 2020 05:49:22 -0700 Subject: [PATCH 63/77] perf/core: Use kmem_cache to allocate the PMU specific data commit 217c2a633ebb36f1cc6d249f4ef2e4a809d46818 upstream Currently, the PMU specific data task_ctx_data is allocated by the function kzalloc() in the perf generic code. When there is no specific alignment requirement for the task_ctx_data, the method works well for now. However, there will be a problem once a specific alignment requirement is introduced in future features, e.g., the Architecture LBR XSAVE feature requires 64-byte alignment. If the specific alignment requirement is not fulfilled, the XSAVE family of instructions will fail to save/restore the xstate to/from the task_ctx_data. The function kzalloc() itself only guarantees a natural alignment. A new method to allocate the task_ctx_data has to be introduced, which has to meet the requirements as below: - must be a generic method can be used by different architectures, because the allocation of the task_ctx_data is implemented in the perf generic code; - must be an alignment-guarantee method (The alignment requirement is not changed after the boot); - must be able to allocate/free a buffer (smaller than a page size) dynamically; - should not cause extra CPU overhead or space overhead. Several options were considered as below: - One option is to allocate a larger buffer for task_ctx_data. E.g., ptr = kmalloc(size + alignment, GFP_KERNEL); ptr &= ~(alignment - 1); This option causes space overhead. - Another option is to allocate the task_ctx_data in the PMU specific code. To do so, several function pointers have to be added. As a result, both the generic structure and the PMU specific structure will become bigger. Besides, extra function calls are added when allocating/freeing the buffer. This option will increase both the space overhead and CPU overhead. - The third option is to use a kmem_cache to allocate a buffer for the task_ctx_data. The kmem_cache can be created with a specific alignment requirement by the PMU at boot time. A new pointer for kmem_cache has to be added in the generic struct pmu, which would be used to dynamically allocate a buffer for the task_ctx_data at run time. Although the new pointer is added to the struct pmu, the existing variable task_ctx_size is not required anymore. The size of the generic structure is kept the same. The third option which meets all the aforementioned requirements is used to replace kzalloc() for the PMU specific data allocation. A later patch will remove the kzalloc() method and the related variables. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1593780569-62993-17-git-send-email-kan.liang@linux.intel.com Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2aa48df4e89f..7f6f174f481d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -441,6 +441,11 @@ struct pmu { */ size_t task_ctx_size; + /* + * Kmem cache of PMU specific data + */ + struct kmem_cache *task_ctx_cache; + /* * Set up pmu-private data structures for an AUX area diff --git a/kernel/events/core.c b/kernel/events/core.c index 634f56700e7f..a3270243c917 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1185,12 +1185,18 @@ static void get_ctx(struct perf_event_context *ctx) static void *alloc_task_ctx_data(struct pmu *pmu) { + if (pmu->task_ctx_cache) + return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL); + return kzalloc(pmu->task_ctx_size, GFP_KERNEL); } static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data) { - kfree(task_ctx_data); + if (pmu->task_ctx_cache && task_ctx_data) + kmem_cache_free(pmu->task_ctx_cache, task_ctx_data); + else + kfree(task_ctx_data); } static void free_ctx(struct rcu_head *head) -- Gitee From fc06a82903c9d832a1ea3c0a2d24b29a7a7075c7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 3 Jul 2020 05:49:23 -0700 Subject: [PATCH 64/77] perf/x86/intel/lbr: Create kmem_cache for the LBR context data commit 33cad284497cf40f55ad6029c06011de3538ebed upstream A new kmem_cache method is introduced to allocate the PMU specific data task_ctx_data, which requires the PMU specific code to create a kmem_cache. Currently, the task_ctx_data is only used by the Intel LBR call stack feature, which is introduced since Haswell. The kmem_cache should be only created for Haswell and later platforms. There is no alignment requirement for the existing platforms. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1593780569-62993-18-git-send-email-kan.liang@linux.intel.com Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- arch/x86/events/intel/lbr.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index e01c48eaaf6b..aaf3852d26da 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1529,9 +1529,17 @@ void __init intel_pmu_lbr_init_snb(void) */ } +static inline struct kmem_cache * +create_lbr_kmem_cache(size_t size, size_t align) +{ + return kmem_cache_create("x86_lbr", size, align, 0, NULL); +} + /* haswell */ void intel_pmu_lbr_init_hsw(void) { + size_t size = sizeof(struct x86_perf_task_context); + x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = MSR_LBR_TOS; x86_pmu.lbr_from = MSR_LBR_NHM_FROM; @@ -1540,6 +1548,8 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + if (lbr_from_signext_quirk_needed()) static_branch_enable(&lbr_from_quirk_key); } @@ -1547,6 +1557,8 @@ void intel_pmu_lbr_init_hsw(void) /* skylake */ __init void intel_pmu_lbr_init_skl(void) { + size_t size = sizeof(struct x86_perf_task_context); + x86_pmu.lbr_nr = 32; x86_pmu.lbr_tos = MSR_LBR_TOS; x86_pmu.lbr_from = MSR_LBR_NHM_FROM; @@ -1556,6 +1568,8 @@ __init void intel_pmu_lbr_init_skl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + /* * SW branch filter usage: * - support syscall, sysret capture. @@ -1629,6 +1643,7 @@ void __init intel_pmu_arch_lbr_init(void) union cpuid28_ebx ebx; union cpuid28_ecx ecx; unsigned int unused_edx; + size_t size; u64 lbr_nr; /* Arch LBR Capabilities */ @@ -1653,8 +1668,10 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_br_type = ecx.split.lbr_br_type; x86_pmu.lbr_nr = lbr_nr; - x86_get_pmu()->task_ctx_size = sizeof(struct x86_perf_task_context_arch_lbr) + - lbr_nr * sizeof(struct lbr_entry); + size = sizeof(struct x86_perf_task_context_arch_lbr) + + lbr_nr * sizeof(struct lbr_entry); + x86_get_pmu()->task_ctx_size = size; + x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; -- Gitee From d63d8da6f913721eae87d02eea3fc5554bbc2718 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:14 +0800 Subject: [PATCH 65/77] tools headers uapi: Update tools's copy of linux/perf_event.h commit 412736119116d0161688e9061485fbc3e25f78d5 upstream To get the changes in: Liang Kan's patch 55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE") Kan's patch is in the tip/perf/core branch. So the next perf tool patches need this interface for hybrid support. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/include/uapi/linux/perf_event.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index d4d99f945d13..0eeab4d13bad 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -37,6 +37,21 @@ enum perf_type_id { PERF_TYPE_MAX, /* non-ABI */ }; +/* + * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA + * AA: hardware event ID + * EEEEEEEE: PMU type ID + * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB + * BB: hardware cache ID + * CC: hardware cache op ID + * DD: hardware cache op result ID + * EEEEEEEE: PMU type ID + * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + */ +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff + /* * Generalized performance event event_id types, used by the * attr.event_id parameter of the sys_perf_event_open() -- Gitee From 8cc907560a26d72a39b7759310366baff97ff992 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:26 +0800 Subject: [PATCH 66/77] perf record: Create two hybrid 'cycles' events by default commit b53a0755d5c2d19b13db897d6faf4969e03e45ae upstream When evlist is empty, for example no '-e' specified in perf record, one default 'cycles' event is added to evlist. While on hybrid platform, it needs to create two default 'cycles' events. One is for cpu_core, the other is for cpu_atom. This patch actually calls evsel__new_cycles() two times to create two 'cycles' events. # ./perf record -vv -a -- sleep 1 ... ------------------------------------------------------------ perf_event_attr: size 120 config 0x400000000 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 freq 1 precise_ip 3 sample_id_all 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 5 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 6 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 = 7 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 = 9 sys_perf_event_open: pid -1 cpu 4 group_fd -1 flags 0x8 = 10 sys_perf_event_open: pid -1 cpu 5 group_fd -1 flags 0x8 = 11 sys_perf_event_open: pid -1 cpu 6 group_fd -1 flags 0x8 = 12 sys_perf_event_open: pid -1 cpu 7 group_fd -1 flags 0x8 = 13 sys_perf_event_open: pid -1 cpu 8 group_fd -1 flags 0x8 = 14 sys_perf_event_open: pid -1 cpu 9 group_fd -1 flags 0x8 = 15 sys_perf_event_open: pid -1 cpu 10 group_fd -1 flags 0x8 = 16 sys_perf_event_open: pid -1 cpu 11 group_fd -1 flags 0x8 = 17 sys_perf_event_open: pid -1 cpu 12 group_fd -1 flags 0x8 = 18 sys_perf_event_open: pid -1 cpu 13 group_fd -1 flags 0x8 = 19 sys_perf_event_open: pid -1 cpu 14 group_fd -1 flags 0x8 = 20 sys_perf_event_open: pid -1 cpu 15 group_fd -1 flags 0x8 = 21 ------------------------------------------------------------ perf_event_attr: size 120 config 0x800000000 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 freq 1 precise_ip 3 sample_id_all 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 16 group_fd -1 flags 0x8 = 22 sys_perf_event_open: pid -1 cpu 17 group_fd -1 flags 0x8 = 23 sys_perf_event_open: pid -1 cpu 18 group_fd -1 flags 0x8 = 24 sys_perf_event_open: pid -1 cpu 19 group_fd -1 flags 0x8 = 25 sys_perf_event_open: pid -1 cpu 20 group_fd -1 flags 0x8 = 26 sys_perf_event_open: pid -1 cpu 21 group_fd -1 flags 0x8 = 27 sys_perf_event_open: pid -1 cpu 22 group_fd -1 flags 0x8 = 28 sys_perf_event_open: pid -1 cpu 23 group_fd -1 flags 0x8 = 29 ------------------------------------------------------------ We have to create evlist-hybrid.c otherwise due to the symbol dependency the perf test python would be failed. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-14-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 19 +++++++++++---- tools/perf/util/Build | 1 + tools/perf/util/evlist-hybrid.c | 41 +++++++++++++++++++++++++++++++++ tools/perf/util/evlist-hybrid.h | 12 ++++++++++ tools/perf/util/evlist.c | 5 +++- tools/perf/util/evsel.c | 6 ++--- tools/perf/util/evsel.h | 2 +- 7 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/evlist-hybrid.c create mode 100644 tools/perf/util/evlist-hybrid.h diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 047b30ca33b4..1c73eb666f51 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,6 +44,8 @@ #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" +#include "util/pmu-hybrid.h" +#include "util/evlist-hybrid.h" #include "asm/bug.h" #include "perf.h" @@ -2465,10 +2467,19 @@ int cmd_record(int argc, const char **argv) if (record.opts.overwrite) record.opts.tail_synthesize = true; - if (rec->evlist->core.nr_entries == 0 && - __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { - pr_err("Not enough memory for event selector list\n"); - goto out; + if (rec->evlist->core.nr_entries == 0) { + if (perf_pmu__has_hybrid()) { + err = evlist__add_default_hybrid(rec->evlist, + !record.opts.no_samples); + } else { + err = __evlist__add_default(rec->evlist, + !record.opts.no_samples); + } + + if (err < 0) { + pr_err("Not enough memory for event selector list\n"); + goto out; + } } if (rec->opts.target.tid && !rec->opts.no_inherit_set) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 90c68a6aed1d..8ed7fc0b91a8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -9,6 +9,7 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o +perf-y += evlist-hybrid.o perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c new file mode 100644 index 000000000000..e11998526f2e --- /dev/null +++ b/tools/perf/util/evlist-hybrid.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include "cpumap.h" +#include "evlist.h" +#include "evsel.h" +#include "../perf.h" +#include "util/pmu-hybrid.h" +#include "util/evlist-hybrid.h" +#include +#include +#include +#include +#include +#include +#include + +int evlist__add_default_hybrid(struct evlist *evlist, bool precise) +{ + struct evsel *evsel; + struct perf_pmu *pmu; + __u64 config; + struct perf_cpu_map *cpus; + + perf_pmu__for_each_hybrid_pmu(pmu) { + config = PERF_COUNT_HW_CPU_CYCLES | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT); + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, + config); + if (!evsel) + return -ENOMEM; + + cpus = perf_cpu_map__get(pmu->cpus); + evsel->core.cpus = cpus; + evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->pmu_name = strdup(pmu->name); + evlist__add(evlist, evsel); + } + + return 0; +} diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h new file mode 100644 index 000000000000..e25861649d8f --- /dev/null +++ b/tools/perf/util/evlist-hybrid.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_EVLIST_HYBRID_H +#define __PERF_EVLIST_HYBRID_H + +#include +#include +#include "evlist.h" +#include + +int evlist__add_default_hybrid(struct evlist *evlist, bool precise); + +#endif /* __PERF_EVLIST_HYBRID_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bda41564d406..7443238d0ca7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -23,6 +23,7 @@ #include "bpf-event.h" #include "util/string2.h" #include "util/perf_api_probe.h" +#include "util/evlist-hybrid.h" #include #include #include @@ -213,8 +214,10 @@ void perf_evlist__set_leader(struct evlist *evlist) int __evlist__add_default(struct evlist *evlist, bool precise) { - struct evsel *evsel = evsel__new_cycles(precise); + struct evsel *evsel; + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CPU_CYCLES); if (evsel == NULL) return -ENOMEM; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdd8738e9cdf..3bf71f2b2ba2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -292,11 +292,11 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise) +struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) { struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, + .type = type, + .config = config, .exclude_kernel = !perf_event_can_profile_kernel(), }; struct evsel *evsel; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4aa78039cef7..fed1ef2a9fb2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -177,7 +177,7 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name) return evsel__newtp_idx(sys, name, 0); } -struct evsel *evsel__new_cycles(bool precise); +struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); struct tep_event *event_format__new(const char *sys, const char *name); -- Gitee From 70065d568b42c49145500b8a69cfe7b3e62770f8 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 2 Nov 2021 11:01:12 +0530 Subject: [PATCH 67/77] perf evsel: Don't set exclude_guest by default commit eb39bf325631f9ae185abd16281079b7b9858737 upstream Perf tool sets exclude_guest by default while calling perf_event_open(). Because IBS does not have filtering capability, it always gets rejected by IBS PMU driver and thus perf falls back to non-precise sampling. Fix it by not setting exclude_guest by default on AMD. Before: $ sudo ./perf record -C 0 -vvv true |& grep precise precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 decreasing precise_ip by one (1) precise_ip 1 decreasing precise_ip by one (0) After: $ sudo ./perf record -C 0 -vvv true |& grep precise precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 Committer notes: Fixup init to zero for perf_env in older compilers: arch/x86/util/evsel.c:15:26: error: missing field 'os_release' initializer [-Werror,-Wmissing-field-initializers] struct perf_env env = {0}; ^ Committer notes: Namhyung remarked: It'd be nice if it can cover explicit "-e cycles:pp" as well. Ravi clarified: For explicit :pp modifier, evsel->precise_max does not get set and thus perf does not try with different attr->precise_ip values while exclude_guest set. So no issue with explicit :pp: $ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest" precise_ip 2 exclude_guest 1 precise_ip 2 exclude_guest 1 switching off exclude_guest, exclude_host precise_ip 2 ^C Also, with :P modifier, evsel->precise_max gets set but exclude_guest does not and thus :P also works fine: $ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest" precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 ^C Reported-by: Kim Phillips Signed-off-by: Ravi Bangoria Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211103072112.32312-1-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/arch/x86/util/evsel.c | 23 +++++++++++++++++++++++ tools/perf/util/evsel.c | 12 +++++++----- tools/perf/util/evsel.h | 1 + 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 2f733cdc8dbb..ac2899a25b7a 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -1,8 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "util/evsel.h" +#include "util/env.h" +#include "linux/string.h" void arch_evsel__set_sample_weight(struct evsel *evsel) { evsel__set_sample_bit(evsel, WEIGHT_STRUCT); } + +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) +{ + struct perf_env env = { .total_mem = 0, } ; + + if (!perf_env__cpuid(&env)) + return; + + /* + * On AMD, precise cycles event sampling internally uses IBS pmu. + * But IBS does not have filtering capabilities and perf by default + * sets exclude_guest = 1. This makes IBS pmu event init fail and + * thus perf ends up doing non-precise sampling. Avoid it by clearing + * exclude_guest. + */ + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + attr->exclude_guest = 0; + + free(env.cpuid); +} diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3bf71f2b2ba2..abdf91ca843d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) +struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) { struct perf_event_attr attr = { .type = type, @@ -303,18 +303,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) event_attr_init(&attr); - if (!precise) - goto new_event; - /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. */ -new_event: evsel = evsel__new(&attr); if (evsel == NULL) goto out; + arch_evsel__fixup_new_cycles(&evsel->core.attr); + evsel->precise_max = true; /* use asprintf() because free(evsel) assumes name is allocated */ @@ -930,6 +928,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } +void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) +{ +} + /* * The enable_on_exec/disabled value strategy: * diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index fed1ef2a9fb2..97848fd39b67 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -232,6 +232,7 @@ int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter); void arch_evsel__set_sample_weight(struct evsel *evsel); +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -- Gitee From aaf10c0d52712cc3123b5103f8d964c1186e1530 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:18 -0800 Subject: [PATCH 68/77] perf stat: Factor out open error handling commit e0e6a6ca3ac211cc07486330a2b89f41ea31b4dd upstream Factor out the open error handling into a separate function. This is useful for followon patches who need to duplicate this. No behavior change intended. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/builtin-stat.c | 100 +++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b5a82969dab4..60c3f47d569a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -420,6 +420,57 @@ static bool is_target_alive(struct target *_target, return false; } +enum counter_recovery { + COUNTER_SKIP, + COUNTER_RETRY, + COUNTER_FATAL, +}; + +static enum counter_recovery stat_handle_error(struct evsel *counter) +{ + char msg[BUFSIZ]; + /* + * PPC returns ENXIO for HW counters until 2.6.37 + * (behavior changed with commit b0a873e). + */ + if (errno == EINVAL || errno == ENOSYS || + errno == ENOENT || errno == EOPNOTSUPP || + errno == ENXIO) { + if (verbose > 0) + ui__warning("%s event is not supported by the kernel.\n", + perf_evsel__name(counter)); + counter->supported = false; + + if ((counter->leader != counter) || + !(counter->leader->core.nr_members > 1)) + return COUNTER_SKIP; + } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (verbose > 0) + ui__warning("%s\n", msg); + return COUNTER_RETRY; + } else if (target__has_per_thread(&target) && + evsel_list->core.threads && + evsel_list->core.threads->err_thread != -1) { + /* + * For global --per-thread case, skip current + * error thread. + */ + if (!thread_map__remove(evsel_list->core.threads, + evsel_list->core.threads->err_thread)) { + evsel_list->core.threads->err_thread = -1; + return COUNTER_RETRY; + } + } + + perf_evsel__open_strerror(counter, &target, + errno, msg, sizeof(msg)); + ui__error("%s\n", msg); + + if (child_pid != -1) + kill(child_pid, SIGTERM); + return COUNTER_FATAL; +} + static int __run_perf_stat(int argc, const char **argv, int run_idx) { int interval = stat_config.interval; @@ -469,47 +520,16 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) goto try_again; } - /* - * PPC returns ENXIO for HW counters until 2.6.37 - * (behavior changed with commit b0a873e). - */ - if (errno == EINVAL || errno == ENOSYS || - errno == ENOENT || errno == EOPNOTSUPP || - errno == ENXIO) { - if (verbose > 0) - ui__warning("%s event is not supported by the kernel.\n", - perf_evsel__name(counter)); - counter->supported = false; - - if ((counter->leader != counter) || - !(counter->leader->core.nr_members > 1)) - continue; - } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { - if (verbose > 0) - ui__warning("%s\n", msg); - goto try_again; - } else if (target__has_per_thread(&target) && - evsel_list->core.threads && - evsel_list->core.threads->err_thread != -1) { - /* - * For global --per-thread case, skip current - * error thread. - */ - if (!thread_map__remove(evsel_list->core.threads, - evsel_list->core.threads->err_thread)) { - evsel_list->core.threads->err_thread = -1; - goto try_again; - } + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: + continue; + default: + break; } - - perf_evsel__open_strerror(counter, &target, - errno, msg, sizeof(msg)); - ui__error("%s\n", msg); - - if (child_pid != -1) - kill(child_pid, SIGTERM); - - return -1; } counter->supported = true; -- Gitee From 6b7e47025e83c98b82a3ebd5c4151db792f61e04 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:12 -0800 Subject: [PATCH 69/77] perf affinity: Add infrastructure to save/restore affinity commit 267ed5d8593cedd6146eabe00d270629c9cff771 upstream The kernel perf subsystem has to IPI to the target CPU for many operations. On systems with many CPUs and when managing many events the overhead can be dominated by lots of IPIs. An alternative is to set up CPU affinity in the perf tool, then set up all the events for that CPU, and then move on to the next CPU. Add some affinity management infrastructure to enable such a model. Used in followon patches. Committer notes: Use zfree() in some places, add missing stdbool.h header, some minor coding style changes. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/util/Build | 1 + tools/perf/util/affinity.c | 73 ++++++++++++++++++++++++++++++ tools/perf/util/affinity.h | 17 +++++++ tools/perf/util/python-ext-sources | 1 + 4 files changed, 92 insertions(+) create mode 100644 tools/perf/util/affinity.c create mode 100644 tools/perf/util/affinity.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8ed7fc0b91a8..d9df8a2a83cd 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -79,6 +79,7 @@ perf-y += sort.o perf-y += hist.o perf-y += util.o perf-y += cpumap.o +perf-y += affinity.o perf-y += cputopo.o perf-y += cgroup.o perf-y += target.o diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c new file mode 100644 index 000000000000..a5e31f826828 --- /dev/null +++ b/tools/perf/util/affinity.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Manage affinity to optimize IPIs inside the kernel perf API. */ +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include "perf.h" +#include "cpumap.h" +#include "affinity.h" + +static int get_cpu_set_size(void) +{ + int sz = cpu__max_cpu() + 8 - 1; + /* + * sched_getaffinity doesn't like masks smaller than the kernel. + * Hopefully that's big enough. + */ + if (sz < 4096) + sz = 4096; + return sz / 8; +} + +int affinity__setup(struct affinity *a) +{ + int cpu_set_size = get_cpu_set_size(); + + a->orig_cpus = bitmap_alloc(cpu_set_size * 8); + if (!a->orig_cpus) + return -1; + sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus); + a->sched_cpus = bitmap_alloc(cpu_set_size * 8); + if (!a->sched_cpus) { + zfree(&a->orig_cpus); + return -1; + } + bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size); + a->changed = false; + return 0; +} + +/* + * perf_event_open does an IPI internally to the target CPU. + * It is more efficient to change perf's affinity to the target + * CPU and then set up all events on that CPU, so we amortize + * CPU communication. + */ +void affinity__set(struct affinity *a, int cpu) +{ + int cpu_set_size = get_cpu_set_size(); + + if (cpu == -1) + return; + a->changed = true; + set_bit(cpu, a->sched_cpus); + /* + * We ignore errors because affinity is just an optimization. + * This could happen for example with isolated CPUs or cpusets. + * In this case the IPIs inside the kernel's perf API still work. + */ + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus); + clear_bit(cpu, a->sched_cpus); +} + +void affinity__cleanup(struct affinity *a) +{ + int cpu_set_size = get_cpu_set_size(); + + if (a->changed) + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus); + zfree(&a->sched_cpus); + zfree(&a->orig_cpus); +} diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h new file mode 100644 index 000000000000..0ad6a18ef20c --- /dev/null +++ b/tools/perf/util/affinity.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef PERF_AFFINITY_H +#define PERF_AFFINITY_H 1 + +#include + +struct affinity { + unsigned long *orig_cpus; + unsigned long *sched_cpus; + bool changed; +}; + +void affinity__cleanup(struct affinity *a); +void affinity__set(struct affinity *a, int cpu); +int affinity__setup(struct affinity *a); + +#endif // PERF_AFFINITY_H diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 9af183860fbd..e7279ea6043a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -33,3 +33,4 @@ util/trace-event.c util/string.c util/symbol_fprintf.c util/units.c +util/affinity.c -- Gitee From 86b74265a7cf5774f04ddc82d810424623d70e6f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:15 -0800 Subject: [PATCH 70/77] perf evsel: Add iterator to iterate over events ordered by CPU commit a8cbe40fe9f4ba499cc60b8b6a6851c2c1963797 upstream Add some common code that is needed to iterate over all events in CPU order. Used in followon patches Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/util/cpumap.h | 1 + tools/perf/util/evlist.c | 32 ++++++++++++++++++++++++++++++++ tools/perf/util/evlist.h | 8 ++++++++ tools/perf/util/evsel.h | 1 + 4 files changed, 42 insertions(+) diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 2553bef1279d..dbc1d7e949ed 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -60,4 +60,5 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7443238d0ca7..d971c12b9653 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -324,6 +324,38 @@ static int perf_evlist__nr_threads(struct evlist *evlist, return perf_thread_map__nr(evlist->core.threads); } +void evlist__cpu_iter_start(struct evlist *evlist) +{ + struct evsel *pos; + + /* + * Reset the per evsel cpu_iter. This is needed because + * each evsel's cpumap may have a different index space, + * and some operations need the index to modify + * the FD xyarray (e.g. open, close) + */ + evlist__for_each_entry(evlist, pos) + pos->cpu_iter = 0; +} + +bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) +{ + if (ev->cpu_iter >= ev->core.cpus->nr) + return true; + if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) + return true; + return false; +} + +bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) +{ + if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) { + ev->cpu_iter++; + return false; + } + return true; +} + void evlist__disable(struct evlist *evlist) { struct evsel *pos; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e98644a65498..a7094df7b816 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -320,9 +320,17 @@ void perf_evlist__to_front(struct evlist *evlist, #define evlist__for_each_entry_safe(evlist, tmp, evsel) \ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel) +#define evlist__for_each_cpu(evlist, index, cpu) \ + evlist__cpu_iter_start(evlist); \ + perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) + void perf_evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); +void evlist__cpu_iter_start(struct evlist *evlist); +bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); +bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); + struct evsel * perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 97848fd39b67..ddb255bd837e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -95,6 +95,7 @@ struct evsel { bool collect_stat; bool weak_group; bool percore; + int cpu_iter; const char *pmu_name; struct { perf_evsel__sb_cb_t *cb; -- Gitee From 1a09d299b1055601be7a334caf1c15cdfd4601c4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:14 -0800 Subject: [PATCH 71/77] perf evlist: Maintain evlist->all_cpus commit a2408a70368ade9c99de27da78d49416313b8833 upstream Maintain a cpumap in the evlist that is the union of all the cpus of the events. This needs a cpumap merge operation, which is added together with tests. v2: Add tests for cpu map merge Fix handling of duplicates Rename _update to _merge Factor out sorting. Fix handling of NULL maps in merge v3: Add comments and empty lines to _merge Committer testing: # perf test "Merge cpu map" 52: Merge cpu map : Ok # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20191121001522.180827-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/lib/cpumap.c | 57 ++++++++++++++++++++++++ tools/perf/lib/evlist.c | 1 + tools/perf/lib/include/internal/evlist.h | 1 + tools/perf/lib/include/perf/cpumap.h | 2 + tools/perf/tests/builtin-test.c | 5 +++ tools/perf/tests/cpumap.c | 16 +++++++ tools/perf/tests/tests.h | 1 + 7 files changed, 83 insertions(+) diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index 2ca1fafa620d..5e1ae7a23591 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -272,3 +272,60 @@ int perf_cpu_map__max(struct perf_cpu_map *map) return max; } + +/* + * Merge two cpumaps + * + * orig either gets freed and replaced with a new map, or reused + * with no reference count change (similar to "realloc") + * other has its reference count increased. + */ + +struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + int *tmp_cpus; + int tmp_len; + int i, j, k; + struct perf_cpu_map *merged; + + if (!orig && !other) + return NULL; + if (!orig) { + perf_cpu_map__get(other); + return other; + } + if (!other) + return orig; + if (orig->nr == other->nr && + !memcmp(orig->map, other->map, orig->nr * sizeof(int))) + return orig; + + tmp_len = orig->nr + other->nr; + tmp_cpus = malloc(tmp_len * sizeof(int)); + if (!tmp_cpus) + return NULL; + + /* Standard merge algorithm from wikipedia */ + i = j = k = 0; + while (i < orig->nr && j < other->nr) { + if (orig->map[i] <= other->map[j]) { + if (orig->map[i] == other->map[j]) + j++; + tmp_cpus[k++] = orig->map[i++]; + } else + tmp_cpus[k++] = other->map[j++]; + } + + while (i < orig->nr) + tmp_cpus[k++] = orig->map[i++]; + + while (j < other->nr) + tmp_cpus[k++] = other->map[j++]; + assert(k <= tmp_len); + + merged = cpu_map__trim_new(k, tmp_cpus); + free(tmp_cpus); + perf_cpu_map__put(orig); + return merged; +} diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index d1496fee810c..b7e8b3b15f25 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -46,6 +46,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, perf_thread_map__put(evsel->threads); evsel->threads = perf_thread_map__get(evlist->threads); + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } static void perf_evlist__propagate_maps(struct perf_evlist *evlist) diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 9f440ab12b76..096bf0b05e04 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -17,6 +17,7 @@ struct perf_evlist { int nr_entries; bool has_user_cpus; struct perf_cpu_map *cpus; + struct perf_cpu_map *all_cpus; struct perf_thread_map *threads; int nr_mmaps; size_t mmap_len; diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h index ac9aa497f84a..6a17ad730cbc 100644 --- a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/perf/lib/include/perf/cpumap.h @@ -12,6 +12,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 55774baffc2a..d111d2ed1c10 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -259,6 +259,11 @@ static struct test generic_tests[] = { .desc = "Print cpu map", .func = test__cpu_map_print, }, + { + .desc = "Merge cpu map", + .func = test__cpu_map_merge, + }, + { .desc = "Probe SDT events", .func = test__sdt_event, diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 8a0d236202b0..4ac56741ac5f 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -120,3 +120,19 @@ int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_un TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1-10,12-20,22-30,32-40")); return 0; } + +int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + struct perf_cpu_map *a = perf_cpu_map__new("4,2,1"); + struct perf_cpu_map *b = perf_cpu_map__new("4,5,7"); + struct perf_cpu_map *c = perf_cpu_map__merge(a, b); + char buf[100]; + + TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5); + cpu_map__snprint(c, buf, sizeof(buf)); + TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7")); + perf_cpu_map__put(a); + perf_cpu_map__put(b); + perf_cpu_map__put(c); + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 72912eb473cb..27e3c65c6955 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -98,6 +98,7 @@ int test__event_update(struct test *test, int subtest); int test__event_times(struct test *test, int subtest); int test__backward_ring_buffer(struct test *test, int subtest); int test__cpu_map_print(struct test *test, int subtest); +int test__cpu_map_merge(struct test *test, int subtest); int test__sdt_event(struct test *test, int subtest); int test__is_printable_array(struct test *test, int subtest); int test__bitmap_print(struct test *test, int subtest); -- Gitee From 60c7518a83c39be56cfe2043968568a51522f0bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 20 Oct 2019 10:51:55 -0700 Subject: [PATCH 72/77] perf evsel: Avoid close(-1) commit 2ccfb8bc2143ca347609d1d4434176d73a78d805 upstream In some weak fallback cases close can be called a lot with -1. Check for this case and avoid calling close then. This is mainly to shut up valgrind which complains about this case. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20191020175202.32456-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/lib/evsel.c | 3 ++- tools/perf/util/evsel.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c index a8cb582e2721..5a89857b0381 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/perf/lib/evsel.c @@ -120,7 +120,8 @@ void perf_evsel__close_fd(struct perf_evsel *evsel) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - close(FD(evsel, cpu, thread)); + if (FD(evsel, cpu, thread) >= 0) + close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index abdf91ca843d..1470f6c328f1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1885,7 +1885,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, do { while (--thread >= 0) { - close(FD(evsel, cpu, thread)); + if (FD(evsel, cpu, thread) >= 0) + close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } thread = nthreads; -- Gitee From 4f91de354ecd788bbf225985946809a7956a7703 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:16 -0800 Subject: [PATCH 73/77] perf evsel: Add functions to close evsel on a CPU commit 99d6141d677a8cd0b35390a29527c8def42538b1 upstream Refactor the existing all CPU function to use the per CPU close internally. Export APIs to close per CPU. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/lib/evsel.c | 27 +++++++++++++++++++++------ tools/perf/lib/include/perf/evsel.h | 1 + 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c index 5a89857b0381..ea775dacbd2d 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/perf/lib/evsel.c @@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, return err; } +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +{ + int thread; + + for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { + if (FD(evsel, cpu, thread) >= 0) + close(FD(evsel, cpu, thread)); + FD(evsel, cpu, thread) = -1; + } +} + void perf_evsel__close_fd(struct perf_evsel *evsel) { - int cpu, thread; + int cpu; for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; - } + perf_evsel__close_fd_cpu(evsel, cpu); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +{ + if (evsel->fd == NULL) + return; + + perf_evsel__close_fd_cpu(evsel, cpu); +} + int perf_evsel__read_size(struct perf_evsel *evsel) { u64 read_format = evsel->attr.read_format; diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h index 4388667f265c..ed10a914cd3f 100644 --- a/tools/perf/lib/include/perf/evsel.h +++ b/tools/perf/lib/include/perf/evsel.h @@ -28,6 +28,7 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -- Gitee From b27918c4f4a5499e30a4a4a5a83239088feb86fe Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 20 Nov 2019 16:15:19 -0800 Subject: [PATCH 74/77] perf stat: Use affinity for opening events commit 4804e0111662d7d89edf4e767a64c6f7e4778bb1 upstream Restructure the event opening in perf stat to cycle through the events by CPU after setting affinity to that CPU. This eliminates IPI overhead in the perf API. We have to loop through the CPU in the outter builtin-stat code instead of leaving that to low level functions. It has to change the weak group fallback strategy slightly. Since we cannot easily undo the opens for other CPUs move the weak group retry to a separate loop. Before with a large test case with 94 CPUs: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 42.75 4.050910 67 60046 110 perf_event_open After: 26.86 0.944396 16 58069 110 perf_event_open (the number changes slightly because the weak group retries work differently and the test case relies on weak groups) Committer notes: Added one of the hunks in a patch provided by Andi after I noticed that the "event times" 'perf test' entry was segfaulting. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-10-andi@firstfloor.org Link: http://lore.kernel.org/lkml/20191127232657.GL84886@tassilo.jf.intel.com # Fix Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 121 +++++++++++++++++++++++++++------ tools/perf/tests/event-times.c | 4 +- tools/perf/util/evlist.c | 10 ++- tools/perf/util/evlist.h | 3 +- tools/perf/util/evsel.c | 22 ++++-- tools/perf/util/evsel.h | 5 +- tools/perf/util/stat.c | 5 +- tools/perf/util/stat.h | 3 +- 9 files changed, 141 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1c73eb666f51..4f30f7c1e079 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -796,7 +796,7 @@ static int record__open(struct record *rec) if ((errno == EINVAL || errno == EBADF) && pos->leader != pos && pos->weak_group) { - pos = perf_evlist__reset_weak_group(evlist, pos); + pos = perf_evlist__reset_weak_group(evlist, pos, true); goto try_again; } rc = -errno; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 60c3f47d569a..40c6c2d822e1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/target.h" #include "util/time-utils.h" #include "util/top.h" +#include "util/affinity.h" #include "asm/bug.h" #include @@ -440,6 +441,11 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) ui__warning("%s event is not supported by the kernel.\n", perf_evsel__name(counter)); counter->supported = false; + /* + * errored is a sticky flag that means one of the counter's + * cpu event had a problem and needs to be reexamined. + */ + counter->errored = true; if ((counter->leader != counter) || !(counter->leader->core.nr_members > 1)) @@ -484,6 +490,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; + struct affinity affinity; + int i, cpu; + bool second_pass = false; if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; @@ -508,30 +517,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (group) perf_evlist__set_leader(evsel_list); - evlist__for_each_entry(evsel_list, counter) { + if (affinity__setup(&affinity) < 0) + return -1; + + evlist__for_each_cpu (evsel_list, i, cpu) { + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evsel_list, counter) { + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (counter->reset_group || counter->errored) + continue; try_again: - if (create_perf_stat_counter(counter, &stat_config, &target) < 0) { - - /* Weak group failed. Reset the group. */ - if ((errno == EINVAL || errno == EBADF) && - counter->leader != counter && - counter->weak_group) { - counter = perf_evlist__reset_weak_group(evsel_list, counter); - goto try_again; + if (create_perf_stat_counter(counter, &stat_config, &target, + counter->cpu_iter - 1) < 0) { + + /* + * Weak group failed. We cannot just undo this here + * because earlier CPUs might be in group mode, and the kernel + * doesn't support mixing group and non group reads. Defer + * it to later. + * Don't close here because we're in the wrong affinity. + */ + if ((errno == EINVAL || errno == EBADF) && + counter->leader != counter && + counter->weak_group) { + perf_evlist__reset_weak_group(evsel_list, counter, false); + assert(counter->reset_group); + second_pass = true; + continue; + } + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: + continue; + default: + break; + } + } + counter->supported = true; + } + } - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again; - case COUNTER_SKIP: - continue; - default: - break; + if (second_pass) { + /* + * Now redo all the weak group after closing them, + * and also close errored counters. + */ + + evlist__for_each_cpu(evsel_list, i, cpu) { + affinity__set(&affinity, cpu); + /* First close errored or weak retry */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->reset_group && !counter->errored) + continue; + if (evsel__cpu_iter_skip_no_inc(counter, cpu)) + continue; + perf_evsel__close_cpu(&counter->core, counter->cpu_iter); + } + /* Now reopen weak */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->reset_group && !counter->errored) + continue; + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (!counter->reset_group) + continue; +try_again_reset: + pr_debug2("reopening weak %s\n", perf_evsel__name(counter)); + if (create_perf_stat_counter(counter, &stat_config, &target, + counter->cpu_iter - 1) < 0) { + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again_reset; + case COUNTER_SKIP: + continue; + default: + break; + } + } + counter->supported = true; } } - counter->supported = true; + } + affinity__cleanup(&affinity); + + evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) { + perf_evsel__free_fd(&counter->core); + continue; + } l = strlen(counter->unit); if (l > stat_config.unit_width) diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 1ee8704e2284..1e8a9f5c356d 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; - err = perf_evsel__open_per_cpu(evsel, cpus); + err = perf_evsel__open_per_cpu(evsel, cpus, -1); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist) return -1; } - err = perf_evsel__open_per_cpu(evsel, cpus); + err = perf_evsel__open_per_cpu(evsel, cpus, -1); if (err == -EACCES) return TEST_SKIP; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d971c12b9653..84c252d28eb6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1722,7 +1722,8 @@ void perf_evlist__force_leader(struct evlist *evlist) } struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, - struct evsel *evsel) + struct evsel *evsel, + bool close) { struct evsel *c2, *leader; bool is_open = true; @@ -1739,10 +1740,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, if (c2 == evsel) is_open = false; if (c2->leader == leader) { - if (is_open) + if (is_open && close) perf_evsel__close(&c2->core); c2->leader = c2; c2->core.nr_members = 0; + /* + * Set this for all former members of the group + * to indicate they get reopened. + */ + c2->reset_group = true; } } return leader; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a7094df7b816..29c550216a62 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -342,5 +342,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist); void perf_evlist__force_leader(struct evlist *evlist); struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, - struct evsel *evsel); + struct evsel *evsel, + bool close); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1470f6c328f1..6b4e7bf75312 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1610,8 +1610,9 @@ static int perf_event_open(struct evsel *evsel, return fd; } -int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, - struct perf_thread_map *threads) +static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads, + int start_cpu, int end_cpu) { int cpu, thread, nthreads; unsigned long flags = PERF_FLAG_FD_CLOEXEC; @@ -1694,7 +1695,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, display_attr(&evsel->core.attr); - for (cpu = 0; cpu < cpus->nr; cpu++) { + for (cpu = start_cpu; cpu < end_cpu; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -1894,6 +1895,12 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } +int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads) +{ + return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1); +} + void evsel__close(struct evsel *evsel) { perf_evsel__close(&evsel->core); @@ -1901,9 +1908,14 @@ void evsel__close(struct evsel *evsel) } int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus) + struct perf_cpu_map *cpus, + int cpu) { - return evsel__open(evsel, cpus, NULL); + if (cpu == -1) + return evsel__open_cpu(evsel, cpus, NULL, 0, + cpus ? cpus->nr : 1); + + return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); } int perf_evsel__open_per_thread(struct evsel *evsel, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ddb255bd837e..af87690775a7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -94,6 +94,8 @@ struct evsel { struct evsel *metric_leader; bool collect_stat; bool weak_group; + bool reset_group; + bool errored; bool percore; int cpu_iter; const char *pmu_name; @@ -239,7 +241,8 @@ int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus); + struct perf_cpu_map *cpus, + int cpu); int perf_evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5156aa971fbb..93c95b4b13dd 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -465,7 +465,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, - struct target *target) + struct target *target, + int cpu) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel->leader; @@ -509,7 +510,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel)); + return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); return perf_evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index edbeb2f63e8d..0773e4b7ec44 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -211,7 +211,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, - struct target *target); + struct target *target, + int cpu); void perf_evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, -- Gitee From 210bbcd9291d5100ced70fcaeb79d082283d723e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Apr 2020 15:50:10 -0300 Subject: [PATCH 75/77] perf evsel: Rename perf_evsel__find_pmu() to evsel__find_pmu() commit e76026bdd51bcd4a0b9793c655891cde45367f5f upstream As it is a 'struct evsel' method, not part of tools/lib/perf/, aka libperf, to whom the perf_ prefix belongs. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/util/auxtrace.c | 2 +- tools/perf/util/evsel.h | 2 +- tools/perf/util/pmu.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ea11526bef07..185136bae6f6 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2379,7 +2379,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter, static int perf_evsel__nr_addr_filter(struct evsel *evsel) { - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + struct perf_pmu *pmu = evsel__find_pmu(evsel); int nr_addr_filters = 0; if (!pmu) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index af87690775a7..2b4c33e773de 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -160,7 +160,7 @@ int perf_evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), void (*fini)(struct evsel *evsel)); -struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel); +struct perf_pmu *evsel__find_pmu(struct evsel *evsel); bool perf_evsel__is_aux_event(struct evsel *evsel); struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c0f8bdb65ccc..2677ea6b9344 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -901,7 +901,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) return NULL; } -struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) +struct perf_pmu *evsel__find_pmu(struct evsel *evsel) { struct perf_pmu *pmu = NULL; @@ -915,7 +915,7 @@ struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) bool perf_evsel__is_aux_event(struct evsel *evsel) { - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + struct perf_pmu *pmu = evsel__find_pmu(evsel); return pmu && pmu->auxtrace; } -- Gitee From 4755d1549be381da059082d0af325eea762b9f7c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 28 Feb 2020 08:30:00 -0800 Subject: [PATCH 76/77] perf tools: Add hw_idx in struct branch_stack commit 42bbabed09ce6208026648a71a45b4394c74585a upstream The low level index of raw branch records for the most recent branch can be recorded in a sample with PERF_SAMPLE_BRANCH_HW_INDEX branch_sample_type. Extend struct branch_stack to support it. However, if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, only nr and entries[] will be output by kernel. The pointer of entries[] could be wrong, since the output format is different with new struct branch_stack. Add a variable no_hw_idx in struct perf_sample to indicate whether the hw_idx is output. Add get_branch_entry() to return corresponding pointer of entries[0]. To make dummy branch sample consistent as new branch sample, add hw_idx in struct dummy_branch_stack for cs-etm and intel-pt. Apply the new struct branch_stack for synthetic events as well. Extend test case sample-parsing to support new struct branch_stack. Committer notes: Renamed get_branch_entries() to perf_sample__branch_entries() to have proper namespacing and pave the way for this to be moved to libperf, eventually. Add 'static' to that inline as it is in a header. Add 'hw_idx' to 'struct dummy_branch_stack' in cs-etm.c to fix the build on arm64. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Michael Ellerman Cc: Namhyung Kim Cc: Pavel Gerasimov Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Vitaly Slobodskoy Link: http://lore.kernel.org/lkml/20200228163011.19358-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/builtin-script.c | 70 ++++++++++--------- tools/perf/tests/sample-parsing.c | 7 +- tools/perf/util/branch.h | 22 ++++++ tools/perf/util/cs-etm.c | 2 + tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 5 ++ tools/perf/util/evsel.h | 5 ++ tools/perf/util/hist.c | 3 +- tools/perf/util/intel-pt.c | 2 + tools/perf/util/machine.c | 35 +++++----- .../scripting-engines/trace-event-python.c | 30 ++++---- tools/perf/util/session.c | 8 ++- tools/perf/util/synthetic-events.c | 6 +- 13 files changed, 125 insertions(+), 71 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bbf1f2d3387e..bb64dbfe043a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, return 0; for (i = 0; i < br->nr; i++) { - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); @@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; thread__find_symbol_fb(thread, sample->cpumode, from, &alf); thread__find_symbol_fb(thread, sample->cpumode, to, &alt); @@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && !alf.map->dso->adjust_symbols) @@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str(br->entries + i), - br->entries[i].flags.in_tx ? 'X' : '-', - br->entries[i].flags.abort ? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -1011,6 +1014,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, struct machine *machine, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 start, end; int i, insn, len, nr, ilen, printed = 0; struct perf_insn x; @@ -1031,31 +1035,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += fprintf(fp, "%c", '\n'); /* Handle first from jump, of which we don't know the entry. */ - len = grab_bb(buffer, br->entries[nr-1].from, - br->entries[nr-1].from, + len = grab_bb(buffer, entries[nr-1].from, + entries[nr-1].from, machine, thread, &x.is64bit, &x.cpumode, false); if (len > 0) { - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, + printed += ip__fprintf_sym(entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } /* Print all blocks */ for (i = nr - 2; i >= 0; i--) { - if (br->entries[i].from || br->entries[i].to) + if (entries[i].from || entries[i].to) pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, - br->entries[i].from, - br->entries[i].to); - start = br->entries[i + 1].to; - end = br->entries[i].from; + entries[i].from, + entries[i].to); + start = entries[i + 1].to; + end = entries[i].from; len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); /* Patch up missing kernel transfers due to ring filters */ if (len == -ENXIO && i > 0) { - end = br->entries[--i].from; + end = entries[--i].from; pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); } @@ -1068,7 +1072,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp, + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); @@ -1092,9 +1096,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * Hit the branch? In this case we are already done, and the target * has not been executed yet. */ - if (br->entries[0].from == sample->ip) + if (entries[0].from == sample->ip) goto out; - if (br->entries[0].flags.abort) + if (entries[0].flags.abort) goto out; /* @@ -1105,7 +1109,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * between final branch and sample. When this happens just * continue walking after the last TO until we hit a branch. */ - start = br->entries[0].to; + start = entries[0].to; end = sample->ip; if (end < start) { /* Missing jump. Scan 128 bytes for the next branch */ diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 12b8278a0e35..31cca805daff 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); + COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) MCOMP(branch_stack->entries[i]); } @@ -189,7 +190,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 data[64]; } branch_stack = { /* 1 branch_entry */ - .data = {1, 211, 212, 213}, + .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 }; @@ -211,6 +212,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .transaction = 112, .raw_data = (void *)raw_data, .callchain = &callchain.callchain, + .no_hw_idx = false, .branch_stack = &branch_stack.branch_stack, .user_regs = { .abi = PERF_SAMPLE_REGS_ABI_64, @@ -248,6 +250,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) if (sample_type & PERF_SAMPLE_REGS_INTR) evsel.core.attr.sample_regs_intr = sample_regs; + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + for (i = 0; i < sizeof(regs); i++) *(i + (u8 *)regs) = i & 0xfe; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 88e00d268f6f..154a05cd03af 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -12,6 +12,7 @@ #include #include #include +#include "event.h" struct branch_flags { u64 mispred:1; @@ -39,9 +40,30 @@ struct branch_entry { struct branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries[0]; }; +/* + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. + * Otherwise, the output format of a sample with branch stack is + * struct branch_stack { + * u64 nr; + * struct branch_entry entries[0]; + * } + * Check whether the hw_idx is available, + * and return the corresponding pointer of entries[0]. + */ +static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) +{ + u64 *entry = (u64 *)sample->branch_stack; + + entry++; + if (sample->no_hw_idx) + return (struct branch_entry *)entry; + return (struct branch_entry *)(++entry); +} + struct branch_type_stat { bool branch_to; u64 counts[PERF_BR_MAX]; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f5a9cb408808..f9cc15f93c4a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1192,6 +1192,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; u64 ip; @@ -1222,6 +1223,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, if (etm->synth_opts.last_branch) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c8d31205a3dd..c2fd88a2c09e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -142,6 +142,7 @@ struct perf_sample { u16 insn_len; u8 cpumode; u16 misc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6b4e7bf75312..272f77ce05c4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2207,7 +2207,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (data->branch_stack->nr > max_branch_nr) return -EFAULT; + sz = data->branch_stack->nr * sizeof(struct branch_entry); + if (perf_evsel__has_branch_hw_idx(evsel)) + sz += sizeof(u64); + else + data->no_hw_idx = true; OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 2b4c33e773de..7a0eb975f8bf 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -402,6 +402,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +{ + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + static inline bool evsel__has_callchain(const struct evsel *evsel) { /* diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7b6eaf5e0bda..151b9e43c88f 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2572,9 +2572,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode) { struct branch_info *bi; + struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. */ - if (bs && bs->nr && bs->entries[0].flags.cycles) { + if (bs && bs->nr && entries[0].flags.cycles) { int i; bi = sample__resolve_bstack(sample, al); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bdb4b2586a7d..5e72294bc030 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1279,6 +1279,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1300,6 +1301,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c02ec8fcce55..7e3957602885 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2094,15 +2094,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, { unsigned int i; const struct branch_stack *bs = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); - bi[i].flags = bs->entries[i].flags; + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); + bi[i].flags = entries[i].flags; } return bi; } @@ -2198,6 +2199,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); int lbr_nr = lbr_stack->nr, j, k; bool branch; struct branch_flags *flags; @@ -2223,31 +2225,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = chain->ips[j]; else if (j > i + 1) { k = j - i - 2; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } else { if (j < lbr_nr) { k = lbr_nr - j - 1; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } @@ -2294,6 +2294,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int max_stack) { struct branch_stack *branch = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2341,7 +2342,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { if (callchain_param.order == ORDER_CALLEE) { - be[i] = branch->entries[i]; + be[i] = entries[i]; if (chain == NULL) continue; @@ -2360,7 +2361,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i].from >= chain->ips[first_call] - 8) first_call++; } else - be[i] = branch->entries[branch->nr - i - 1]; + be[i] = entries[branch->nr - i - 1]; } memset(iter, 0, sizeof(struct iterations) * nr); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 3b02c3f1b289..2bdd10c4c246 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; @@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); pydict_set_item_string_decref(pyelem, "from", - PyLong_FromUnsignedLongLong(br->entries[i].from)); + PyLong_FromUnsignedLongLong(entries[i].from)); pydict_set_item_string_decref(pyelem, "to", - PyLong_FromUnsignedLongLong(br->entries[i].to)); + PyLong_FromUnsignedLongLong(entries[i].to)); pydict_set_item_string_decref(pyelem, "mispred", - PyBool_FromLong(br->entries[i].flags.mispred)); + PyBool_FromLong(entries[i].flags.mispred)); pydict_set_item_string_decref(pyelem, "predicted", - PyBool_FromLong(br->entries[i].flags.predicted)); + PyBool_FromLong(entries[i].flags.predicted)); pydict_set_item_string_decref(pyelem, "in_tx", - PyBool_FromLong(br->entries[i].flags.in_tx)); + PyBool_FromLong(entries[i].flags.in_tx)); pydict_set_item_string_decref(pyelem, "abort", - PyBool_FromLong(br->entries[i].flags.abort)); + PyBool_FromLong(entries[i].flags.abort)); pydict_set_item_string_decref(pyelem, "cycles", - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; char bf[512]; @@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "to", _PyUnicode_FromString(bf)); - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "pred", _PyUnicode_FromString(bf)); - if (br->entries[i].flags.in_tx) { + if (entries[i].flags.in_tx) { pydict_set_item_string_decref(pyelem, "in_tx", _PyUnicode_FromString("X")); } else { @@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, _PyUnicode_FromString("-")); } - if (br->entries[i].flags.abort) { + if (entries[i].flags.abort) { pydict_set_item_string_decref(pyelem, "abort", _PyUnicode_FromString("A")); } else { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e8a4516070b3..7710481ab01a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1006,6 +1006,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) { struct ip_callchain *callchain = sample->callchain; struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 kernel_callchain_nr = callchain->nr; unsigned int i; @@ -1042,10 +1043,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) i, callchain->ips[i]); printf("..... %2d: %016" PRIx64 "\n", - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + (int)(kernel_callchain_nr), entries[0].to); for (i = 0; i < lbr_stack->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + (int)(i + kernel_callchain_nr + 1), entries[i].from); } } @@ -1067,6 +1068,7 @@ static void callchain__printf(struct evsel *evsel, static void branch_stack__printf(struct perf_sample *sample, bool callstack) { + struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; printf("%s: nr:%" PRIu64 "\n", @@ -1074,7 +1076,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) sample->branch_stack->nr); for (i = 0; i < sample->branch_stack->nr; i++) { - struct branch_entry *e = &sample->branch_stack->entries[i]; + struct branch_entry *e = &entries[i]; if (!callstack) { printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2c0f7ec3f8b7..b7d0f6614daf 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1183,7 +1183,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); result += sz; } @@ -1353,7 +1354,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); memcpy(array, sample->branch_stack, sz); array = (void *)array + sz; } -- Gitee From a4f502a54af500f0e43fd2a10c9a593faf6b7fdc Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 4 Jun 2022 10:15:12 +0530 Subject: [PATCH 77/77] perf record ibs: Warn about sampling period skew commit 9ab95b0b15a092abb7c3309f3580f572a041f9ab upstream Samples without an L3 miss are discarded and counter is reset with random value (between 1-15 for fetch PMU and 1-127 for op PMU) when IBS L3 miss filtering is enabled. This causes a sampling period skew but there is no way to reconstruct aggregated sampling period. So print a warning at perf record if user sets l3missonly=1. Ex: # perf record -c 10000 -C 0 -e ibs_op/l3missonly=1/ WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled and tagged operation does not cause L3 Miss. This causes sampling period skew. [Backport changes] 1.In the tools/perf/arch/x86/util/evsel.c file, the #include "util/pmu.h" directive was added manually instead of backporting upstream commit d98079c05b5a54, in order to avoid introducing over 20 dependencies. 2.In tools/perf/arch/x86/util/evsel.c, inside arch__post_evsel_config(), the function name evsel__find_pmu() was manually changed back to perf_evsel__find_pmu() to match the current kernel. The upstream commit e76026bdd51bcd only renames the function without modifying its logic, but backporting it would require over 20 extra patches. So the change was made manually to avoid that overhead. Signed-off-by: Ravi Bangoria Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: like.xu.linux@gmail.com Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20220604044519.594-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: priyanka-mani Signed-off-by: mohanasv2 --- tools/perf/arch/x86/util/evsel.c | 52 ++++++++++++++++++++++++++++++++ tools/perf/util/evsel.c | 7 +++++ tools/perf/util/evsel.h | 1 + 3 files changed, 60 insertions(+) diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index ac2899a25b7a..6222bbb26c65 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -3,7 +3,12 @@ #include #include "util/evsel.h" #include "util/env.h" +#include "util/pmu.h" #include "linux/string.h" +#include "util/debug.h" + +#define IBS_FETCH_L3MISSONLY (1ULL << 59) +#define IBS_OP_L3MISSONLY (1ULL << 16) void arch_evsel__set_sample_weight(struct evsel *evsel) { @@ -29,3 +34,50 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) free(env.cpuid); } +static void ibs_l3miss_warn(void) +{ + pr_warning( +"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n" +"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n"); +} + +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) +{ + struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu; + static int warned_once; + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + + if (warned_once || is_amd == -1) + return; + + if (!is_amd) { + struct perf_env *env = perf_evsel__env(evsel); + + if (!perf_env__cpuid(env) || !env->cpuid || + !strstarts(env->cpuid, "AuthenticAMD")) { + is_amd = -1; + return; + } + is_amd = 1; + } + + evsel_pmu = evsel__find_pmu(evsel); + if (!evsel_pmu) + return; + + ibs_fetch_pmu = perf_pmu__find("ibs_fetch"); + ibs_op_pmu = perf_pmu__find("ibs_op"); + + if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_FETCH_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_OP_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } +} diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 272f77ce05c4..147855005d31 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -932,6 +932,11 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un { } +void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ +} + /* * The enable_on_exec/disabled value strategy: * @@ -1201,6 +1206,8 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (opts->initial_delay && is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); + + arch__post_evsel_config(evsel, attr); } int perf_evsel__set_filter(struct evsel *evsel, const char *filter) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7a0eb975f8bf..a7e7a25af2be 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -236,6 +236,7 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter); void arch_evsel__set_sample_weight(struct evsel *evsel); void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -- Gitee