mirror of
https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux
synced 2025-10-29 10:08:18 +10:00
Add PMU events for AMD Zen3 processors as documented in the AMD Processor
Programming Reference for Family 19h and Model 01h [1].
Below are the events which are new on Zen3:
PMCx041 ls_mab_alloc.{all_allocations|hardware_prefetcher_allocations|load_store_allocations}
PMCx043 ls_dmnd_fills_from_sys.ext_cache_local
PMCx044 ls_any_fills_from_sys.{mem_io_remote|ext_cache_remote|mem_io_local|ext_cache_local|int_cache|lcl_l2}
PMCx047 ls_misal_loads.{ma4k|ma64}
PMCx059 ls_sw_pf_dc_fills.ext_cache_local
PMCx05a ls_hw_pf_dc_fills.ext_cache_local
PMCx05f ls_alloc_mab_count
PMCx085 bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k
PMCx0ab de_dis_cops_from_decoder.disp_op_type.{any_integer_dispatch|any_fp_dispatch}
PMCx0cc ex_ret_ind_brch_instr
PMCx18e ic_tag_hit_miss.{all_instruction_cache_accesses|instruction_cache_miss|instruction_cache_hit}
PMCx1c7 ex_ret_msprd_brnch_instr_dir_msmtch
PMCx28f op_cache_hit_miss.{all_op_cache_accesses|op_cache_miss|op_cache_hit}
Section 2.1.17.2 "Performance Measurement" of the PPR for AMD Family 19h,
Model 01h, Revision B1 Processors (55898 Rev 0.35, Feb 5, 2021) [1] lists
new metrics. Add them.
For Zen3, preserve the events taken from the Zen2 directory if they are
measurable and non-zero, even if the Zen3 PPR [1] omits them. Those events
are the following:
PMCx000 fpu_pipe_assignment.{total|total0|total1|total2|total3}
PMCx004 fp_num_mov_elim_scal_op.{optimized|opt_potential|sse_mov_ops_elim|sse_mov_ops}
PMCx02D ls_rdtsc
PMCx040 ls_dc_accesses
PMCx046 ls_tablewalker.{iside|ic_type1|ic_type0|dside|dc_type1|dc_type0}
PMCx061 l2_request_g2.{group1|ls_rd_sized|ls_rd_sized_nc|ic_rd_sized|ic_rd_sized_nc|smc_inval|bus_lock_originator|bus_locks_responses}
PMCx062 l2_latency.l2_cycles_waiting_on_fills
PMCx063 l2_wcb_req.{wcb_write|wcb_close|zero_byte_store|cl_zero}
PMCx06d l2_fill_pending.l2_fill_busy
PMCx080 ic_fw32
PMCx081 ic_fw32_miss
PMCx086 bp_snp_re_sync
PMCx087 ic_fetch_stall.{ic_stall_any|ic_stall_dq_empty|ic_stall_back_pressure}
PMCx08a bp_l1_btb_correct
PMCx08c ic_cache_inval.{l2_invalidating_probe|fill_invalidated}
PMCx099 bp_tlb_rel
PMCx0a9 de_dis_uop_queue_empty_di0
PMCx0c7 ex_ret_brn_resync
PMCx28a ic_oc_mode_switch.{oc_ic_mode_switch|ic_oc_mode_switch}
L3PMCx01 l3_request_g1.caching_l3_cache_accesses
L3PMCx06 l3_comb_clstr_state.{other_l3_miss_typs|request_miss}
[1] Processor Programming Reference (PPR) for AMD Family 19h, Model 01h,
Revision B1 Processors - 55898 Rev 0.35 - Feb 5, 2021.
[2] Processor Programming Reference (PPR) for AMD Family 17h Model 71h,
Revision B0 Processors, 56176 Rev 3.06 - Jul 17, 2019.
[3] Processor Programming Reference (PPR) for AMD Family 17h Models
01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019.
All of the PPRs can be found at:
https://bugzilla.kernel.org/show_bug.cgi?id=206537
Reviewed-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin Liška <mliska@suse.cz>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vijay Thakkar <vijaythakkar@me.com>
Cc: linux-perf-users@vger.kernel.org
Link: https://lore.kernel.org/r/20210406215944.113332-5-Smita.KoralahalliChannabasappa@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
215 lines
6.8 KiB
JSON
[
|
|
{
|
|
"MetricName": "branch_misprediction_ratio",
|
|
"BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)",
|
|
"MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)",
|
|
"MetricGroup": "branch_prediction",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"EventName": "all_data_cache_accesses",
|
|
"EventCode": "0x29",
|
|
"BriefDescription": "All L1 Data Cache Accesses",
|
|
"UMask": "0x07"
|
|
},
|
|
{
|
|
"MetricName": "all_l2_cache_accesses",
|
|
"BriefDescription": "All L2 Cache Accesses",
|
|
"MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_accesses_from_ic_misses",
|
|
"EventCode": "0x60",
|
|
"BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)",
|
|
"UMask": "0x10"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_accesses_from_dc_misses",
|
|
"EventCode": "0x60",
|
|
"BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)",
|
|
"UMask": "0xe8"
|
|
},
|
|
{
|
|
"MetricName": "l2_cache_accesses_from_l2_hwpf",
|
|
"BriefDescription": "L2 Cache Accesses from L2 HWPF",
|
|
"MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"MetricName": "all_l2_cache_misses",
|
|
"BriefDescription": "All L2 Cache Misses",
|
|
"MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_misses_from_ic_miss",
|
|
"EventCode": "0x64",
|
|
"BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses",
|
|
"UMask": "0x01"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_misses_from_dc_misses",
|
|
"EventCode": "0x64",
|
|
"BriefDescription": "L2 Cache Misses from L1 Data Cache Misses",
|
|
"UMask": "0x08"
|
|
},
|
|
{
|
|
"MetricName": "l2_cache_misses_from_l2_hwpf",
|
|
"BriefDescription": "L2 Cache Misses from L2 Cache HWPF",
|
|
"MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"MetricName": "all_l2_cache_hits",
|
|
"BriefDescription": "All L2 Cache Hits",
|
|
"MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_hits_from_ic_misses",
|
|
"EventCode": "0x64",
|
|
"BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses",
|
|
"UMask": "0x06"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_hits_from_dc_misses",
|
|
"EventCode": "0x64",
|
|
"BriefDescription": "L2 Cache Hits from L1 Data Cache Misses",
|
|
"UMask": "0xf0"
|
|
},
|
|
{
|
|
"EventName": "l2_cache_hits_from_l2_hwpf",
|
|
"EventCode": "0x70",
|
|
"BriefDescription": "L2 Cache Hits from L2 Cache HWPF",
|
|
"UMask": "0xff"
|
|
},
|
|
{
|
|
"EventName": "l3_cache_accesses",
|
|
"EventCode": "0x04",
|
|
"BriefDescription": "L3 Cache Accesses",
|
|
"UMask": "0xff",
|
|
"Unit": "L3PMC"
|
|
},
|
|
{
|
|
"EventName": "l3_misses",
|
|
"EventCode": "0x04",
|
|
"BriefDescription": "L3 Misses (includes cacheline state change requests)",
|
|
"UMask": "0x01",
|
|
"Unit": "L3PMC"
|
|
},
|
|
{
|
|
"MetricName": "l3_read_miss_latency",
|
|
"BriefDescription": "Average L3 Read Miss Latency (in core clocks)",
|
|
"MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1",
|
|
"MetricGroup": "l3_cache",
|
|
"ScaleUnit": "1core clocks"
|
|
},
|
|
{
|
|
"MetricName": "op_cache_fetch_miss_ratio",
|
|
"BriefDescription": "Op Cache (64B) Fetch Miss Ratio",
|
|
"MetricExpr": "d_ratio(op_cache_hit_miss.op_cache_miss, op_cache_hit_miss.all_op_cache_accesses)",
|
|
"MetricGroup": "l2_cache"
|
|
},
|
|
{
|
|
"MetricName": "ic_fetch_miss_ratio",
|
|
"BriefDescription": "Instruction Cache (32B) Fetch Miss Ratio",
|
|
"MetricExpr": "d_ratio(ic_tag_hit_miss.instruction_cache_miss, ic_tag_hit_miss.all_instruction_cache_accesses)",
|
|
"MetricGroup": "l2_cache",
|
|
"ScaleUnit": "100%"
|
|
},
|
|
{
|
|
"EventName": "l1_data_cache_fills_from_memory",
|
|
"EventCode": "0x44",
|
|
"BriefDescription": "L1 Data Cache Fills: From Memory",
|
|
"UMask": "0x48"
|
|
},
|
|
{
|
|
"EventName": "l1_data_cache_fills_from_remote_node",
|
|
"EventCode": "0x44",
|
|
"BriefDescription": "L1 Data Cache Fills: From Remote Node",
|
|
"UMask": "0x50"
|
|
},
|
|
{
|
|
"EventName": "l1_data_cache_fills_from_within_same_ccx",
|
|
"EventCode": "0x44",
|
|
"BriefDescription": "L1 Data Cache Fills: From within same CCX",
|
|
"UMask": "0x03"
|
|
},
|
|
{
|
|
"EventName": "l1_data_cache_fills_from_external_ccx_cache",
|
|
"EventCode": "0x44",
|
|
"BriefDescription": "L1 Data Cache Fills: From External CCX Cache",
|
|
"UMask": "0x14"
|
|
},
|
|
{
|
|
"EventName": "l1_data_cache_fills_all",
|
|
"EventCode": "0x44",
|
|
"BriefDescription": "L1 Data Cache Fills: All",
|
|
"UMask": "0xff"
|
|
},
|
|
{
|
|
"MetricName": "l1_itlb_misses",
|
|
"BriefDescription": "L1 ITLB Misses",
|
|
"MetricExpr": "bp_l1_tlb_miss_l2_tlb_hit + bp_l1_tlb_miss_l2_tlb_miss",
|
|
"MetricGroup": "tlb"
|
|
},
|
|
{
|
|
"EventName": "l2_itlb_misses",
|
|
"EventCode": "0x85",
|
|
"BriefDescription": "L2 ITLB Misses & Instruction page walks",
|
|
"UMask": "0x07"
|
|
},
|
|
{
|
|
"EventName": "l1_dtlb_misses",
|
|
"EventCode": "0x45",
|
|
"BriefDescription": "L1 DTLB Misses",
|
|
"UMask": "0xff"
|
|
},
|
|
{
|
|
"EventName": "l2_dtlb_misses",
|
|
"EventCode": "0x45",
|
|
"BriefDescription": "L2 DTLB Misses & Data page walks",
|
|
"UMask": "0xf0"
|
|
},
|
|
{
|
|
"EventName": "all_tlbs_flushed",
|
|
"EventCode": "0x78",
|
|
"BriefDescription": "All TLBs Flushed",
|
|
"UMask": "0xff"
|
|
},
|
|
{
|
|
"MetricName": "macro_ops_dispatched",
|
|
"BriefDescription": "Macro-ops Dispatched",
|
|
"MetricExpr": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch + de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch",
|
|
"MetricGroup": "decoder"
|
|
},
|
|
{
|
|
"EventName": "sse_avx_stalls",
|
|
"EventCode": "0x0e",
|
|
"BriefDescription": "Mixed SSE/AVX Stalls",
|
|
"UMask": "0x0e"
|
|
},
|
|
{
|
|
"EventName": "macro_ops_retired",
|
|
"EventCode": "0xc1",
|
|
"BriefDescription": "Macro-ops Retired"
|
|
},
|
|
{
|
|
"MetricName": "all_remote_links_outbound",
|
|
"BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)",
|
|
"MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3",
|
|
"MetricGroup": "data_fabric",
|
|
"PerPkg": "1",
|
|
"ScaleUnit": "3e-5MiB"
|
|
},
|
|
{
|
|
"MetricName": "nps1_die_to_dram",
|
|
"BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
|
|
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
|
|
"MetricGroup": "data_fabric",
|
|
"PerPkg": "1",
|
|
"ScaleUnit": "6.1e-5MiB"
|
|
}
|
|
]
|