linux/tools/arch/x86/include/asm/amd/ibs.h
Linus Torvalds 33c66eb5e9 Performance events changes for v7.1:
Core updates:
 
  - Try to allocate task_ctx_data quickly, to optimize
    O(N^2) algorithm on large systems with O(100k) threads
    (Namhyung Kim)
 
 AMD PMU driver IBS support updates and fixes, by Ravi Bangoria:
 
  - Fix interrupt accounting for discarded samples
  - Fix a Zen5-specific quirk
  - Fix PhyAddrVal handling
  - Fix NMI-safety with perf_allow_kernel()
  - Fix a race between event add and NMIs
 
 Intel PMU driver updates:
 
  - Only check GP counters for PEBS constraints validation (Dapeng Mi)
 
 MSR driver:
 
  - Turn SMI_COUNT and PPERF on by default, instead of a long
    list of CPU models to enable them on (Kan Liang)
 
 Misc cleanups and fixes by Aldf Conte, Anshuman Khandual, Namhyung Kim,
 Ravi Bangoria and Yen-Hsiang Hsu.
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmncppoRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1hN2hAAgd8ix2hZjT/v/wH0iIayRKPEI8KqQ0XP
 7L0nqHVrNw3gzsxkRBIBljyWdYhHmxYnV2ExgddwXdpcD2j/Vf2YUfNtrE0fXL5J
 wD8/M4WxzxA2gwcRgz3kGaeU/I0Ble9TcIdSaI3kJFJarHaDw3jEsif/gfmYbZfm
 oX+gjIUCspnUMqb5EqsHdWxPWub87NnddPI8c9hmhq/9IZ4QvhUxS+lQHc+GihpY
 MTlTxG10W/+f84w0lyG153KslV1rngIqoQ2uJTRe0fjx3VX1uhsgB3LCrkTzMOWe
 GVODiMhN9u5o0pfJLbboSDZ32z3QrsojXbS2Z+ZHqfqbgompIzH9SVh5fFSGKtfK
 64CEP4mO90JGGnDYS6vaPJhZrbusZKzuLt0tcn0aIYHD48PNJXhD2tVE76JsnmAj
 SicnL78QOQkB8Gi0LuCXhxPXY/KAqFtOgmKV9x+gqJuAFgTXEUhem6IOJjShhwOQ
 NfIkXDHz7kmMLblWRmuslGOWfWddRKheQNvuJ+YqbVto6N192PQdSjnBBZjX8GpL
 o52FYCbwGXckZ9X+SU55j3lmQbmtS5Rn8PwB7dmHnVIp8bRI62ANQVoNc1feIrAt
 7UA0SrIPz94oe8tH8lQYb5d98fv/6+Nroli8Vpik4wQZf1VUrzlEEXv/m9CTOL4G
 FA5CtFF1AmA=
 =4Vs0
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "Core updates:

   - Try to allocate task_ctx_data quickly, to optimize O(N^2) algorithm
     on large systems with O(100k) threads (Namhyung Kim)

  AMD PMU driver IBS support updates and fixes, by Ravi Bangoria:
   - Fix interrupt accounting for discarded samples
   - Fix a Zen5-specific quirk
   - Fix PhyAddrVal handling
   - Fix NMI-safety with perf_allow_kernel()
   - Fix a race between event add and NMIs

  Intel PMU driver updates:
   - Only check GP counters for PEBS constraints validation (Dapeng Mi)

  MSR driver:
   - Turn SMI_COUNT and PPERF on by default, instead of a long list of
     CPU models to enable them on (Kan Liang)

  ... and misc cleanups and fixes by Aldf Conte, Anshuman Khandual,
  Namhyung Kim, Ravi Bangoria and Yen-Hsiang Hsu"

* tag 'perf-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/events: Replace READ_ONCE() with standard pgtable accessors
  perf/x86/msr: Make SMI and PPERF on by default
  perf/x86/intel/p4: Fix unused variable warning in p4_pmu_init()
  perf/x86/intel: Only check GP counters for PEBS constraints validation
  perf/x86/amd/ibs: Fix comment typo in ibs_op_data
  perf/amd/ibs: Advertise remote socket capability
  perf/amd/ibs: Enable streaming store filter
  perf/amd/ibs: Enable RIP bit63 hardware filtering
  perf/amd/ibs: Enable fetch latency filtering
  perf/amd/ibs: Support IBS_{FETCH|OP}_CTL2[Dis] to eliminate RMW race
  perf/amd/ibs: Add new MSRs and CPUID bits definitions
  perf/amd/ibs: Define macro for ldlat mask and shift
  perf/amd/ibs: Avoid race between event add and NMI
  perf/amd/ibs: Avoid calling perf_allow_kernel() from the IBS NMI handler
  perf/amd/ibs: Preserve PhyAddrVal bit when clearing PhyAddr MSR
  perf/amd/ibs: Limit ldlat->l3missonly dependency to Zen5
  perf/amd/ibs: Account interrupt for discarded samples
  perf/core: Simplify __detach_global_ctx_data()
  perf/core: Try to allocate task_ctx_data quickly
  perf/core: Pass GFP flags to attach_task_ctx_data()
2026-04-14 13:22:40 -07:00

159 lines
5.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_AMD_IBS_H
#define _ASM_X86_AMD_IBS_H
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
*/
#include "../msr-index.h"
/*
 * IBS_OP_DATA2 DataSrc values.
 *
 * Only the encodings named below have a constant in this header; the
 * numbering is not contiguous.
 */
#define IBS_DATA_SRC_LOC_CACHE			2
#define IBS_DATA_SRC_DRAM			3
#define IBS_DATA_SRC_REM_CACHE			4
#define IBS_DATA_SRC_IO				7

/*
 * IBS_OP_DATA2 DataSrc Extension values.
 *
 * Kept as a separate set of names (IBS_DATA_SRC_EXT_*) because the same
 * number does not always name the same source as in the set above
 * (e.g. 2 is LOC_CACHE above but EXT_NEAR_CCX_CACHE here).
 */
#define IBS_DATA_SRC_EXT_LOC_CACHE		1
#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE		2
#define IBS_DATA_SRC_EXT_DRAM			3
#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE		5
#define IBS_DATA_SRC_EXT_PMEM			6
#define IBS_DATA_SRC_EXT_IO			7
#define IBS_DATA_SRC_EXT_EXT_MEM		8
#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM		12
/*
* IBS Hardware MSRs
*/
/*
 * IBS Fetch Control register (MSR 0xc0011030).
 *
 * The whole register is accessible through 'val'; the anonymous struct
 * overlays the same 64 bits with named bit-fields. The bit numbers quoted
 * below match the fields only when the compiler allocates bit-fields
 * starting from the least significant bit.
 */
union ibs_fetch_ctl {
	__u64 val;
	struct {
		__u64 fetch_maxcnt:16;	/* bits 0-15: instruction fetch max. count */
		__u64 fetch_cnt:16;	/* bits 16-31: instruction fetch count */
		__u64 fetch_lat:16;	/* bits 32-47: instruction fetch latency */
		__u64 fetch_en:1;	/* bit 48: instruction fetch enable */
		__u64 fetch_val:1;	/* bit 49: instruction fetch valid */
		__u64 fetch_comp:1;	/* bit 50: instruction fetch complete */
		__u64 ic_miss:1;	/* bit 51: i-cache miss */
		__u64 phy_addr_valid:1;	/* bit 52: physical address valid */
		__u64 l1tlb_pgsz:2;	/* bits 53-54: i-cache L1TLB page size (needs IbsPhyAddrValid) */
		__u64 l1tlb_miss:1;	/* bit 55: i-cache fetch missed in L1TLB */
		__u64 l2tlb_miss:1;	/* bit 56: i-cache fetch missed in L2TLB */
		__u64 rand_en:1;	/* bit 57: random tagging enable */
		__u64 fetch_l2_miss:1;	/* bit 58: L2 miss for sampled fetch (needs IbsFetchComp) */
		__u64 l3_miss_only:1;	/* bit 59: collect L3 miss samples only */
		__u64 fetch_oc_miss:1;	/* bit 60: op cache miss for the sampled fetch */
		__u64 fetch_l3_miss:1;	/* bit 61: L3 cache miss for the sampled fetch */
		__u64 reserved:2;	/* bits 62-63: reserved */
	};
};
/*
 * IBS Execution Control register (MSR 0xc0011033).
 *
 * 'val' is the raw 64-bit value; the anonymous struct names the individual
 * fields within it. The bit numbers quoted below match the fields only when
 * the compiler allocates bit-fields starting from the least significant bit.
 */
union ibs_op_ctl {
	__u64 val;
	struct {
		__u64 opmaxcnt:16;	/* bits 0-15: periodic op max. count */
		__u64 l3_miss_only:1;	/* bit 16: collect L3 miss samples only */
		__u64 op_en:1;		/* bit 17: op sampling enable */
		__u64 op_val:1;		/* bit 18: op sample valid */
		__u64 cnt_ctl:1;	/* bit 19: periodic op counter control */
		__u64 opmaxcnt_ext:7;	/* bits 20-26: upper 7 bits of periodic op maximum count */
		__u64 reserved0:5;	/* bits 27-31: reserved */
		__u64 opcurcnt:27;	/* bits 32-58: periodic op counter current count */
		__u64 ldlat_thrsh:4;	/* bits 59-62: load latency threshold */
		__u64 ldlat_en:1;	/* bit 63: load latency enabled */
	};
};
/*
 * IBS Op Data 1 register (MSR 0xc0011035).
 *
 * 'val' is the raw 64-bit value; the anonymous struct names the individual
 * fields within it. The bit numbers quoted below match the fields only when
 * the compiler allocates bit-fields starting from the least significant bit.
 */
union ibs_op_data {
	__u64 val;
	struct {
		__u64 comp_to_ret_ctr:16;	/* bits 0-15: op completion to retire count */
		__u64 tag_to_ret_ctr:16;	/* bits 16-31: op tag to retire count */
		__u64 reserved1:2;		/* bits 32-33: reserved */
		__u64 op_return:1;		/* bit 34: return op */
		__u64 op_brn_taken:1;		/* bit 35: taken branch op */
		__u64 op_brn_misp:1;		/* bit 36: mispredicted branch op */
		__u64 op_brn_ret:1;		/* bit 37: branch op retired */
		__u64 op_rip_invalid:1;		/* bit 38: RIP is invalid */
		__u64 op_brn_fuse:1;		/* bit 39: fused branch op */
		__u64 op_microcode:1;		/* bit 40: microcode op */
		__u64 reserved2:23;		/* bits 41-63: reserved */
	};
};
/*
 * IBS Op Data 2 register (MSR 0xc0011036).
 *
 * 'val' is the raw 64-bit value; the anonymous struct names the individual
 * fields within it. The data source is split across two fields
 * (data_src_lo and data_src_hi) that are not adjacent in the register.
 * The bit numbers quoted below match the fields only when the compiler
 * allocates bit-fields starting from the least significant bit.
 */
union ibs_op_data2 {
	__u64 val;
	struct {
		__u64 data_src_lo:3;	/* bits 0-2: data source low */
		__u64 reserved0:1;	/* bit 3: reserved */
		__u64 rmt_node:1;	/* bit 4: destination node */
		__u64 cache_hit_st:1;	/* bit 5: cache hit state */
		__u64 data_src_hi:2;	/* bits 6-7: data source high */
		__u64 reserved1:56;	/* bits 8-63: reserved */
	};
};
/*
 * IBS Op Data 3 register (MSR 0xc0011037).
 *
 * 'val' is the raw 64-bit value; the anonymous struct names the individual
 * fields within it. The bit numbers quoted below match the fields only when
 * the compiler allocates bit-fields starting from the least significant bit.
 */
union ibs_op_data3 {
	__u64 val;
	struct {
		__u64 ld_op:1;			/* bit 0: load op */
		__u64 st_op:1;			/* bit 1: store op */
		__u64 dc_l1tlb_miss:1;		/* bit 2: data cache L1TLB miss */
		/*
		 * NOTE(review): this field was previously described as an
		 * L2TLB miss "in 2M page"; the field name carries no page-size
		 * qualifier, so it is dropped here - confirm against the PPR.
		 */
		__u64 dc_l2tlb_miss:1;		/* bit 3: data cache L2TLB miss */
		__u64 dc_l1tlb_hit_2m:1;	/* bit 4: data cache L1TLB hit in 2M page */
		__u64 dc_l1tlb_hit_1g:1;	/* bit 5: data cache L1TLB hit in 1G page */
		__u64 dc_l2tlb_hit_2m:1;	/* bit 6: data cache L2TLB hit in 2M page */
		__u64 dc_miss:1;		/* bit 7: data cache miss */
		__u64 dc_mis_acc:1;		/* bit 8: misaligned access */
		__u64 reserved:4;		/* bits 9-12: reserved */
		__u64 dc_wc_mem_acc:1;		/* bit 13: write combining memory access */
		__u64 dc_uc_mem_acc:1;		/* bit 14: uncacheable memory access */
		__u64 dc_locked_op:1;		/* bit 15: locked operation */
		__u64 dc_miss_no_mab_alloc:1;	/* bit 16: DC miss with no MAB allocated */
		__u64 dc_lin_addr_valid:1;	/* bit 17: data cache linear address valid */
		__u64 dc_phy_addr_valid:1;	/* bit 18: data cache physical address valid */
		__u64 dc_l2_tlb_hit_1g:1;	/* bit 19: data cache L2 hit in 1GB page */
		__u64 l2_miss:1;		/* bit 20: L2 cache miss */
		__u64 sw_pf:1;			/* bit 21: software prefetch */
		__u64 op_mem_width:4;		/* bits 22-25: load/store size in bytes */
		__u64 op_dc_miss_open_mem_reqs:6; /* bits 26-31: outstanding mem reqs on DC fill */
		__u64 dc_miss_lat:16;		/* bits 32-47: data cache miss latency */
		__u64 tlb_refill_lat:16;	/* bits 48-63: L1 TLB refill latency */
	};
};
/*
 * IBS Fetch Control Extended register (MSR 0xc001103c).
 *
 * 'val' is the raw 64-bit value; only the low 16 bits carry a named field.
 * The bit numbers quoted below match the fields only when the compiler
 * allocates bit-fields starting from the least significant bit.
 */
union ic_ibs_extd_ctl {
	__u64 val;
	struct {
		__u64 itlb_refill_lat:16;	/* bits 0-15: ITLB refill latency for sampled fetch */
		__u64 reserved:48;		/* bits 16-63: reserved */
	};
};
/*
* IBS driver related
*/
/*
 * Container for IBS data: a size field, a u32 slot that can be viewed
 * either as the start of a data buffer or as a capabilities word, and an
 * array of 64-bit register values.
 *
 * NOTE(review): the units of 'size' and the exact contents/order of 'regs'
 * are defined by the IBS driver, not by this header - confirm there.
 */
struct perf_ibs_data {
/* NOTE(review): presumably the size of the data that follows - confirm units with the driver */
u32 size;
/*
 * Anonymous union: 'data' and 'caps' share the same offset. 'data' is a
 * zero-length array, so it adds no storage of its own and only marks where
 * the buffer begins; the union's size comes from 'caps'.
 */
union {
u32 data[0]; /* data buffer starts here */
u32 caps;
};
/* 64-bit slots, MSR_AMD64_IBS_REG_COUNT_MAX of them (constant defined outside this header) */
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
#endif /* _ASM_X86_AMD_IBS_H */