mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 10:04:04 +02:00
perf tools fixes for v6.16, 1st batch:
- Fix some file descriptor leaks that stand out with recent changes to
'perf list'.
- Fix prctl include to fix building 'perf bench futex' hash with musl libc.
- Restrict 'perf test' uniquifying entry to machines with 'uncore_imc' PMUs.
- Document new output fields (op, cache, mem, dtlb, snoop) used with
'perf mem'.
- Synchronize kernel header copies.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCaFWAEwAKCRCyPKLppCJ+
J0/LAP46WdlK1T49um790QowJR+HqRQEngur6ZIb8ya0FpCc4AEAnv8KdGdeoF21
H403o3fbCaMGN5ZQ4loe3IEO9gh6EAY=
=9GzE
-----END PGP SIGNATURE-----
Merge tag 'perf-tools-fixes-for-v6.16-1-2025-06-20' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools fixes from Arnaldo Carvalho de Melo:
- Fix some file descriptor leaks that stand out with recent changes to
'perf list'
- Fix prctl include to fix building 'perf bench futex' hash with musl
libc
- Restrict 'perf test' uniquifying entry to machines with 'uncore_imc'
PMUs
- Document new output fields (op, cache, mem, dtlb, snoop) used with
'perf mem'
- Synchronize kernel header copies
* tag 'perf-tools-fixes-for-v6.16-1-2025-06-20' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools:
tools headers x86 cpufeatures: Sync with the kernel sources
perf bench futex: Fix prctl include in musl libc
perf test: Directory file descriptor leak
perf evsel: Missed close() when probing hybrid core PMUs
tools headers: Synchronize linux/bits.h with the kernel sources
tools arch amd ibs: Sync ibs.h with the kernel sources
tools arch x86: Sync the msr-index.h copy with the kernel sources
tools headers: Syncronize linux/build_bug.h with the kernel sources
tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
tools headers UAPI: Sync linux/kvm.h with the kernel sources
tools headers UAPI: Sync the drm/drm.h with the kernel sources
perf beauty: Update copy of linux/socket.h with the kernel sources
tools headers UAPI: Sync kvm header with the kernel sources
tools headers x86 svm: Sync svm headers with the kernel sources
tools headers UAPI: Sync KVM's vmx.h header with the kernel sources
tools kvm headers arm64: Update KVM header from the kernel sources
tools headers UAPI: Sync linux/prctl.h with the kernel sources to pick FUTEX knob
perf mem: Document new output fields (op, cache, mem, dtlb, snoop)
tools headers: Update the fs headers with the kernel sources
perf test: Restrict uniquifying test to machines with 'uncore_imc'
This commit is contained in:
commit
7c7f9dd1ea
|
|
@ -4,9 +4,9 @@
|
|||
#ifndef _UAPI_LINUX_BITS_H
|
||||
#define _UAPI_LINUX_BITS_H
|
||||
|
||||
#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
|
||||
#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h))))
|
||||
|
||||
#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
|
||||
#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
|
||||
|
||||
#define __GENMASK_U128(h, l) \
|
||||
((_BIT128((h)) << 1) - (_BIT128(l)))
|
||||
|
|
|
|||
|
|
@ -431,10 +431,11 @@ enum {
|
|||
|
||||
/* Device Control API on vcpu fd */
|
||||
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
|
||||
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
|
||||
#define KVM_ARM_VCPU_PMU_V3_INIT 1
|
||||
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
|
||||
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
|
||||
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
|
||||
#define KVM_ARM_VCPU_PMU_V3_INIT 1
|
||||
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
|
||||
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
|
||||
#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
|
||||
#define KVM_ARM_VCPU_TIMER_CTRL 1
|
||||
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
|
||||
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_X86_AMD_IBS_H
|
||||
#define _ASM_X86_AMD_IBS_H
|
||||
|
||||
/*
|
||||
* From PPR Vol 1 for AMD Family 19h Model 01h B1
|
||||
* 55898 Rev 0.35 - Feb 5, 2021
|
||||
|
|
@ -151,3 +154,5 @@ struct perf_ibs_data {
|
|||
};
|
||||
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_AMD_IBS_H */
|
||||
|
|
|
|||
|
|
@ -336,7 +336,7 @@
|
|||
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
|
||||
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
|
||||
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
|
||||
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
|
||||
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
|
||||
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
|
||||
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
|
||||
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
|
||||
|
|
@ -379,6 +379,7 @@
|
|||
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
|
||||
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
|
||||
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
|
||||
#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */
|
||||
#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
|
||||
|
|
@ -447,6 +448,7 @@
|
|||
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
|
||||
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
|
||||
#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
|
||||
#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */
|
||||
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
|
||||
#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
|
||||
|
||||
|
|
@ -458,6 +460,7 @@
|
|||
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
|
||||
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
|
||||
|
||||
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
|
||||
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
|
||||
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
|
||||
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
|
||||
|
|
@ -482,7 +485,8 @@
|
|||
#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
|
||||
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
|
||||
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
|
||||
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */
|
||||
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
|
||||
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
|
|
@ -535,6 +539,8 @@
|
|||
#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
|
||||
#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
|
||||
#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
|
||||
#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */
|
||||
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
|
||||
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
|
||||
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
|
||||
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
|
|
|||
|
|
@ -533,7 +533,7 @@
|
|||
#define MSR_HWP_CAPABILITIES 0x00000771
|
||||
#define MSR_HWP_REQUEST_PKG 0x00000772
|
||||
#define MSR_HWP_INTERRUPT 0x00000773
|
||||
#define MSR_HWP_REQUEST 0x00000774
|
||||
#define MSR_HWP_REQUEST 0x00000774
|
||||
#define MSR_HWP_STATUS 0x00000777
|
||||
|
||||
/* CPUID.6.EAX */
|
||||
|
|
@ -550,16 +550,16 @@
|
|||
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
|
||||
|
||||
/* IA32_HWP_REQUEST */
|
||||
#define HWP_MIN_PERF(x) (x & 0xff)
|
||||
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
|
||||
#define HWP_MIN_PERF(x) (x & 0xff)
|
||||
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
|
||||
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
|
||||
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
|
||||
#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24)
|
||||
#define HWP_EPP_PERFORMANCE 0x00
|
||||
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
|
||||
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
|
||||
#define HWP_EPP_POWERSAVE 0xFF
|
||||
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
|
||||
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
|
||||
#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32)
|
||||
#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42)
|
||||
|
||||
/* IA32_HWP_STATUS */
|
||||
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
|
||||
|
|
@ -602,7 +602,11 @@
|
|||
/* V6 PMON MSR range */
|
||||
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
|
||||
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
|
||||
#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
|
||||
#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
|
||||
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
|
||||
#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
|
||||
#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
|
||||
#define MSR_IA32_PMC_V6_STEP 4
|
||||
|
||||
/* KeyID partitioning between MKTME and TDX */
|
||||
|
|
|
|||
|
|
@ -441,6 +441,7 @@ struct kvm_sync_regs {
|
|||
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
|
||||
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
|
||||
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
|
||||
#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
|
||||
|
||||
#define KVM_STATE_NESTED_FORMAT_VMX 0
|
||||
#define KVM_STATE_NESTED_FORMAT_SVM 1
|
||||
|
|
@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd {
|
|||
#define KVM_X86_SNP_VM 4
|
||||
#define KVM_X86_TDX_VM 5
|
||||
|
||||
/* Trust Domain eXtension sub-ioctl() commands. */
|
||||
enum kvm_tdx_cmd_id {
|
||||
KVM_TDX_CAPABILITIES = 0,
|
||||
KVM_TDX_INIT_VM,
|
||||
KVM_TDX_INIT_VCPU,
|
||||
KVM_TDX_INIT_MEM_REGION,
|
||||
KVM_TDX_FINALIZE_VM,
|
||||
KVM_TDX_GET_CPUID,
|
||||
|
||||
KVM_TDX_CMD_NR_MAX,
|
||||
};
|
||||
|
||||
struct kvm_tdx_cmd {
|
||||
/* enum kvm_tdx_cmd_id */
|
||||
__u32 id;
|
||||
/* flags for sub-command. If sub-command doesn't use this, set zero. */
|
||||
__u32 flags;
|
||||
/*
|
||||
* data for each sub-command. An immediate or a pointer to the actual
|
||||
* data in process virtual address. If sub-command doesn't use it,
|
||||
* set zero.
|
||||
*/
|
||||
__u64 data;
|
||||
/*
|
||||
* Auxiliary error code. The sub-command may return TDX SEAMCALL
|
||||
* status code in addition to -Exxx.
|
||||
*/
|
||||
__u64 hw_error;
|
||||
};
|
||||
|
||||
struct kvm_tdx_capabilities {
|
||||
__u64 supported_attrs;
|
||||
__u64 supported_xfam;
|
||||
__u64 reserved[254];
|
||||
|
||||
/* Configurable CPUID bits for userspace */
|
||||
struct kvm_cpuid2 cpuid;
|
||||
};
|
||||
|
||||
struct kvm_tdx_init_vm {
|
||||
__u64 attributes;
|
||||
__u64 xfam;
|
||||
__u64 mrconfigid[6]; /* sha384 digest */
|
||||
__u64 mrowner[6]; /* sha384 digest */
|
||||
__u64 mrownerconfig[6]; /* sha384 digest */
|
||||
|
||||
/* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
|
||||
__u64 reserved[12];
|
||||
|
||||
/*
|
||||
* Call KVM_TDX_INIT_VM before vcpu creation, thus before
|
||||
* KVM_SET_CPUID2.
|
||||
* This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
|
||||
* TDX module directly virtualizes those CPUIDs without VMM. The user
|
||||
* space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
|
||||
* those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
|
||||
* the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
|
||||
* module doesn't virtualize.
|
||||
*/
|
||||
struct kvm_cpuid2 cpuid;
|
||||
};
|
||||
|
||||
#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0)
|
||||
|
||||
struct kvm_tdx_init_mem_region {
|
||||
__u64 source_addr;
|
||||
__u64 gpa;
|
||||
__u64 nr_pages;
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@
|
|||
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
|
||||
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
|
||||
#define SVM_EXIT_INVPCID 0x0a2
|
||||
#define SVM_EXIT_BUS_LOCK 0x0a5
|
||||
#define SVM_EXIT_IDLE_HLT 0x0a6
|
||||
#define SVM_EXIT_NPF 0x400
|
||||
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
|
||||
|
|
@ -225,6 +226,7 @@
|
|||
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
|
||||
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
|
||||
{ SVM_EXIT_INVPCID, "invpcid" }, \
|
||||
{ SVM_EXIT_BUS_LOCK, "buslock" }, \
|
||||
{ SVM_EXIT_IDLE_HLT, "idle-halt" }, \
|
||||
{ SVM_EXIT_NPF, "npf" }, \
|
||||
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#define EXIT_REASON_TRIPLE_FAULT 2
|
||||
#define EXIT_REASON_INIT_SIGNAL 3
|
||||
#define EXIT_REASON_SIPI_SIGNAL 4
|
||||
#define EXIT_REASON_OTHER_SMI 6
|
||||
|
||||
#define EXIT_REASON_INTERRUPT_WINDOW 7
|
||||
#define EXIT_REASON_NMI_WINDOW 8
|
||||
|
|
@ -92,6 +93,7 @@
|
|||
#define EXIT_REASON_TPAUSE 68
|
||||
#define EXIT_REASON_BUS_LOCK 74
|
||||
#define EXIT_REASON_NOTIFY 75
|
||||
#define EXIT_REASON_TDCALL 77
|
||||
|
||||
#define VMX_EXIT_REASONS \
|
||||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
|
|
@ -155,7 +157,8 @@
|
|||
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
|
||||
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
|
||||
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
|
||||
{ EXIT_REASON_NOTIFY, "NOTIFY" }
|
||||
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
|
||||
{ EXIT_REASON_TDCALL, "TDCALL" }
|
||||
|
||||
#define VMX_EXIT_REASON_FLAGS \
|
||||
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy)
|
|||
EXPORT_SYMBOL(__memcpy)
|
||||
|
||||
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
|
||||
SYM_PIC_ALIAS(memcpy)
|
||||
EXPORT_SYMBOL(memcpy)
|
||||
|
||||
SYM_FUNC_START_LOCAL(memcpy_orig)
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ SYM_FUNC_END(__memset)
|
|||
EXPORT_SYMBOL(__memset)
|
||||
|
||||
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
|
||||
SYM_PIC_ALIAS(memset)
|
||||
EXPORT_SYMBOL(memset)
|
||||
|
||||
SYM_FUNC_START_LOCAL(memset_orig)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
|
||||
#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
|
||||
#define BITS_PER_BYTE 8
|
||||
#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
|
||||
|
||||
/*
|
||||
* Create a contiguous bitmask starting at bit position @l and ending at
|
||||
|
|
@ -19,16 +20,68 @@
|
|||
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
|
||||
*/
|
||||
#if !defined(__ASSEMBLY__)
|
||||
|
||||
/*
|
||||
* Missing asm support
|
||||
*
|
||||
* GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
|
||||
* something not available in asm. Nevertheless, fixed width integers are a C
|
||||
* concept. Assembly code can rely on the long and long long versions instead.
|
||||
*/
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/overflow.h>
|
||||
|
||||
#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
|
||||
#else
|
||||
|
||||
/*
|
||||
* Generate a mask for the specified type @t. Additional checks are made to
|
||||
* guarantee the value returned fits in that type, relying on
|
||||
* -Wshift-count-overflow compiler check to detect incompatible arguments.
|
||||
* For example, all these create build errors or warnings:
|
||||
*
|
||||
* - GENMASK(15, 20): wrong argument order
|
||||
* - GENMASK(72, 15): doesn't fit unsigned long
|
||||
* - GENMASK_U32(33, 15): doesn't fit in a u32
|
||||
*/
|
||||
#define GENMASK_TYPE(t, h, l) \
|
||||
((t)(GENMASK_INPUT_CHECK(h, l) + \
|
||||
(type_max(t) << (l) & \
|
||||
type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
|
||||
|
||||
#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
|
||||
#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
|
||||
#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
|
||||
#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
|
||||
|
||||
/*
|
||||
* Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
|
||||
* following examples generate compiler warnings due to -Wshift-count-overflow:
|
||||
*
|
||||
* - BIT_U8(8)
|
||||
* - BIT_U32(-1)
|
||||
* - BIT_U32(40)
|
||||
*/
|
||||
#define BIT_INPUT_CHECK(type, nr) \
|
||||
BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type)))
|
||||
|
||||
#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr)))
|
||||
|
||||
#define BIT_U8(nr) BIT_TYPE(u8, nr)
|
||||
#define BIT_U16(nr) BIT_TYPE(u16, nr)
|
||||
#define BIT_U32(nr) BIT_TYPE(u32, nr)
|
||||
#define BIT_U64(nr) BIT_TYPE(u64, nr)
|
||||
|
||||
#else /* defined(__ASSEMBLY__) */
|
||||
|
||||
/*
|
||||
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
|
||||
* disable the input check if that is the case.
|
||||
*/
|
||||
#define GENMASK_INPUT_CHECK(h, l) 0
|
||||
#endif
|
||||
|
||||
#endif /* !defined(__ASSEMBLY__) */
|
||||
|
||||
#define GENMASK(h, l) \
|
||||
(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
|
||||
|
|
|
|||
|
|
@ -4,17 +4,17 @@
|
|||
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#ifdef __CHECKER__
|
||||
#define BUILD_BUG_ON_ZERO(e) (0)
|
||||
#else /* __CHECKER__ */
|
||||
/*
|
||||
* Force a compilation error if condition is true, but also produce a
|
||||
* result (of value 0 and type int), so the expression can be used
|
||||
* e.g. in a structure initializer (or where-ever else comma expressions
|
||||
* aren't permitted).
|
||||
*
|
||||
* Take an error message as an optional second argument. If omitted,
|
||||
* default to the stringification of the tested expression.
|
||||
*/
|
||||
#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
|
||||
#endif /* __CHECKER__ */
|
||||
#define BUILD_BUG_ON_ZERO(e, ...) \
|
||||
__BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true")
|
||||
|
||||
/* Force a compilation error if a constant expression is not a power of 2 */
|
||||
#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
|
||||
|
|
|
|||
|
|
@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
|
|||
__asm__ ("" : "=r" (var) : "0" (var))
|
||||
#endif
|
||||
|
||||
#ifndef __BUILD_BUG_ON_ZERO_MSG
|
||||
#if defined(__clang__)
|
||||
#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); })))
|
||||
#else
|
||||
#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _TOOLS_LINUX_COMPILER_H */
|
||||
|
|
|
|||
|
|
@ -905,13 +905,17 @@ struct drm_syncobj_destroy {
|
|||
};
|
||||
|
||||
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
|
||||
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1)
|
||||
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
|
||||
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1)
|
||||
struct drm_syncobj_handle {
|
||||
__u32 handle;
|
||||
__u32 flags;
|
||||
|
||||
__s32 fd;
|
||||
__u32 pad;
|
||||
|
||||
__u64 point;
|
||||
};
|
||||
|
||||
struct drm_syncobj_transfer {
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ struct fscrypt_key_specifier {
|
|||
*/
|
||||
struct fscrypt_provisioning_key_payload {
|
||||
__u32 type;
|
||||
__u32 __reserved;
|
||||
__u32 flags;
|
||||
__u8 raw[];
|
||||
};
|
||||
|
||||
|
|
@ -128,7 +128,9 @@ struct fscrypt_add_key_arg {
|
|||
struct fscrypt_key_specifier key_spec;
|
||||
__u32 raw_size;
|
||||
__u32 key_id;
|
||||
__u32 __reserved[8];
|
||||
#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
|
||||
__u32 flags;
|
||||
__u32 __reserved[7];
|
||||
__u8 raw[];
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -375,6 +375,7 @@ struct kvm_run {
|
|||
#define KVM_SYSTEM_EVENT_WAKEUP 4
|
||||
#define KVM_SYSTEM_EVENT_SUSPEND 5
|
||||
#define KVM_SYSTEM_EVENT_SEV_TERM 6
|
||||
#define KVM_SYSTEM_EVENT_TDX_FATAL 7
|
||||
__u32 type;
|
||||
__u32 ndata;
|
||||
union {
|
||||
|
|
@ -930,6 +931,9 @@ struct kvm_enable_cap {
|
|||
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
|
||||
#define KVM_CAP_X86_GUEST_MODE 238
|
||||
#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
|
||||
#define KVM_CAP_ARM_EL2 240
|
||||
#define KVM_CAP_ARM_EL2_E2H0 241
|
||||
#define KVM_CAP_RISCV_MP_STATE_RESET 242
|
||||
|
||||
struct kvm_irq_routing_irqchip {
|
||||
__u32 irqchip;
|
||||
|
|
|
|||
|
|
@ -182,8 +182,12 @@ struct statx {
|
|||
/* File offset alignment for direct I/O reads */
|
||||
__u32 stx_dio_read_offset_align;
|
||||
|
||||
/* 0xb8 */
|
||||
__u64 __spare3[9]; /* Spare space for future expansion */
|
||||
/* Optimised max atomic write unit in bytes */
|
||||
__u32 stx_atomic_write_unit_max_opt;
|
||||
__u32 __spare2[1];
|
||||
|
||||
/* 0xc0 */
|
||||
__u64 __spare3[8]; /* Spare space for future expansion */
|
||||
|
||||
/* 0x100 */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool.
|
|||
# perf mem report
|
||||
|
||||
A normal perf mem report output will provide detailed memory access profile.
|
||||
However, it can also be aggregated based on output fields. For example:
|
||||
New output fields will show related access info together. For example:
|
||||
|
||||
# perf mem report -F mem,sample,snoop
|
||||
Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876
|
||||
Memory access Samples Snoop
|
||||
N/A 1903343 N/A
|
||||
L1 hit 1056754 N/A
|
||||
L2 hit 75231 N/A
|
||||
L3 hit 9496 HitM
|
||||
L3 hit 2270 N/A
|
||||
RAM hit 8710 N/A
|
||||
Remote node, same socket RAM hit 3241 N/A
|
||||
Remote core, same node Any cache hit 1572 HitM
|
||||
Remote core, same node Any cache hit 514 N/A
|
||||
Remote node, same socket Any cache hit 1216 HitM
|
||||
Remote node, same socket Any cache hit 350 N/A
|
||||
Uncached hit 18 N/A
|
||||
# perf mem report -F overhead,cache,snoop,comm
|
||||
...
|
||||
# Samples: 92K of event 'ibs_op//'
|
||||
# Total weight : 531104
|
||||
#
|
||||
# ---------- Cache ----------- --- Snoop ----
|
||||
# Overhead L1 L2 L1-buf Other HitM Other Command
|
||||
# ........ ............................ .............. ..........
|
||||
#
|
||||
76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1
|
||||
5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make
|
||||
5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc
|
||||
5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as
|
||||
5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh
|
||||
1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld
|
||||
0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config
|
||||
0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git
|
||||
0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm
|
||||
...
|
||||
|
||||
Also, it can be aggregated based on various memory access info using the
|
||||
sort keys. For example:
|
||||
|
||||
# perf mem report -s mem,snoop
|
||||
...
|
||||
# Samples: 92K of event 'ibs_op//'
|
||||
# Total weight : 531104
|
||||
# Sort order : mem,snoop
|
||||
#
|
||||
# Overhead Samples Memory access Snoop
|
||||
# ........ ............ ....................................... ............
|
||||
#
|
||||
47.99% 1509 L2 hit N/A
|
||||
25.08% 338 core, same node Any cache hit HitM
|
||||
10.24% 54374 N/A N/A
|
||||
6.77% 35938 L1 hit N/A
|
||||
6.39% 101 core, same node Any cache hit N/A
|
||||
3.50% 69 RAM hit N/A
|
||||
0.03% 158 LFB/MAB hit N/A
|
||||
0.00% 2 Uncached hit N/A
|
||||
|
||||
Please refer to their man page for more detail.
|
||||
|
||||
|
|
|
|||
|
|
@ -119,6 +119,22 @@ REPORT OPTIONS
|
|||
And the default sort keys are changed to local_weight, mem, sym, dso,
|
||||
symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat.
|
||||
|
||||
-F::
|
||||
--fields=::
|
||||
Specify output field - multiple keys can be specified in CSV format.
|
||||
Please see linkperf:perf-report[1] for details.
|
||||
|
||||
In addition to the default fields, 'perf mem report' will provide the
|
||||
following fields to break down sample periods.
|
||||
|
||||
- op: operation in the sample instruction (load, store, prefetch, ...)
|
||||
- cache: location in CPU cache (L1, L2, ...) where the sample hit
|
||||
- mem: location in memory or other places the sample hit
|
||||
- dtlb: location in Data TLB (L1, L2) where the sample hit
|
||||
- snoop: snoop result for the sampled data access
|
||||
|
||||
Please take a look at the OUTPUT FIELD SELECTION section for caveats.
|
||||
|
||||
-T::
|
||||
--type-profile::
|
||||
Show data-type profile result instead of code symbols. This requires
|
||||
|
|
@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20:
|
|||
90% [k] memcpy
|
||||
10% [.] strcmp
|
||||
|
||||
OUTPUT FIELD SELECTION
|
||||
----------------------
|
||||
"perf mem report" adds a number of new output fields specific to data source
|
||||
information in the sample. Some of them have the same name as the existing
|
||||
sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll
|
||||
behave differently depending on whether they are used with -F/--fields or -s/--sort.
|
||||
|
||||
Using those two as output fields will aggregate all samples together and show
|
||||
a breakdown.
|
||||
|
||||
$ perf mem report -F mem,snoop
|
||||
...
|
||||
# ------ Memory ------- --- Snoop ----
|
||||
# RAM Uncach Other HitM Other
|
||||
# ..................... ..............
|
||||
#
|
||||
3.5% 0.0% 96.5% 25.1% 74.9%
|
||||
|
||||
But using the same name for sort keys will aggregate samples for each type
|
||||
separately.
|
||||
|
||||
$ perf mem report -s mem,snoop
|
||||
# Overhead Samples Memory access Snoop
|
||||
# ........ ............ ....................................... ............
|
||||
#
|
||||
47.99% 1509 L2 hit N/A
|
||||
25.08% 338 core, same node Any cache hit HitM
|
||||
10.24% 54374 N/A N/A
|
||||
6.77% 35938 L1 hit N/A
|
||||
6.39% 101 core, same node Any cache hit N/A
|
||||
3.50% 69 RAM hit N/A
|
||||
0.03% 158 LFB/MAB hit N/A
|
||||
0.00% 2 Uncached hit N/A
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1]
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@
|
|||
#include <stdlib.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <linux/zalloc.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/mman.h>
|
||||
|
|
|
|||
|
|
@ -2,11 +2,18 @@
|
|||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "futex.h"
|
||||
|
||||
#ifndef PR_FUTEX_HASH
|
||||
#define PR_FUTEX_HASH 78
|
||||
# define PR_FUTEX_HASH_SET_SLOTS 1
|
||||
# define FH_FLAG_IMMUTABLE (1ULL << 0)
|
||||
# define PR_FUTEX_HASH_GET_SLOTS 2
|
||||
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
|
||||
#endif // PR_FUTEX_HASH
|
||||
|
||||
void futex_set_nbuckets_param(struct bench_futex_parameters *params)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
|
|
|||
|
|
@ -186,7 +186,7 @@ done
|
|||
# diff with extra ignore lines
|
||||
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
|
||||
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
|
||||
check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
|
||||
check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"'
|
||||
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
|
||||
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
|
||||
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ perf_tool=perf
|
|||
err=0
|
||||
|
||||
test_event_uniquifying() {
|
||||
# We use `clockticks` to verify the uniquify behavior.
|
||||
# We use `clockticks` in `uncore_imc` to verify the uniquify behavior.
|
||||
pmu="uncore_imc"
|
||||
event="clockticks"
|
||||
|
||||
# If the `-A` option is added, the event should be uniquified.
|
||||
|
|
@ -43,11 +44,18 @@ test_event_uniquifying() {
|
|||
echo "stat event uniquifying test"
|
||||
uniquified_event_array=()
|
||||
|
||||
# Skip if the machine does not have `uncore_imc` device.
|
||||
if ! ${perf_tool} list pmu | grep -q ${pmu}; then
|
||||
echo "Target does not support PMU ${pmu} [Skipped]"
|
||||
err=2
|
||||
return
|
||||
fi
|
||||
|
||||
# Check how many uniquified events.
|
||||
while IFS= read -r line; do
|
||||
uniquified_event=$(echo "$line" | awk '{print $1}')
|
||||
uniquified_event_array+=("${uniquified_event}")
|
||||
done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]")
|
||||
done < <(${perf_tool} list -v ${event} | grep ${pmu})
|
||||
|
||||
perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true"
|
||||
$perf_command
|
||||
|
|
|
|||
|
|
@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd,
|
|||
continue; /* Skip scripts that have a separate driver. */
|
||||
fd = openat(dir_fd, ent->d_name, O_PATH);
|
||||
append_scripts_in_dir(fd, result, result_sz);
|
||||
close(fd);
|
||||
}
|
||||
for (i = 0; i < n_dirs; i++) /* Clean up */
|
||||
zfree(&entlist[i]);
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
|
|||
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
|
||||
}
|
||||
|
||||
static inline size_t msg_data_left(struct msghdr *msg)
|
||||
static inline size_t msg_data_left(const struct msghdr *msg)
|
||||
{
|
||||
return iov_iter_count(&msg->msg_iter);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t;
|
|||
#define PAGE_IS_PFNZERO (1 << 5)
|
||||
#define PAGE_IS_HUGE (1 << 6)
|
||||
#define PAGE_IS_SOFT_DIRTY (1 << 7)
|
||||
#define PAGE_IS_GUARD (1 << 8)
|
||||
|
||||
/*
|
||||
* struct page_region - Page region with flags
|
||||
|
|
|
|||
|
|
@ -364,4 +364,11 @@ struct prctl_mm_map {
|
|||
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
|
||||
|
||||
/* FUTEX hash management */
|
||||
#define PR_FUTEX_HASH 78
|
||||
# define PR_FUTEX_HASH_SET_SLOTS 1
|
||||
# define FH_FLAG_IMMUTABLE (1ULL << 0)
|
||||
# define PR_FUTEX_HASH_GET_SLOTS 2
|
||||
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
|
||||
|
||||
#endif /* _LINUX_PRCTL_H */
|
||||
|
|
|
|||
|
|
@ -182,8 +182,12 @@ struct statx {
|
|||
/* File offset alignment for direct I/O reads */
|
||||
__u32 stx_dio_read_offset_align;
|
||||
|
||||
/* 0xb8 */
|
||||
__u64 __spare3[9]; /* Spare space for future expansion */
|
||||
/* Optimised max atomic write unit in bytes */
|
||||
__u32 stx_atomic_write_unit_max_opt;
|
||||
__u32 __spare2[1];
|
||||
|
||||
/* 0xc0 */
|
||||
__u64 __spare3[8]; /* Spare space for future expansion */
|
||||
|
||||
/* 0x100 */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -132,4 +132,8 @@
|
|||
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
|
||||
#endif
|
||||
|
||||
#ifndef SYM_PIC_ALIAS
|
||||
#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL)
|
||||
#endif
|
||||
|
||||
#endif /* PERF_LINUX_LINKAGE_H_ */
|
||||
|
|
|
|||
|
|
@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config)
|
|||
ret = evsel__open(evsel, NULL, tmap) >= 0;
|
||||
}
|
||||
|
||||
evsel__close(evsel);
|
||||
evsel__delete(evsel);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user