perf tools fixes for v6.16, 1st batch:

- Fix some file descriptor leaks that stand out with recent changes to
   'perf list'.
 
 - Fix prctl include to fix building 'perf bench futex' hash with musl libc.
 
 - Restrict 'perf test' uniquifying entry to machines with 'uncore_imc' PMUs.
 
 - Document new output fields (op, cache, mem, dtlb, snoop) used with
   'perf mem'.
 
 - Synchronize kernel header copies.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCaFWAEwAKCRCyPKLppCJ+
 J0/LAP46WdlK1T49um790QowJR+HqRQEngur6ZIb8ya0FpCc4AEAnv8KdGdeoF21
 H403o3fbCaMGN5ZQ4loe3IEO9gh6EAY=
 =9GzE
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v6.16-1-2025-06-20' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix some file descriptor leaks that stand out with recent changes to
   'perf list'

 - Fix prctl include to fix building 'perf bench futex' hash with musl
   libc

 - Restrict 'perf test' uniquifying entry to machines with 'uncore_imc'
   PMUs

 - Document new output fields (op, cache, mem, dtlb, snoop) used with
   'perf mem'

 - Synchronize kernel header copies

* tag 'perf-tools-fixes-for-v6.16-1-2025-06-20' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools:
  tools headers x86 cpufeatures: Sync with the kernel sources
  perf bench futex: Fix prctl include in musl libc
  perf test: Directory file descriptor leak
  perf evsel: Missed close() when probing hybrid core PMUs
  tools headers: Synchronize linux/bits.h with the kernel sources
  tools arch amd ibs: Sync ibs.h with the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  tools headers: Syncronize linux/build_bug.h with the kernel sources
  tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools headers UAPI: Sync the drm/drm.h with the kernel sources
  perf beauty: Update copy of linux/socket.h with the kernel sources
  tools headers UAPI: Sync kvm header with the kernel sources
  tools headers x86 svm: Sync svm headers with the kernel sources
  tools headers UAPI: Sync KVM's vmx.h header with the kernel sources
  tools kvm headers arm64: Update KVM header from the kernel sources
  tools headers UAPI: Sync linux/prctl.h with the kernel sources to pick FUTEX knob
  perf mem: Document new output fields (op, cache, mem, dtlb, snoop)
  tools headers: Update the fs headers with the kernel sources
  perf test: Restrict uniquifying test to machines with 'uncore_imc'
This commit is contained in:
Linus Torvalds 2025-06-21 07:59:45 -07:00
commit 7c7f9dd1ea
30 changed files with 328 additions and 52 deletions

View File

@ -4,9 +4,9 @@
#ifndef _UAPI_LINUX_BITS_H
#define _UAPI_LINUX_BITS_H
#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h))))
#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
#define __GENMASK_U128(h, l) \
((_BIT128((h)) << 1) - (_BIT128(l)))

View File

@ -431,10 +431,11 @@ enum {
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1

View File

@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_AMD_IBS_H
#define _ASM_X86_AMD_IBS_H
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
@ -151,3 +154,5 @@ struct perf_ibs_data {
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
#endif /* _ASM_X86_AMD_IBS_H */

View File

@ -336,7 +336,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@ -379,6 +379,7 @@
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */
#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@ -447,6 +448,7 @@
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
@ -458,6 +460,7 @@
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
@ -482,7 +485,8 @@
#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
/*
* BUG word(s)
@ -535,6 +539,8 @@
#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -533,7 +533,7 @@
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
@ -550,16 +550,16 @@
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
@ -602,7 +602,11 @@
/* V6 PMON MSR range */
#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
#define MSR_IA32_PMC_V6_STEP 4
/* KeyID partitioning between MKTME and TDX */

View File

@ -441,6 +441,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SNP_VM 4
#define KVM_X86_TDX_VM 5
/* Trust Domain eXtension sub-ioctl() commands. */
enum kvm_tdx_cmd_id {
KVM_TDX_CAPABILITIES = 0,
KVM_TDX_INIT_VM,
KVM_TDX_INIT_VCPU,
KVM_TDX_INIT_MEM_REGION,
KVM_TDX_FINALIZE_VM,
KVM_TDX_GET_CPUID,
KVM_TDX_CMD_NR_MAX,
};
struct kvm_tdx_cmd {
/* enum kvm_tdx_cmd_id */
__u32 id;
/* flags for sub-commend. If sub-command doesn't use this, set zero. */
__u32 flags;
/*
* data for each sub-command. An immediate or a pointer to the actual
* data in process virtual address. If sub-command doesn't use it,
* set zero.
*/
__u64 data;
/*
* Auxiliary error code. The sub-command may return TDX SEAMCALL
* status code in addition to -Exxx.
*/
__u64 hw_error;
};
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
__u64 reserved[254];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
};
struct kvm_tdx_init_vm {
__u64 attributes;
__u64 xfam;
__u64 mrconfigid[6]; /* sha384 digest */
__u64 mrowner[6]; /* sha384 digest */
__u64 mrownerconfig[6]; /* sha384 digest */
/* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
__u64 reserved[12];
/*
* Call KVM_TDX_INIT_VM before vcpu creation, thus before
* KVM_SET_CPUID2.
* This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
* TDX module directly virtualizes those CPUIDs without VMM. The user
* space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
* those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
* the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
* module doesn't virtualize.
*/
struct kvm_cpuid2 cpuid;
};
#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0)
struct kvm_tdx_init_mem_region {
__u64 source_addr;
__u64 gpa;
__u64 nr_pages;
};
#endif /* _ASM_X86_KVM_H */

View File

@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
#define SVM_EXIT_BUS_LOCK 0x0a5
#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
@ -225,6 +226,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_BUS_LOCK, "buslock" }, \
{ SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \

View File

@ -34,6 +34,7 @@
#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_INIT_SIGNAL 3
#define EXIT_REASON_SIPI_SIGNAL 4
#define EXIT_REASON_OTHER_SMI 6
#define EXIT_REASON_INTERRUPT_WINDOW 7
#define EXIT_REASON_NMI_WINDOW 8
@ -92,6 +93,7 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
#define EXIT_REASON_TDCALL 77
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@ -155,7 +157,8 @@
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
{ EXIT_REASON_NOTIFY, "NOTIFY" }
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
{ EXIT_REASON_TDCALL, "TDCALL" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }

View File

@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
SYM_PIC_ALIAS(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)

View File

@ -42,6 +42,7 @@ SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
SYM_PIC_ALIAS(memset)
EXPORT_SYMBOL(memset)
SYM_FUNC_START_LOCAL(memset_orig)

View File

@ -12,6 +12,7 @@
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
#define BITS_PER_BYTE 8
#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
/*
* Create a contiguous bitmask starting at bit position @l and ending at
@ -19,16 +20,68 @@
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
#if !defined(__ASSEMBLY__)
/*
* Missing asm support
*
* GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
* something not available in asm. Nevertheless, fixed width integers is a C
* concept. Assembly code can rely on the long and long long versions instead.
*/
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/overflow.h>
#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
#else
/*
* Generate a mask for the specified type @t. Additional checks are made to
* guarantee the value returned fits in that type, relying on
* -Wshift-count-overflow compiler check to detect incompatible arguments.
* For example, all these create build errors or warnings:
*
* - GENMASK(15, 20): wrong argument order
* - GENMASK(72, 15): doesn't fit unsigned long
* - GENMASK_U32(33, 15): doesn't fit in a u32
*/
#define GENMASK_TYPE(t, h, l) \
((t)(GENMASK_INPUT_CHECK(h, l) + \
(type_max(t) << (l) & \
type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
/*
* Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
* following examples generate compiler warnings due to -Wshift-count-overflow:
*
* - BIT_U8(8)
* - BIT_U32(-1)
* - BIT_U32(40)
*/
#define BIT_INPUT_CHECK(type, nr) \
BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type)))
#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr)))
#define BIT_U8(nr) BIT_TYPE(u8, nr)
#define BIT_U16(nr) BIT_TYPE(u16, nr)
#define BIT_U32(nr) BIT_TYPE(u32, nr)
#define BIT_U64(nr) BIT_TYPE(u64, nr)
#else /* defined(__ASSEMBLY__) */
/*
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
* disable the input check if that is the case.
*/
#define GENMASK_INPUT_CHECK(h, l) 0
#endif
#endif /* !defined(__ASSEMBLY__) */
#define GENMASK(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))

View File

@ -4,17 +4,17 @@
#include <linux/compiler.h>
#ifdef __CHECKER__
#define BUILD_BUG_ON_ZERO(e) (0)
#else /* __CHECKER__ */
/*
* Force a compilation error if condition is true, but also produce a
* result (of value 0 and type int), so the expression can be used
* e.g. in a structure initializer (or where-ever else comma expressions
* aren't permitted).
*
* Take an error message as an optional second argument. If omitted,
* default to the stringification of the tested expression.
*/
#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
#endif /* __CHECKER__ */
#define BUILD_BUG_ON_ZERO(e, ...) \
__BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true")
/* Force a compilation error if a constant expression is not a power of 2 */
#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \

View File

@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__asm__ ("" : "=r" (var) : "0" (var))
#endif
#ifndef __BUILD_BUG_ON_ZERO_MSG
#if defined(__clang__)
#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); })))
#else
#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
#endif
#endif
#endif /* __ASSEMBLY__ */
#endif /* _TOOLS_LINUX_COMPILER_H */

View File

@ -905,13 +905,17 @@ struct drm_syncobj_destroy {
};
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1)
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1)
struct drm_syncobj_handle {
__u32 handle;
__u32 flags;
__s32 fd;
__u32 pad;
__u64 point;
};
struct drm_syncobj_transfer {

View File

@ -119,7 +119,7 @@ struct fscrypt_key_specifier {
*/
struct fscrypt_provisioning_key_payload {
__u32 type;
__u32 __reserved;
__u32 flags;
__u8 raw[];
};
@ -128,7 +128,9 @@ struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
__u32 __reserved[8];
#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
__u32 flags;
__u32 __reserved[7];
__u8 raw[];
};

View File

@ -375,6 +375,7 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
#define KVM_SYSTEM_EVENT_SEV_TERM 6
#define KVM_SYSTEM_EVENT_TDX_FATAL 7
__u32 type;
__u32 ndata;
union {
@ -930,6 +931,9 @@ struct kvm_enable_cap {
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
struct kvm_irq_routing_irqchip {
__u32 irqchip;

View File

@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
/* Optimised max atomic write unit in bytes */
__u32 stx_atomic_write_unit_max_opt;
__u32 __spare2[1];
/* 0xc0 */
__u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};

View File

@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool.
# perf mem report
A normal perf mem report output will provide detailed memory access profile.
However, it can also be aggregated based on output fields. For example:
New output fields will show related access info together. For example:
# perf mem report -F mem,sample,snoop
Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876
Memory access Samples Snoop
N/A 1903343 N/A
L1 hit 1056754 N/A
L2 hit 75231 N/A
L3 hit 9496 HitM
L3 hit 2270 N/A
RAM hit 8710 N/A
Remote node, same socket RAM hit 3241 N/A
Remote core, same node Any cache hit 1572 HitM
Remote core, same node Any cache hit 514 N/A
Remote node, same socket Any cache hit 1216 HitM
Remote node, same socket Any cache hit 350 N/A
Uncached hit 18 N/A
# perf mem report -F overhead,cache,snoop,comm
...
# Samples: 92K of event 'ibs_op//'
# Total weight : 531104
#
# ---------- Cache ----------- --- Snoop ----
# Overhead L1 L2 L1-buf Other HitM Other Command
# ........ ............................ .............. ..........
#
76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1
5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make
5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc
5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as
5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh
1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld
0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config
0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git
0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm
...
Also, it can be aggregated based on various memory access info using the
sort keys. For example:
# perf mem report -s mem,snoop
...
# Samples: 92K of event 'ibs_op//'
# Total weight : 531104
# Sort order : mem,snoop
#
# Overhead Samples Memory access Snoop
# ........ ............ ....................................... ............
#
47.99% 1509 L2 hit N/A
25.08% 338 core, same node Any cache hit HitM
10.24% 54374 N/A N/A
6.77% 35938 L1 hit N/A
6.39% 101 core, same node Any cache hit N/A
3.50% 69 RAM hit N/A
0.03% 158 LFB/MAB hit N/A
0.00% 2 Uncached hit N/A
Please refer to their man page for more detail.

View File

@ -119,6 +119,22 @@ REPORT OPTIONS
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat.
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Please see linkperf:perf-report[1] for details.
In addition to the default fields, 'perf mem report' will provide the
following fields to break down sample periods.
- op: operation in the sample instruction (load, store, prefetch, ...)
- cache: location in CPU cache (L1, L2, ...) where the sample hit
- mem: location in memory or other places the sample hit
- dtlb: location in Data TLB (L1, L2) where the sample hit
- snoop: snoop result for the sampled data access
Please take a look at the OUTPUT FIELD SELECTION section for caveats.
-T::
--type-profile::
Show data-type profile result instead of code symbols. This requires
@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20:
90% [k] memcpy
10% [.] strcmp
OUTPUT FIELD SELECTION
----------------------
"perf mem report" adds a number of new output fields specific to data source
information in the sample. Some of them have the same name with the existing
sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll
behave differently when it's used by -F/--fields or -s/--sort.
Using those two as output fields will aggregate samples altogether and show
breakdown.
$ perf mem report -F mem,snoop
...
# ------ Memory ------- --- Snoop ----
# RAM Uncach Other HitM Other
# ..................... ..............
#
3.5% 0.0% 96.5% 25.1% 74.9%
But using the same name for sort keys will aggregate samples for each type
separately.
$ perf mem report -s mem,snoop
# Overhead Samples Memory access Snoop
# ........ ............ ....................................... ............
#
47.99% 1509 L2 hit N/A
25.08% 338 core, same node Any cache hit HitM
10.24% 54374 N/A N/A
6.77% 35938 L1 hit N/A
6.39% 101 core, same node Any cache hit N/A
3.50% 69 RAM hit N/A
0.03% 158 LFB/MAB hit N/A
0.00% 2 Uncached hit N/A
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1]

View File

@ -18,7 +18,6 @@
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/prctl.h>
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>

View File

@ -2,11 +2,18 @@
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/prctl.h>
#include <sys/prctl.h>
#include "futex.h"
#ifndef PR_FUTEX_HASH
#define PR_FUTEX_HASH 78
# define PR_FUTEX_HASH_SET_SLOTS 1
# define FH_FLAG_IMMUTABLE (1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS 2
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
#endif // PR_FUTEX_HASH
void futex_set_nbuckets_param(struct bench_futex_parameters *params)
{
unsigned long flags;

View File

@ -186,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'

View File

@ -9,7 +9,8 @@ perf_tool=perf
err=0
test_event_uniquifying() {
# We use `clockticks` to verify the uniquify behavior.
# We use `clockticks` in `uncore_imc` to verify the uniquify behavior.
pmu="uncore_imc"
event="clockticks"
# If the `-A` option is added, the event should be uniquified.
@ -43,11 +44,18 @@ test_event_uniquifying() {
echo "stat event uniquifying test"
uniquified_event_array=()
# Skip if the machine does not have `uncore_imc` device.
if ! ${perf_tool} list pmu | grep -q ${pmu}; then
echo "Target does not support PMU ${pmu} [Skipped]"
err=2
return
fi
# Check how many uniquified events.
while IFS= read -r line; do
uniquified_event=$(echo "$line" | awk '{print $1}')
uniquified_event_array+=("${uniquified_event}")
done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]")
done < <(${perf_tool} list -v ${event} | grep ${pmu})
perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true"
$perf_command

View File

@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd,
continue; /* Skip scripts that have a separate driver. */
fd = openat(dir_fd, ent->d_name, O_PATH);
append_scripts_in_dir(fd, result, result_sz);
close(fd);
}
for (i = 0; i < n_dirs; i++) /* Clean up */
zfree(&entlist[i]);

View File

@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
}
static inline size_t msg_data_left(struct msghdr *msg)
static inline size_t msg_data_left(const struct msghdr *msg)
{
return iov_iter_count(&msg->msg_iter);
}

View File

@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags

View File

@ -364,4 +364,11 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
/* FUTEX hash management */
#define PR_FUTEX_HASH 78
# define PR_FUTEX_HASH_SET_SLOTS 1
# define FH_FLAG_IMMUTABLE (1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS 2
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
#endif /* _LINUX_PRCTL_H */

View File

@ -182,8 +182,12 @@ struct statx {
/* File offset alignment for direct I/O reads */
__u32 stx_dio_read_offset_align;
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
/* Optimised max atomic write unit in bytes */
__u32 stx_atomic_write_unit_max_opt;
__u32 __spare2[1];
/* 0xc0 */
__u64 __spare3[8]; /* Spare space for future expansion */
/* 0x100 */
};

View File

@ -132,4 +132,8 @@
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
#ifndef SYM_PIC_ALIAS
#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL)
#endif
#endif /* PERF_LINUX_LINKAGE_H_ */

View File

@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
evsel__close(evsel);
evsel__delete(evsel);
}