mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
Merge branch kvm-arm64/hyp-tracing into kvmarm-master/next
* kvm-arm64/hyp-tracing: (40 commits) : . : EL2 tracing support, adding both 'remote' ring-buffer : infrastructure and the tracing itself, courtesy of : Vincent Donnefort. From the cover letter: : : "The growing set of features supported by the hypervisor in protected : mode necessitates debugging and profiling tools. Tracefs is the : ideal candidate for this task: : : * It is simple to use and to script. : : * It is supported by various tools, from the trace-cmd CLI to the : Android web-based perfetto. : : * The ring-buffer, where are stored trace events consists of linked : pages, making it an ideal structure for sharing between kernel and : hypervisor. : : This series first introduces a new generic way of creating remote events and : remote buffers. Then it adds support to the pKVM hypervisor." : . tracing: selftests: Extend hotplug testing for trace remotes tracing: Non-consuming read for trace remotes with an offline CPU tracing: Adjust cmd_check_undefined to show unexpected undefined symbols tracing: Restore accidentally removed SPDX tag KVM: arm64: avoid unused-variable warning tracing: Generate undef symbols allowlist for simple_ring_buffer KVM: arm64: tracing: add ftrace dependency tracing: add more symbols to whitelist tracing: Update undefined symbols allow list for simple_ring_buffer KVM: arm64: Fix out-of-tree build for nVHE/pKVM tracing tracing: selftests: Add hypervisor trace remote tests KVM: arm64: Add selftest event support to nVHE/pKVM hyp KVM: arm64: Add hyp_enter/hyp_exit events to nVHE/pKVM hyp KVM: arm64: Add event support to the nVHE/pKVM hyp and trace remote KVM: arm64: Add trace reset to the nVHE/pKVM hyp KVM: arm64: Sync boot clock with the nVHE/pKVM hyp KVM: arm64: Add trace remote for the nVHE/pKVM hyp KVM: arm64: Add tracing capability for the nVHE/pKVM hyp KVM: arm64: Support unaligned fixmap in the pKVM hyp KVM: arm64: Initialise hyp_nr_cpus for nVHE hyp ... Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
commit
2de32a25a3
|
|
@ -91,6 +91,17 @@ interactions.
|
|||
user_events
|
||||
uprobetracer
|
||||
|
||||
Remote Tracing
|
||||
--------------
|
||||
|
||||
This section covers the framework to read compatible ring-buffers, written by
|
||||
entities outside of the kernel (most likely firmware or hypervisor).
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
remotes
|
||||
|
||||
Additional Resources
|
||||
--------------------
|
||||
|
||||
|
|
|
|||
66
Documentation/trace/remotes.rst
Normal file
66
Documentation/trace/remotes.rst
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============
|
||||
Tracing Remotes
|
||||
===============
|
||||
|
||||
:Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
|
||||
Overview
|
||||
========
|
||||
Firmware and hypervisors are black boxes to the kernel. Having a way to see what
|
||||
they are doing can be useful to debug both. This is where remote tracing buffers
|
||||
come in. A remote tracing buffer is a ring buffer written by the firmware or
|
||||
hypervisor into memory that is memory mapped to the host kernel. This is similar
|
||||
to how user space memory maps the kernel ring buffer but in this case the kernel
|
||||
is acting like user space and the firmware or hypervisor is the "kernel" side.
|
||||
With a trace remote ring buffer, the firmware and hypervisor can record events
|
||||
that the host kernel can see and expose to user space.
|
||||
|
||||
Register a remote
|
||||
=================
|
||||
A remote must provide a set of callbacks `struct trace_remote_callbacks` whose
|
||||
description can be found below. Those callbacks allow Tracefs to enable and
|
||||
disable tracing and events, to load and unload a tracing buffer (a set of
|
||||
ring-buffers) and to swap a reader page with the head page, which enables
|
||||
consuming reading.
|
||||
|
||||
.. kernel-doc:: include/linux/trace_remote.h
|
||||
|
||||
Once registered, an instance will appear for this remote in the Tracefs
|
||||
directory **remotes/**. Buffers can then be read using the usual Tracefs files
|
||||
**trace_pipe** and **trace**.
|
||||
|
||||
Declare a remote event
|
||||
======================
|
||||
Macros are provided to ease the declaration of remote events, in a similar
|
||||
fashion to in-kernel events. A declaration must provide an ID, a description of
|
||||
the event arguments and how to print the event:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
REMOTE_EVENT(foo, EVENT_FOO_ID,
|
||||
RE_STRUCT(
|
||||
re_field(u64, bar)
|
||||
),
|
||||
RE_PRINTK("bar=%lld", __entry->bar)
|
||||
);
|
||||
|
||||
Then those events must be declared in a C file with the following:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE_FILE foo_events.h
|
||||
#include <trace/define_remote_events.h>
|
||||
|
||||
This will provide a `struct remote_event remote_event_foo` that can be given to
|
||||
`trace_remote_register`.
|
||||
|
||||
Registered events appear in the remote directory under **events/**.
|
||||
|
||||
Simple ring-buffer
|
||||
==================
|
||||
A simple implementation for a ring-buffer writer can be found in
|
||||
kernel/trace/simple_ring_buffer.c.
|
||||
|
||||
.. kernel-doc:: include/linux/simple_ring_buffer.h
|
||||
|
|
@ -89,6 +89,14 @@ enum __kvm_host_smccc_func {
|
|||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_load,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_unload,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_enable,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_swap_reader,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_update_clock,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_reset,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_enable_event,
|
||||
__KVM_HOST_SMCCC_FUNC___tracing_write_event,
|
||||
};
|
||||
|
||||
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
|
||||
|
|
|
|||
16
arch/arm64/include/asm/kvm_define_hypevents.h
Normal file
16
arch/arm64/include/asm/kvm_define_hypevents.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE_FILE arch/arm64/include/asm/kvm_hypevents.h
|
||||
|
||||
#define REMOTE_EVENT_SECTION "_hyp_events"
|
||||
|
||||
#define HE_STRUCT(__args) __args
|
||||
#define HE_PRINTK(__args...) __args
|
||||
#define he_field re_field
|
||||
|
||||
/*
 * In this header every HYP_EVENT() is turned into a REMOTE_EVENT()
 * declaration. Only the name, the field layout (__struct) and the print
 * format (__printk) are forwarded; __proto and __assign are dropped and the
 * event ID argument is always passed as 0.
 */
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
REMOTE_EVENT(__name, 0, RE_STRUCT(__struct), RE_PRINTK(__printk))
|
||||
|
||||
#define HYP_EVENT_MULTI_READ
|
||||
#include <trace/define_remote_events.h>
|
||||
#undef HYP_EVENT_MULTI_READ
|
||||
|
|
@ -923,6 +923,9 @@ struct kvm_vcpu_arch {
|
|||
|
||||
/* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
|
||||
struct vncr_tlb *vncr_tlb;
|
||||
|
||||
/* Hyp-readable copy of kvm_vcpu::pid */
|
||||
pid_t pid;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -129,8 +129,7 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
|
|||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp,
|
||||
void (*fn)(void));
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits);
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits);
|
||||
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
|
||||
#endif
|
||||
|
||||
|
|
@ -147,5 +146,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
|
|||
extern unsigned long kvm_nvhe_sym(__icache_flags);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
|
||||
extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
|
||||
extern unsigned long kvm_nvhe_sym(hyp_nr_cpus);
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
|
|
|||
60
arch/arm64/include/asm/kvm_hypevents.h
Normal file
60
arch/arm64/include/asm/kvm_hypevents.h
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#if !defined(__ARM64_KVM_HYPEVENTS_H_) || defined(HYP_EVENT_MULTI_READ)
|
||||
#define __ARM64_KVM_HYPEVENTS_H_
|
||||
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
#include <nvhe/trace.h>
|
||||
#endif
|
||||
|
||||
#ifndef __HYP_ENTER_EXIT_REASON
|
||||
#define __HYP_ENTER_EXIT_REASON
|
||||
/*
 * Reason codes stored in the "reason" field of the hyp_enter and hyp_exit
 * events declared in this header. New values must be added before
 * HYP_REASON_UNKNOWN.
 */
enum hyp_enter_exit_reason {
|
||||
HYP_REASON_SMC,
|
||||
HYP_REASON_HVC,
|
||||
HYP_REASON_PSCI,
|
||||
HYP_REASON_HOST_ABORT,
|
||||
HYP_REASON_GUEST_EXIT,
|
||||
HYP_REASON_ERET_HOST,
|
||||
HYP_REASON_ERET_GUEST,
|
||||
HYP_REASON_UNKNOWN /* Must be last */
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * hyp_enter event: records a reason code and the value returned by
 * __tracing_get_vcpu_pid() for @host_ctxt. The reason is printed through
 * __hyp_enter_exit_reason_str().
 */
HYP_EVENT(hyp_enter,
|
||||
HE_PROTO(struct kvm_cpu_context *host_ctxt, u8 reason),
|
||||
HE_STRUCT(
|
||||
he_field(u8, reason)
|
||||
he_field(pid_t, vcpu)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->reason = reason;
|
||||
__entry->vcpu = __tracing_get_vcpu_pid(host_ctxt);
|
||||
),
|
||||
HE_PRINTK("reason=%s vcpu=%d", __hyp_enter_exit_reason_str(__entry->reason), __entry->vcpu)
|
||||
);
|
||||
|
||||
/*
 * hyp_exit event: same layout as hyp_enter, i.e. a reason code and the value
 * returned by __tracing_get_vcpu_pid() for @host_ctxt.
 */
HYP_EVENT(hyp_exit,
|
||||
HE_PROTO(struct kvm_cpu_context *host_ctxt, u8 reason),
|
||||
HE_STRUCT(
|
||||
he_field(u8, reason)
|
||||
he_field(pid_t, vcpu)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->reason = reason;
|
||||
__entry->vcpu = __tracing_get_vcpu_pid(host_ctxt);
|
||||
),
|
||||
HE_PRINTK("reason=%s vcpu=%d", __hyp_enter_exit_reason_str(__entry->reason), __entry->vcpu)
|
||||
);
|
||||
|
||||
HYP_EVENT(selftest,
|
||||
HE_PROTO(u64 id),
|
||||
HE_STRUCT(
|
||||
he_field(u64, id)
|
||||
),
|
||||
HE_ASSIGN(
|
||||
__entry->id = id;
|
||||
),
|
||||
RE_PRINTK("id=%llu", __entry->id)
|
||||
);
|
||||
#endif
|
||||
26
arch/arm64/include/asm/kvm_hyptrace.h
Normal file
26
arch/arm64/include/asm/kvm_hyptrace.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYPTRACE_H_
|
||||
#define __ARM64_KVM_HYPTRACE_H_
|
||||
|
||||
#include <linux/ring_buffer.h>
|
||||
|
||||
/*
 * Descriptor for the hypervisor tracing buffer.
 *
 * NOTE(review): the users of this structure are not visible here. Presumably
 * bpages_backing_start/bpages_backing_size describe a memory area backing
 * the hypervisor's buffer-page metadata and trace_buffer_desc describes the
 * ring-buffer pages themselves -- confirm against hyp_trace.c and
 * __tracing_load().
 */
struct hyp_trace_desc {
|
||||
unsigned long bpages_backing_start;
|
||||
size_t bpages_backing_size;
|
||||
struct trace_buffer_desc trace_buffer_desc;
|
||||
|
||||
};
|
||||
|
||||
/*
 * Per-event state used by the hypervisor.
 *
 * @id is copied into the header of every entry written for the event and
 * @enabled is tested by the generated trace_<name>() function before an
 * entry is reserved; it is updated by __tracing_enable_event().
 */
struct hyp_event_id {
|
||||
unsigned short id;
|
||||
atomic_t enabled;
|
||||
};
|
||||
|
||||
extern struct remote_event __hyp_events_start[];
|
||||
extern struct remote_event __hyp_events_end[];
|
||||
|
||||
/* hyp_event section used by the hypervisor */
|
||||
extern struct hyp_event_id __hyp_event_ids_start[];
|
||||
extern struct hyp_event_id __hyp_event_ids_end[];
|
||||
|
||||
#endif
|
||||
|
|
@ -138,6 +138,10 @@ KVM_NVHE_ALIAS(__hyp_data_start);
|
|||
KVM_NVHE_ALIAS(__hyp_data_end);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_start);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_end);
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
KVM_NVHE_ALIAS(__hyp_event_ids_start);
|
||||
KVM_NVHE_ALIAS(__hyp_event_ids_end);
|
||||
#endif
|
||||
|
||||
/* pKVM static key */
|
||||
KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
|
||||
|
|
|
|||
|
|
@ -13,12 +13,23 @@
|
|||
*(__kvm_ex_table) \
|
||||
__stop___kvm_ex_table = .;
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
#define HYPERVISOR_EVENT_IDS \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_event_ids_start = .; \
|
||||
*(HYP_SECTION_NAME(.event_ids)) \
|
||||
__hyp_event_ids_end = .;
|
||||
#else
|
||||
#define HYPERVISOR_EVENT_IDS
|
||||
#endif
|
||||
|
||||
#define HYPERVISOR_RODATA_SECTIONS \
|
||||
HYP_SECTION_NAME(.rodata) : { \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_start = .; \
|
||||
*(HYP_SECTION_NAME(.data..ro_after_init)) \
|
||||
*(HYP_SECTION_NAME(.rodata)) \
|
||||
HYPERVISOR_EVENT_IDS \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_end = .; \
|
||||
}
|
||||
|
|
@ -307,6 +318,13 @@ SECTIONS
|
|||
|
||||
HYPERVISOR_DATA_SECTION
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
.data.hyp_events : {
|
||||
__hyp_events_start = .;
|
||||
*(SORT(_hyp_events.*))
|
||||
__hyp_events_end = .;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Data written with the MMU off but read with the MMU on requires
|
||||
* cache lines to be invalidated, discarding up to a Cache Writeback
|
||||
|
|
|
|||
|
|
@ -42,32 +42,10 @@ menuconfig KVM
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config NVHE_EL2_DEBUG
|
||||
bool "Debug mode for non-VHE EL2 object"
|
||||
depends on KVM
|
||||
help
|
||||
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
|
||||
Failure reports will BUG() in the hypervisor. This is intended for
|
||||
local EL2 hypervisor development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PROTECTED_NVHE_STACKTRACE
|
||||
bool "Protected KVM hypervisor stacktraces"
|
||||
depends on NVHE_EL2_DEBUG
|
||||
default n
|
||||
help
|
||||
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
|
||||
|
||||
If using protected nVHE mode, but cannot afford the associated
|
||||
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
|
||||
say N.
|
||||
|
||||
If unsure, or not using protected nVHE (pKVM), say N.
|
||||
if KVM
|
||||
|
||||
config PTDUMP_STAGE2_DEBUGFS
|
||||
bool "Present the stage-2 pagetables to debugfs"
|
||||
depends on KVM
|
||||
depends on DEBUG_KERNEL
|
||||
depends on DEBUG_FS
|
||||
depends on ARCH_HAS_PTDUMP
|
||||
|
|
@ -82,4 +60,48 @@ config PTDUMP_STAGE2_DEBUGFS
|
|||
|
||||
If in doubt, say N.
|
||||
|
||||
config NVHE_EL2_DEBUG
|
||||
bool "Debug mode for non-VHE EL2 object"
|
||||
default n
|
||||
help
|
||||
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
|
||||
Failure reports will BUG() in the hypervisor. This is intended for
|
||||
local EL2 hypervisor development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
if NVHE_EL2_DEBUG
|
||||
|
||||
config NVHE_EL2_TRACING
|
||||
bool
|
||||
depends on TRACING && FTRACE
|
||||
select TRACE_REMOTE
|
||||
default y
|
||||
|
||||
config PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
bool "Disable the host stage-2 on panic"
|
||||
default n
|
||||
help
|
||||
Relax the host stage-2 on hypervisor panic to allow the kernel to
|
||||
unwind and symbolize the hypervisor stacktrace. This however tampers with
|
||||
the system security. This is intended for local EL2 hypervisor
|
||||
development.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config PKVM_STACKTRACE
|
||||
bool "Protected KVM hypervisor stacktraces"
|
||||
depends on PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
default y
|
||||
help
|
||||
Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()
|
||||
|
||||
If using protected nVHE mode, but cannot afford the associated
|
||||
memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
|
||||
say N.
|
||||
|
||||
If unsure, or not using protected nVHE (pKVM), say N.
|
||||
|
||||
endif # NVHE_EL2_DEBUG
|
||||
endif # KVM
|
||||
endif # VIRTUALIZATION
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
|
|||
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
|
||||
kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
|
||||
|
||||
kvm-$(CONFIG_NVHE_EL2_TRACING) += hyp_trace.o
|
||||
|
||||
always-y := hyp_constants.h hyp-constants.s
|
||||
|
||||
define rule_gen_hyp_constants
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace_arm.h"
|
||||
#include "hyp_trace.h"
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
|
@ -35,6 +36,7 @@
|
|||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_nested.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
|
@ -705,6 +707,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
|
||||
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
|
||||
vcpu_set_on_unsupported_cpu(vcpu);
|
||||
|
||||
vcpu->arch.pid = pid_nr(vcpu->pid);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
|
|
@ -2414,6 +2418,10 @@ static int __init init_subsystems(void)
|
|||
|
||||
kvm_register_perf_callbacks();
|
||||
|
||||
err = kvm_hyp_trace_init();
|
||||
if (err)
|
||||
kvm_err("Failed to initialize Hyp tracing\n");
|
||||
|
||||
out:
|
||||
if (err)
|
||||
hyp_cpu_pm_exit();
|
||||
|
|
@ -2465,7 +2473,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
|
|||
preempt_disable();
|
||||
cpu_hyp_init_context();
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
|
||||
num_possible_cpus(), kern_hyp_va(per_cpu_base),
|
||||
kern_hyp_va(per_cpu_base),
|
||||
hyp_va_bits);
|
||||
cpu_hyp_init_features();
|
||||
|
||||
|
|
@ -2674,6 +2682,8 @@ static int __init init_hyp_mode(void)
|
|||
kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr;
|
||||
}
|
||||
|
||||
kvm_nvhe_sym(hyp_nr_cpus) = num_possible_cpus();
|
||||
|
||||
/*
|
||||
* Map the Hyp-code called directly from the host
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -539,7 +539,7 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
|
|||
|
||||
/* All hyp bugs, including warnings, are treated as fatal. */
|
||||
if (!is_protected_kvm_enabled() ||
|
||||
IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
|
||||
IS_ENABLED(CONFIG_PKVM_DISABLE_STAGE2_ON_PANIC)) {
|
||||
struct bug_entry *bug = find_bug(elr_in_kimg);
|
||||
|
||||
if (bug)
|
||||
|
|
|
|||
23
arch/arm64/kvm/hyp/include/nvhe/arm-smccc.h
Normal file
23
arch/arm64/kvm/hyp/include/nvhe/arm-smccc.h
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__
|
||||
#define __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__
|
||||
|
||||
#include <asm/kvm_hypevents.h>
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
|
||||
/*
 * Wrapper around arm_smccc_1_1_smc() that brackets the call with a hyp_exit
 * event before it and a hyp_enter event after it, both with reason
 * HYP_REASON_SMC. NULL is passed as the host context to both tracepoints.
 */
#define hyp_smccc_1_1_smc(...) \
|
||||
do { \
|
||||
trace_hyp_exit(NULL, HYP_REASON_SMC); \
|
||||
arm_smccc_1_1_smc(__VA_ARGS__); \
|
||||
trace_hyp_enter(NULL, HYP_REASON_SMC); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Wrapper around arm_smccc_1_2_smc() that brackets the call with a hyp_exit
 * event before it and a hyp_enter event after it, both with reason
 * HYP_REASON_SMC. NULL is passed as the host context to both tracepoints.
 */
#define hyp_smccc_1_2_smc(...) \
|
||||
do { \
|
||||
trace_hyp_exit(NULL, HYP_REASON_SMC); \
|
||||
arm_smccc_1_2_smc(__VA_ARGS__); \
|
||||
trace_hyp_enter(NULL, HYP_REASON_SMC); \
|
||||
} while (0)
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_NVHE_ARM_SMCCC_H__ */
|
||||
16
arch/arm64/kvm/hyp/include/nvhe/clock.h
Normal file
16
arch/arm64/kvm/hyp/include/nvhe/clock.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_CLOCK_H
|
||||
#define __ARM64_KVM_HYP_NVHE_CLOCK_H
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/kvm_hyp.h>
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
|
||||
u64 trace_clock(void);
|
||||
#else
|
||||
static inline void
|
||||
trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
|
||||
static inline u64 trace_clock(void) { return 0; }
|
||||
#endif
|
||||
#endif
|
||||
14
arch/arm64/kvm/hyp/include/nvhe/define_events.h
Normal file
14
arch/arm64/kvm/hyp/include/nvhe/define_events.h
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
/*
 * Redefine HYP_EVENT() so that each event emits the definition of its
 * struct hyp_event_id, placed in a dedicated ".hyp.event_ids.<name>" section
 * and initially disabled. Only __name is used; the other arguments are
 * ignored by this expansion.
 */
#undef HYP_EVENT
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
struct hyp_event_id hyp_event_id_##__name \
|
||||
__section(".hyp.event_ids."#__name) = { \
|
||||
.enabled = ATOMIC_INIT(0), \
|
||||
}
|
||||
|
||||
#define HYP_EVENT_MULTI_READ
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#undef HYP_EVENT_MULTI_READ
|
||||
|
||||
#undef HYP_EVENT
|
||||
|
|
@ -30,8 +30,6 @@ enum pkvm_component_id {
|
|||
PKVM_ID_FFA,
|
||||
};
|
||||
|
||||
extern unsigned long hyp_nr_cpus;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_host_share_hyp(u64 pfn);
|
||||
int __pkvm_host_unshare_hyp(u64 pfn);
|
||||
|
|
|
|||
70
arch/arm64/kvm/hyp/include/nvhe/trace.h
Normal file
70
arch/arm64/kvm/hyp/include/nvhe/trace.h
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __ARM64_KVM_HYP_NVHE_TRACE_H
|
||||
#define __ARM64_KVM_HYP_NVHE_TRACE_H
|
||||
|
||||
#include <linux/trace_remote_event.h>
|
||||
|
||||
#include <asm/kvm_hyptrace.h>
|
||||
|
||||
static inline pid_t __tracing_get_vcpu_pid(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
if (!host_ctxt)
|
||||
host_ctxt = host_data_ptr(host_ctxt);
|
||||
|
||||
vcpu = host_ctxt->__hyp_running_vcpu;
|
||||
|
||||
return vcpu ? vcpu->arch.pid : 0;
|
||||
}
|
||||
|
||||
#define HE_PROTO(__args...) __args
|
||||
#define HE_ASSIGN(__args...) __args
|
||||
#define HE_STRUCT RE_STRUCT
|
||||
#define he_field re_field
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
|
||||
/*
 * Hypervisor-side expansion of HYP_EVENT(): declares the entry layout via
 * REMOTE_EVENT_FORMAT(), the event's struct hyp_event_id, and generates the
 * tracepoint function trace_<name>(<proto>).
 *
 * The generated function returns immediately when the event is not enabled
 * or when tracing_reserve_entry() yields no entry. Otherwise it stores the
 * event ID in the entry header, runs the __assign statements to fill the
 * fields and commits the entry. __printk is not used by this expansion.
 */
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
REMOTE_EVENT_FORMAT(__name, __struct); \
|
||||
extern struct hyp_event_id hyp_event_id_##__name; \
|
||||
static __always_inline void trace_##__name(__proto) \
|
||||
{ \
|
||||
struct remote_event_format_##__name *__entry; \
|
||||
size_t length = sizeof(*__entry); \
|
||||
\
|
||||
if (!atomic_read(&hyp_event_id_##__name.enabled)) \
|
||||
return; \
|
||||
__entry = tracing_reserve_entry(length); \
|
||||
if (!__entry) \
|
||||
return; \
|
||||
__entry->hdr.id = hyp_event_id_##__name.id; \
|
||||
__assign \
|
||||
tracing_commit_entry(); \
|
||||
}
|
||||
|
||||
void *tracing_reserve_entry(unsigned long length);
|
||||
void tracing_commit_entry(void);
|
||||
|
||||
int __tracing_load(unsigned long desc_va, size_t desc_size);
|
||||
void __tracing_unload(void);
|
||||
int __tracing_enable(bool enable);
|
||||
int __tracing_swap_reader(unsigned int cpu);
|
||||
void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc);
|
||||
int __tracing_reset(unsigned int cpu);
|
||||
int __tracing_enable_event(unsigned short id, bool enable);
|
||||
#else
|
||||
static inline void *tracing_reserve_entry(unsigned long length) { return NULL; }
|
||||
static inline void tracing_commit_entry(void) { }
|
||||
#define HYP_EVENT(__name, __proto, __struct, __assign, __printk) \
|
||||
static inline void trace_##__name(__proto) {}
|
||||
|
||||
static inline int __tracing_load(unsigned long desc_va, size_t desc_size) { return -ENODEV; }
|
||||
static inline void __tracing_unload(void) { }
|
||||
static inline int __tracing_enable(bool enable) { return -ENODEV; }
|
||||
static inline int __tracing_swap_reader(unsigned int cpu) { return -ENODEV; }
|
||||
static inline void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { }
|
||||
static inline int __tracing_reset(unsigned int cpu) { return -ENODEV; }
|
||||
static inline int __tracing_enable_event(unsigned short id, bool enable) { return -ENODEV; }
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -17,7 +17,7 @@ ccflags-y += -fno-stack-protector \
|
|||
hostprogs := gen-hyprel
|
||||
HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o tishift.o
|
||||
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
|
||||
|
||||
CFLAGS_switch.nvhe.o += -Wno-override-init
|
||||
|
|
@ -29,8 +29,12 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
|||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
|
||||
hyp-obj-y += ../../../kernel/smccc-call.o
|
||||
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
|
||||
hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
|
||||
hyp-obj-y += $(lib-objs)
|
||||
|
||||
# Path to simple_ring_buffer.c
|
||||
CFLAGS_trace.nvhe.o += -I$(srctree)/kernel/trace/
|
||||
|
||||
##
|
||||
## Build rules for compiling nVHE hyp code
|
||||
## Output of this folder is `kvm_nvhe.o`, a partially linked object
|
||||
|
|
|
|||
65
arch/arm64/kvm/hyp/nvhe/clock.c
Normal file
65
arch/arm64/kvm/hyp/nvhe/clock.c
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/clock.h>
|
||||
|
||||
#include <asm/arch_timer.h>
|
||||
#include <asm/div64.h>
|
||||
|
||||
/*
 * Conversion parameters for the hypervisor trace clock.
 *
 * Two parameter banks are kept. trace_clock_update() fills the bank that is
 * not selected by @cur and then publishes it with a store-release on @cur;
 * trace_clock() selects its bank with a load-acquire on @cur.
 *
 * Per bank:
 *   mult, shift     - scaling applied to a cycle delta: (cyc * mult) >> shift
 *   epoch_cyc       - counter value subtracted from the current counter
 *   epoch_ns        - offset added to the scaled result
 *   cyc_overflow64  - ULONG_MAX / mult; cycle deltas below this value are
 *                     scaled with a plain 64-bit multiplication
 */
static struct clock_data {
|
||||
struct {
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
u64 epoch_ns;
|
||||
u64 epoch_cyc;
|
||||
u64 cyc_overflow64;
|
||||
} data[2];
|
||||
u64 cur;
|
||||
} trace_clock_data;
|
||||
|
||||
/*
 * Compute (@cyc * @mult) >> @shift, holding the intermediate product in
 * 128 bits so that it is not truncated to 64 bits before the shift.
 * The result is truncated to 64 bits on return.
 */
static u64 __clock_mult_uint128(u64 cyc, u32 mult, u32 shift)
{
	__uint128_t product = cyc;

	product *= mult;

	return (u64)(product >> shift);
}
|
||||
|
||||
/* Does not guarantee no reader on the modified bank. */
|
||||
void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
|
||||
{
|
||||
struct clock_data *clock = &trace_clock_data;
|
||||
u64 bank = clock->cur ^ 1;
|
||||
|
||||
clock->data[bank].mult = mult;
|
||||
clock->data[bank].shift = shift;
|
||||
clock->data[bank].epoch_ns = epoch_ns;
|
||||
clock->data[bank].epoch_cyc = epoch_cyc;
|
||||
clock->data[bank].cyc_overflow64 = ULONG_MAX / mult;
|
||||
|
||||
smp_store_release(&clock->cur, bank);
|
||||
}
|
||||
|
||||
/* Use untrusted host data */
|
||||
u64 trace_clock(void)
|
||||
{
|
||||
struct clock_data *clock = &trace_clock_data;
|
||||
u64 bank = smp_load_acquire(&clock->cur);
|
||||
u64 cyc, ns;
|
||||
|
||||
cyc = __arch_counter_get_cntvct() - clock->data[bank].epoch_cyc;
|
||||
|
||||
if (likely(cyc < clock->data[bank].cyc_overflow64)) {
|
||||
ns = cyc * clock->data[bank].mult;
|
||||
ns >>= clock->data[bank].shift;
|
||||
} else {
|
||||
ns = __clock_mult_uint128(cyc, clock->data[bank].mult,
|
||||
clock->data[bank].shift);
|
||||
}
|
||||
|
||||
return (u64)ns + clock->data[bank].epoch_ns;
|
||||
}
|
||||
25
arch/arm64/kvm/hyp/nvhe/events.c
Normal file
25
arch/arm64/kvm/hyp/nvhe/events.c
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trace.h>
|
||||
|
||||
#include <nvhe/define_events.h>
|
||||
|
||||
/*
 * Enable or disable a hypervisor event.
 *
 * @id:     index of the event in the __hyp_event_ids_start[] table
 * @enable: new value for the event's "enabled" flag
 *
 * Returns 0 on success, -EINVAL when @id points at or past
 * __hyp_event_ids_end.
 */
int __tracing_enable_event(unsigned short id, bool enable)
{
	struct hyp_event_id *event = __hyp_event_ids_start + id;
	atomic_t *flag;

	if (event >= __hyp_event_ids_end)
		return -EINVAL;

	/*
	 * The flag is written through a temporary fixmap mapping of its
	 * physical address rather than through &event->enabled directly.
	 * NOTE(review): presumably because the event ID table is placed in
	 * the hyp rodata section and is not writable via its normal mapping
	 * -- confirm.
	 */
	flag = hyp_fixmap_map(__hyp_pa(&event->enabled));
	atomic_set(flag, enable);
	hyp_fixmap_unmap();

	return 0;
}
|
||||
|
|
@ -26,10 +26,10 @@
|
|||
* the duration and are therefore serialised.
|
||||
*/
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/arm_ffa.h>
|
||||
#include <asm/kvm_pkvm.h>
|
||||
|
||||
#include <nvhe/arm-smccc.h>
|
||||
#include <nvhe/ffa.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/memory.h>
|
||||
|
|
@ -147,7 +147,7 @@ static int ffa_map_hyp_buffers(u64 ffa_page_count)
|
|||
{
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_FN64_RXTX_MAP,
|
||||
.a1 = hyp_virt_to_phys(hyp_buffers.tx),
|
||||
.a2 = hyp_virt_to_phys(hyp_buffers.rx),
|
||||
|
|
@ -161,7 +161,7 @@ static int ffa_unmap_hyp_buffers(void)
|
|||
{
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_RXTX_UNMAP,
|
||||
.a1 = HOST_FFA_ID,
|
||||
}, &res);
|
||||
|
|
@ -172,7 +172,7 @@ static int ffa_unmap_hyp_buffers(void)
|
|||
static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 fraglen, u32 endpoint_id)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_FRAG_TX,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
|
|
@ -184,7 +184,7 @@ static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
|||
static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 fragoff)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_FRAG_RX,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
|
|
@ -196,7 +196,7 @@ static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
|||
static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
|
||||
u32 fraglen)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = func_id,
|
||||
.a1 = len,
|
||||
.a2 = fraglen,
|
||||
|
|
@ -206,7 +206,7 @@ static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
|
|||
static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
||||
u32 handle_hi, u32 flags)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_MEM_RECLAIM,
|
||||
.a1 = handle_lo,
|
||||
.a2 = handle_hi,
|
||||
|
|
@ -216,7 +216,7 @@ static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
|
|||
|
||||
static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_FN64_MEM_RETRIEVE_REQ,
|
||||
.a1 = len,
|
||||
.a2 = len,
|
||||
|
|
@ -225,7 +225,7 @@ static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
|
|||
|
||||
static void ffa_rx_release(struct arm_smccc_1_2_regs *res)
|
||||
{
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_RX_RELEASE,
|
||||
}, res);
|
||||
}
|
||||
|
|
@ -728,7 +728,7 @@ static int hyp_ffa_post_init(void)
|
|||
size_t min_rxtx_sz;
|
||||
struct arm_smccc_1_2_regs res;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
.a0 = FFA_ID_GET,
|
||||
}, &res);
|
||||
if (res.a0 != FFA_SUCCESS)
|
||||
|
|
@ -737,7 +737,7 @@ static int hyp_ffa_post_init(void)
|
|||
if (res.a2 != HOST_FFA_ID)
|
||||
return -EINVAL;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
|
||||
.a0 = FFA_FEATURES,
|
||||
.a1 = FFA_FN64_RXTX_MAP,
|
||||
}, &res);
|
||||
|
|
@ -788,7 +788,7 @@ static void do_ffa_version(struct arm_smccc_1_2_regs *res,
|
|||
* first if TEE supports it.
|
||||
*/
|
||||
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_VERSION,
|
||||
.a1 = ffa_req_version,
|
||||
}, res);
|
||||
|
|
@ -824,7 +824,7 @@ static void do_ffa_part_get(struct arm_smccc_1_2_regs *res,
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_PARTITION_INFO_GET,
|
||||
.a1 = uuid0,
|
||||
.a2 = uuid1,
|
||||
|
|
@ -939,7 +939,7 @@ int hyp_ffa_init(void *pages)
|
|||
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
|
||||
return 0;
|
||||
|
||||
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
hyp_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
|
||||
.a0 = FFA_VERSION,
|
||||
.a1 = FFA_VERSION_1_2,
|
||||
}, &res);
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ SYM_FUNC_START(__hyp_do_panic)
|
|||
|
||||
mov x29, x0
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_DEBUG
|
||||
#ifdef PKVM_DISABLE_STAGE2_ON_PANIC
|
||||
/* Ensure host stage-2 is disabled */
|
||||
mrs x0, hcr_el2
|
||||
bic x0, x0, #HCR_VM
|
||||
|
|
|
|||
|
|
@ -12,12 +12,14 @@
|
|||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include <nvhe/ffa.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/pkvm.h>
|
||||
#include <nvhe/trace.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
|
@ -136,6 +138,8 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
|||
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
|
||||
|
||||
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
hyp_vcpu->vcpu.arch.pid = host_vcpu->arch.pid;
|
||||
}
|
||||
|
||||
static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
|
|
@ -486,17 +490,15 @@ static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
|
|||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, size, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, nr_cpus, host_ctxt, 3);
|
||||
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 4);
|
||||
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 5);
|
||||
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 3);
|
||||
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 4);
|
||||
|
||||
/*
|
||||
* __pkvm_init() will return only if an error occurred, otherwise it
|
||||
* will tail-call in __pkvm_init_finalise() which will have to deal
|
||||
* with the host context directly.
|
||||
*/
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, nr_cpus, per_cpu_base,
|
||||
hyp_va_bits);
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, per_cpu_base, hyp_va_bits);
|
||||
}
|
||||
|
||||
static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -589,6 +591,65 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
|
|||
cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
|
||||
}
|
||||
|
||||
static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
|
||||
DECLARE_REG(size_t, desc_size, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size);
|
||||
}
|
||||
|
||||
/*
 * Host hypercall handler: unload the hypervisor trace buffer. The call takes
 * no arguments and writes nothing back into the host context.
 */
static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt)
{
	__tracing_unload();
}
|
||||
|
||||
static void handle___tracing_enable(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(bool, enable, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_enable(enable);
|
||||
}
|
||||
|
||||
static void handle___tracing_swap_reader(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu);
|
||||
}
|
||||
|
||||
static void handle___tracing_update_clock(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u32, mult, host_ctxt, 1);
|
||||
DECLARE_REG(u32, shift, host_ctxt, 2);
|
||||
DECLARE_REG(u64, epoch_ns, host_ctxt, 3);
|
||||
DECLARE_REG(u64, epoch_cyc, host_ctxt, 4);
|
||||
|
||||
__tracing_update_clock(mult, shift, epoch_ns, epoch_cyc);
|
||||
}
|
||||
|
||||
static void handle___tracing_reset(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_reset(cpu);
|
||||
}
|
||||
|
||||
static void handle___tracing_enable_event(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned short, id, host_ctxt, 1);
|
||||
DECLARE_REG(bool, enable, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __tracing_enable_event(id, enable);
|
||||
}
|
||||
|
||||
static void handle___tracing_write_event(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(u64, id, host_ctxt, 1);
|
||||
|
||||
trace_selftest(id);
|
||||
}
|
||||
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
|
|
@ -630,6 +691,14 @@ static const hcall_t host_hcall[] = {
|
|||
HANDLE_FUNC(__pkvm_vcpu_load),
|
||||
HANDLE_FUNC(__pkvm_vcpu_put),
|
||||
HANDLE_FUNC(__pkvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__tracing_load),
|
||||
HANDLE_FUNC(__tracing_unload),
|
||||
HANDLE_FUNC(__tracing_enable),
|
||||
HANDLE_FUNC(__tracing_swap_reader),
|
||||
HANDLE_FUNC(__tracing_update_clock),
|
||||
HANDLE_FUNC(__tracing_reset),
|
||||
HANDLE_FUNC(__tracing_enable_event),
|
||||
HANDLE_FUNC(__tracing_write_event),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -670,7 +739,9 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
|||
|
||||
static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_SMC);
|
||||
__kvm_hyp_host_forward_smc(host_ctxt);
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
|
||||
}
|
||||
|
||||
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -757,15 +828,19 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
|||
{
|
||||
u64 esr = read_sysreg_el2(SYS_ESR);
|
||||
|
||||
|
||||
switch (ESR_ELx_EC(esr)) {
|
||||
case ESR_ELx_EC_HVC64:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_HVC);
|
||||
handle_host_hcall(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SMC64:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
|
||||
handle_host_smc(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_IABT_LOW:
|
||||
case ESR_ELx_EC_DABT_LOW:
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_HOST_ABORT);
|
||||
handle_host_mem_abort(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SYS64:
|
||||
|
|
@ -775,4 +850,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
|||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_ERET_HOST);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,12 @@ SECTIONS {
|
|||
HYP_SECTION(.text)
|
||||
HYP_SECTION(.data..ro_after_init)
|
||||
HYP_SECTION(.rodata)
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
BEGIN_HYP_SECTION(.event_ids)
|
||||
*(SORT(.hyp.event_ids.*))
|
||||
END_HYP_SECTION
|
||||
#endif
|
||||
|
||||
/*
|
||||
* .hyp..data..percpu needs to be page aligned to maintain the same
|
||||
|
|
|
|||
|
|
@ -244,7 +244,7 @@ static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
|
|||
|
||||
void *hyp_fixmap_map(phys_addr_t phys)
|
||||
{
|
||||
return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
|
||||
return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys) + offset_in_page(phys);
|
||||
}
|
||||
|
||||
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
|
||||
|
|
@ -366,7 +366,7 @@ void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
|
|||
#ifdef HAS_FIXBLOCK
|
||||
*size = PMD_SIZE;
|
||||
hyp_spin_lock(&hyp_fixblock_lock);
|
||||
return fixmap_map_slot(&hyp_fixblock_slot, phys);
|
||||
return fixmap_map_slot(&hyp_fixblock_slot, phys) + offset_in_page(phys);
|
||||
#else
|
||||
*size = PAGE_SIZE;
|
||||
return hyp_fixmap_map(phys);
|
||||
|
|
|
|||
|
|
@ -6,11 +6,12 @@
|
|||
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <uapi/linux/psci.h>
|
||||
|
||||
#include <nvhe/arm-smccc.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
|
|
@ -65,7 +66,7 @@ static unsigned long psci_call(unsigned long fn, unsigned long arg0,
|
|||
{
|
||||
struct arm_smccc_res res;
|
||||
|
||||
arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
|
||||
hyp_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
|
||||
return res.a0;
|
||||
}
|
||||
|
||||
|
|
@ -206,6 +207,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
|
|||
struct kvm_cpu_context *host_ctxt;
|
||||
|
||||
host_ctxt = host_data_ptr(host_ctxt);
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_PSCI);
|
||||
|
||||
if (is_cpu_on)
|
||||
boot_args = this_cpu_ptr(&cpu_on_args);
|
||||
|
|
@ -221,6 +223,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
|
|||
write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR);
|
||||
write_sysreg(INIT_PSTATE_EL1, SPSR_EL2);
|
||||
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_PSCI);
|
||||
__host_enter(host_ctxt);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -341,8 +341,7 @@ void __noreturn __pkvm_init_finalise(void)
|
|||
__host_enter(host_ctxt);
|
||||
}
|
||||
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits)
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params;
|
||||
void *virt = hyp_phys_to_virt(phys);
|
||||
|
|
@ -355,7 +354,6 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
|||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock_init(&pkvm_pgd_lock);
|
||||
hyp_nr_cpus = nr_cpus;
|
||||
|
||||
ret = divide_memory_pool(virt, size);
|
||||
if (ret)
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
|
|||
stacktrace_info->pc = pc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
#ifdef CONFIG_PKVM_STACKTRACE
|
||||
#include <asm/stacktrace/nvhe.h>
|
||||
|
||||
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
|
||||
|
|
@ -134,11 +134,11 @@ static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
|||
|
||||
unwind(&state, pkvm_save_backtrace_entry, &idx);
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#else /* !CONFIG_PKVM_STACKTRACE */
|
||||
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#endif /* CONFIG_PKVM_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
#include <hyp/switch.h>
|
||||
#include <hyp/sysreg-sr.h>
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/jump_label.h>
|
||||
|
|
@ -21,6 +20,7 @@
|
|||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_hypevents.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
|
|
@ -308,10 +308,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
__debug_switch_to_guest(vcpu);
|
||||
|
||||
do {
|
||||
trace_hyp_exit(host_ctxt, HYP_REASON_ERET_GUEST);
|
||||
|
||||
/* Jump in the fire! */
|
||||
exit_code = __guest_enter(vcpu);
|
||||
|
||||
/* And we're baaack! */
|
||||
trace_hyp_enter(host_ctxt, HYP_REASON_GUEST_EXIT);
|
||||
} while (fixup_guest_exit(vcpu, &exit_code));
|
||||
|
||||
__sysreg_save_state_nvhe(guest_ctxt);
|
||||
|
|
|
|||
306
arch/arm64/kvm/hyp/nvhe/trace.c
Normal file
306
arch/arm64/kvm/hyp/nvhe/trace.c
Normal file
|
|
@ -0,0 +1,306 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <nvhe/clock.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trace.h>
|
||||
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
#include "simple_ring_buffer.c"
|
||||
|
||||
static DEFINE_PER_CPU(struct simple_rb_per_cpu, __simple_rbs);
|
||||
|
||||
static struct hyp_trace_buffer {
|
||||
struct simple_rb_per_cpu __percpu *simple_rbs;
|
||||
void *bpages_backing_start;
|
||||
size_t bpages_backing_size;
|
||||
hyp_spinlock_t lock;
|
||||
} trace_buffer = {
|
||||
.simple_rbs = &__simple_rbs,
|
||||
.lock = __HYP_SPIN_LOCK_UNLOCKED,
|
||||
};
|
||||
|
||||
static bool hyp_trace_buffer_loaded(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
return trace_buffer->bpages_backing_size > 0;
|
||||
}
|
||||
|
||||
void *tracing_reserve_entry(unsigned long length)
|
||||
{
|
||||
return simple_ring_buffer_reserve(this_cpu_ptr(trace_buffer.simple_rbs), length,
|
||||
trace_clock());
|
||||
}
|
||||
|
||||
void tracing_commit_entry(void)
|
||||
{
|
||||
simple_ring_buffer_commit(this_cpu_ptr(trace_buffer.simple_rbs));
|
||||
}
|
||||
|
||||
/*
 * Take ownership of a host memory range for hypervisor use.
 *
 * @start and @size must both be page aligned and @size must be non-zero,
 * otherwise -EINVAL is returned. When protected KVM is enabled the pages are
 * donated from the host to the hypervisor; otherwise nothing needs doing.
 */
static int __admit_host_mem(void *start, u64 size)
{
	if (!size || !PAGE_ALIGNED(size) || !PAGE_ALIGNED(start))
		return -EINVAL;

	if (is_protected_kvm_enabled())
		return __pkvm_host_donate_hyp(hyp_virt_to_pfn(start), size >> PAGE_SHIFT);

	return 0;
}
|
||||
|
||||
/*
 * Counterpart of __admit_host_mem(): with protected KVM enabled, donate the
 * pages back to the host and warn if that fails. No-op otherwise.
 */
static void __release_host_mem(void *start, u64 size)
{
	if (is_protected_kvm_enabled())
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(start), size >> PAGE_SHIFT));
}
|
||||
|
||||
static int hyp_trace_buffer_load_bpage_backing(struct hyp_trace_buffer *trace_buffer,
|
||||
struct hyp_trace_desc *desc)
|
||||
{
|
||||
void *start = (void *)kern_hyp_va(desc->bpages_backing_start);
|
||||
size_t size = desc->bpages_backing_size;
|
||||
int ret;
|
||||
|
||||
ret = __admit_host_mem(start, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
memset(start, 0, size);
|
||||
|
||||
trace_buffer->bpages_backing_start = start;
|
||||
trace_buffer->bpages_backing_size = size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unload_bpage_backing(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
void *start = trace_buffer->bpages_backing_start;
|
||||
size_t size = trace_buffer->bpages_backing_size;
|
||||
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
memset(start, 0, size);
|
||||
|
||||
__release_host_mem(start, size);
|
||||
|
||||
trace_buffer->bpages_backing_start = 0;
|
||||
trace_buffer->bpages_backing_size = 0;
|
||||
}
|
||||
|
||||
static void *__pin_shared_page(unsigned long kern_va)
|
||||
{
|
||||
void *va = kern_hyp_va((void *)kern_va);
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return va;
|
||||
|
||||
return hyp_pin_shared_mem(va, va + PAGE_SIZE) ? NULL : va;
|
||||
}
|
||||
|
||||
static void __unpin_shared_page(void *va)
|
||||
{
|
||||
if (!is_protected_kvm_enabled())
|
||||
return;
|
||||
|
||||
hyp_unpin_shared_mem(va, va + PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unload(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
hyp_assert_lock_held(&trace_buffer->lock);
|
||||
|
||||
if (!hyp_trace_buffer_loaded(trace_buffer))
|
||||
return;
|
||||
|
||||
for (cpu = 0; cpu < hyp_nr_cpus; cpu++)
|
||||
simple_ring_buffer_unload_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu),
|
||||
__unpin_shared_page);
|
||||
|
||||
hyp_trace_buffer_unload_bpage_backing(trace_buffer);
|
||||
}
|
||||
|
||||
static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
|
||||
struct hyp_trace_desc *desc)
|
||||
{
|
||||
struct simple_buffer_page *bpages;
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int ret, cpu;
|
||||
|
||||
hyp_assert_lock_held(&trace_buffer->lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(trace_buffer))
|
||||
return -EINVAL;
|
||||
|
||||
ret = hyp_trace_buffer_load_bpage_backing(trace_buffer, desc);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bpages = trace_buffer->bpages_backing_start;
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) {
|
||||
ret = simple_ring_buffer_init_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu),
|
||||
bpages, rb_desc, __pin_shared_page,
|
||||
__unpin_shared_page);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bpages += rb_desc->nr_page_va;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
hyp_trace_buffer_unload(trace_buffer);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Sanity-check a host-provided trace descriptor of @desc_size bytes.
 *
 * Every ring-buffer descriptor must lie entirely within the descriptor
 * memory, must not need more buffer pages than the bpages backing memory
 * still has to offer, and must describe the CPU it is iterated as (which in
 * turn must be below hyp_nr_cpus).
 */
static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size)
{
	struct ring_buffer_desc *rb_desc;
	size_t bpages_left;
	unsigned int cpu;
	void *end;

	/*
	 * desc_size and bpages_backing_size both come from the untrusted host.
	 * __pkvm_host_donate_hyp() is relied upon to enforce their validity.
	 */
	end = (void *)desc + desc_size;
	bpages_left = desc->bpages_backing_size / sizeof(struct simple_buffer_page);

	for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) {
		void *rb_start = rb_desc;

		/* The fixed part must be readable before nr_page_va is used. */
		if (rb_start + struct_size(rb_desc, page_va, 0) > end)
			return false;

		/* The page_va[] array must not run past the descriptor memory. */
		if (rb_start + struct_size(rb_desc, page_va, rb_desc->nr_page_va) > end)
			return false;

		/* Nor may it need more bpages than the backing memory holds. */
		if (rb_desc->nr_page_va > bpages_left)
			return false;

		if (cpu >= hyp_nr_cpus || cpu != rb_desc->cpu)
			return false;

		bpages_left -= rb_desc->nr_page_va;
	}

	return true;
}
|
||||
|
||||
/*
 * Load the hypervisor trace buffer from a host-provided descriptor.
 *
 * @desc_hva:  kernel VA of the descriptor
 * @desc_size: size of the descriptor memory in bytes; __admit_host_mem()
 *             requires it to be page aligned and non-zero
 *
 * The descriptor memory is admitted for the duration of the call so that it
 * can be validated and consumed, and is released again before returning,
 * whatever the outcome.
 *
 * Returns 0 on success, -EINVAL if the descriptor fails validation, or the
 * error from admitting the memory or loading the buffer.
 */
int __tracing_load(unsigned long desc_hva, size_t desc_size)
{
	struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva);
	int ret;

	ret = __admit_host_mem(desc, desc_size);
	if (ret)
		return ret;

	if (!hyp_trace_desc_validate(desc, desc_size)) {
		/* ret is still 0 here: make sure the host sees the failure. */
		ret = -EINVAL;
		goto err_release_desc;
	}

	hyp_spin_lock(&trace_buffer.lock);

	ret = hyp_trace_buffer_load(&trace_buffer, desc);

	hyp_spin_unlock(&trace_buffer.lock);

err_release_desc:
	__release_host_mem(desc, desc_size);
	return ret;
}
|
||||
|
||||
void __tracing_unload(void)
|
||||
{
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
hyp_trace_buffer_unload(&trace_buffer);
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
}
|
||||
|
||||
/*
 * Enable or disable tracing on every CPU's ring buffer.
 *
 * Returns 0 on success. If no buffer is loaded, enabling fails with -EINVAL
 * while disabling is reported as a success.
 */
int __tracing_enable(bool enable)
{
	int cpu, ret;

	hyp_spin_lock(&trace_buffer.lock);

	if (hyp_trace_buffer_loaded(&trace_buffer)) {
		for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
			struct simple_rb_per_cpu *rb = per_cpu_ptr(trace_buffer.simple_rbs, cpu);

			simple_ring_buffer_enable_tracing(rb, enable);
		}
		ret = 0;
	} else if (enable) {
		ret = -EINVAL;
	} else {
		ret = 0;
	}

	hyp_spin_unlock(&trace_buffer.lock);

	return ret;
}
|
||||
|
||||
int __tracing_swap_reader(unsigned int cpu)
|
||||
{
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (cpu >= hyp_nr_cpus)
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(&trace_buffer))
|
||||
ret = simple_ring_buffer_swap_reader_page(
|
||||
per_cpu_ptr(trace_buffer.simple_rbs, cpu));
|
||||
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Install new trace clock parameters.
 *
 * Each CPU's ring buffer is first polled until it is no longer in the
 * SIMPLE_RB_WRITING state, and only then is trace_clock_update() called.
 */
void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
{
	struct simple_rb_per_cpu *simple_rb;
	int cpu;

	/* Once this loop is done, every CPU is observing the new bank... */
	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
		simple_rb = per_cpu_ptr(trace_buffer.simple_rbs, cpu);

		/* Busy-wait for an in-flight writer on that CPU to finish. */
		while (READ_ONCE(simple_rb->status) == SIMPLE_RB_WRITING)
			;
	}

	/* ...so the old one can be overridden and swapped in. */
	trace_clock_update(mult, shift, epoch_ns, epoch_cyc);
}
|
||||
|
||||
int __tracing_reset(unsigned int cpu)
|
||||
{
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (cpu >= hyp_nr_cpus)
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&trace_buffer.lock);
|
||||
|
||||
if (hyp_trace_buffer_loaded(&trace_buffer))
|
||||
ret = simple_ring_buffer_reset(per_cpu_ptr(trace_buffer.simple_rbs, cpu));
|
||||
|
||||
hyp_spin_unlock(&trace_buffer.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
442
arch/arm64/kvm/hyp_trace.c
Normal file
442
arch/arm64/kvm/hyp_trace.c
Normal file
|
|
@ -0,0 +1,442 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/trace_remote.h>
|
||||
#include <linux/tracefs.h>
|
||||
#include <linux/simple_ring_buffer.h>
|
||||
|
||||
#include <asm/arch_timer.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_hyptrace.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include "hyp_trace.h"
|
||||
|
||||
/* Same 10min used by clocksource when width is more than 32-bits */
|
||||
#define CLOCK_MAX_CONVERSION_S 600
|
||||
/*
|
||||
* Time to give for the clock init. Long enough to get a good mult/shift
|
||||
* estimation. Short enough to not delay the tracing start too much.
|
||||
*/
|
||||
#define CLOCK_INIT_MS 100
|
||||
/*
|
||||
* Time between clock checks. Must be small enough to catch clock deviation when
|
||||
* it is still tiny.
|
||||
*/
|
||||
#define CLOCK_UPDATE_MS 500
|
||||
|
||||
static struct hyp_trace_clock {
|
||||
u64 cycles;
|
||||
u64 cyc_overflow64;
|
||||
u64 boot;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
struct delayed_work work;
|
||||
struct completion ready;
|
||||
struct mutex lock;
|
||||
bool running;
|
||||
} hyp_clock;
|
||||
|
||||
static void __hyp_clock_work(struct work_struct *work)
|
||||
{
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct hyp_trace_clock *hyp_clock;
|
||||
struct system_time_snapshot snap;
|
||||
u64 rate, delta_cycles;
|
||||
u64 boot, delta_boot;
|
||||
|
||||
hyp_clock = container_of(dwork, struct hyp_trace_clock, work);
|
||||
|
||||
ktime_get_snapshot(&snap);
|
||||
boot = ktime_to_ns(snap.boot);
|
||||
|
||||
delta_boot = boot - hyp_clock->boot;
|
||||
delta_cycles = snap.cycles - hyp_clock->cycles;
|
||||
|
||||
/* Compare hyp clock with the kernel boot clock */
|
||||
if (hyp_clock->mult) {
|
||||
u64 err, cur = delta_cycles;
|
||||
|
||||
if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
|
||||
__uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;
|
||||
|
||||
cur = tmp >> hyp_clock->shift;
|
||||
} else {
|
||||
cur *= hyp_clock->mult;
|
||||
cur >>= hyp_clock->shift;
|
||||
}
|
||||
cur += hyp_clock->boot;
|
||||
|
||||
err = abs_diff(cur, boot);
|
||||
/* No deviation, only update epoch if necessary */
|
||||
if (!err) {
|
||||
if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
|
||||
goto fast_forward;
|
||||
|
||||
goto resched;
|
||||
}
|
||||
|
||||
/* Warn if the error is above tracing precision (1us) */
|
||||
if (err > NSEC_PER_USEC)
|
||||
pr_warn_ratelimited("hyp trace clock off by %lluus\n",
|
||||
err / NSEC_PER_USEC);
|
||||
}
|
||||
|
||||
rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);
|
||||
|
||||
clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
|
||||
rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);
|
||||
|
||||
/* Add a comfortable 50% margin */
|
||||
hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;
|
||||
|
||||
fast_forward:
|
||||
hyp_clock->cycles = snap.cycles;
|
||||
hyp_clock->boot = boot;
|
||||
kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
|
||||
hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
|
||||
complete(&hyp_clock->ready);
|
||||
|
||||
resched:
|
||||
schedule_delayed_work(&hyp_clock->work,
|
||||
msecs_to_jiffies(CLOCK_UPDATE_MS));
|
||||
}
|
||||
|
||||
/*
 * Start or stop the periodic hypervisor clock synchronisation.
 *
 * Disabling cancels the delayed work (waiting for a running instance) and
 * returns. Enabling takes an initial snapshot as epoch, schedules the first
 * check after CLOCK_INIT_MS and blocks until that check has pushed clock
 * parameters to the hypervisor.
 *
 * Requests matching the current state are ignored.
 */
static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
{
	struct system_time_snapshot snap;

	if (hyp_clock->running == enable)
		return;

	if (!enable) {
		cancel_delayed_work_sync(&hyp_clock->work);
		hyp_clock->running = false;
		/* Stop here: falling through would re-arm the work just cancelled. */
		return;
	}

	ktime_get_snapshot(&snap);

	hyp_clock->boot = ktime_to_ns(snap.boot);
	hyp_clock->cycles = snap.cycles;
	/* mult == 0 makes the first work run compute mult/shift from scratch. */
	hyp_clock->mult = 0;

	init_completion(&hyp_clock->ready);
	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
	wait_for_completion(&hyp_clock->ready);
	hyp_clock->running = true;
}
|
||||
|
||||
/* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */
|
||||
static struct hyp_trace_buffer {
|
||||
struct hyp_trace_desc *desc;
|
||||
size_t desc_size;
|
||||
} trace_buffer;
|
||||
|
||||
/*
 * Map [@start, @start + @size) into the hypervisor with PAGE_HYP
 * permissions. With protected KVM enabled no mapping is created here and 0
 * is returned.
 */
static int __map_hyp(void *start, size_t size)
{
	void *end = start + size;

	if (!is_protected_kvm_enabled())
		return create_hyp_mappings(start, end, PAGE_HYP);

	return 0;
}
|
||||
|
||||
/* Share with the hypervisor the page containing kernel VA @va. */
static int __share_page(unsigned long va)
{
	void *from = (void *)va;

	/* A one-byte range is passed to kvm_share_hyp(). */
	return kvm_share_hyp(from, from + 1);
}
|
||||
|
||||
/* Undo __share_page() for the page containing kernel VA @va. */
static void __unshare_page(unsigned long va)
{
	void *from = (void *)va;

	kvm_unshare_hyp(from, from + 1);
}
|
||||
|
||||
/*
 * Allocate the memory the hypervisor will use for its buffer-page
 * descriptors and record it in the trace descriptor.
 *
 * Room is made, for every possible CPU, for one descriptor per page of @size
 * plus one extra.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
 * __map_hyp(). On failure nothing stays allocated and the descriptor is left
 * untouched.
 */
static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
{
	/* size_t rather than int: @size is caller-controlled and may be large. */
	size_t nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
	size_t backing_size;
	void *start;
	int ret;

	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
				  num_possible_cpus());

	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	ret = __map_hyp(start, backing_size);
	if (ret) {
		/* The caller's error path only frees the descriptor itself. */
		free_pages_exact(start, backing_size);
		return ret;
	}

	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
	trace_buffer->desc->bpages_backing_size = backing_size;

	return 0;
}
|
||||
|
||||
static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
|
||||
trace_buffer->desc->bpages_backing_size);
|
||||
}
|
||||
|
||||
static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int cpu, p;
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
|
||||
if (cpu > last_cpu)
|
||||
break;
|
||||
|
||||
__share_page(rb_desc->meta_va);
|
||||
for (p = 0; p < rb_desc->nr_page_va; p++)
|
||||
__unshare_page(rb_desc->page_va[p]);
|
||||
}
|
||||
}
|
||||
|
||||
static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int cpu, p, ret = 0;
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
|
||||
ret = __share_page(rb_desc->meta_va);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
for (p = 0; p < rb_desc->nr_page_va; p++) {
|
||||
ret = __share_page(rb_desc->page_va[p]);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
for (p--; p >= 0; p--)
|
||||
__unshare_page(rb_desc->page_va[p]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
hyp_trace_buffer_unshare_hyp(trace_buffer, cpu--);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
|
||||
{
|
||||
struct hyp_trace_buffer *trace_buffer = priv;
|
||||
struct hyp_trace_desc *desc;
|
||||
size_t desc_size;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON(trace_buffer->desc))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
desc_size = trace_buffer_desc_size(size, num_possible_cpus());
|
||||
if (desc_size == SIZE_MAX)
|
||||
return ERR_PTR(-E2BIG);
|
||||
|
||||
desc_size = PAGE_ALIGN(desc_size);
|
||||
desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
|
||||
if (!desc)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = __map_hyp(desc, desc_size);
|
||||
if (ret)
|
||||
goto err_free_desc;
|
||||
|
||||
trace_buffer->desc = desc;
|
||||
|
||||
ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
|
||||
if (ret)
|
||||
goto err_free_desc;
|
||||
|
||||
ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
|
||||
cpu_possible_mask);
|
||||
if (ret)
|
||||
goto err_free_backing;
|
||||
|
||||
ret = hyp_trace_buffer_share_hyp(trace_buffer);
|
||||
if (ret)
|
||||
goto err_free_buffer;
|
||||
|
||||
ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
|
||||
if (ret)
|
||||
goto err_unload_pages;
|
||||
|
||||
return &desc->trace_buffer_desc;
|
||||
|
||||
err_unload_pages:
|
||||
hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
|
||||
|
||||
err_free_buffer:
|
||||
trace_remote_free_buffer(&desc->trace_buffer_desc);
|
||||
|
||||
err_free_backing:
|
||||
hyp_trace_buffer_free_bpages_backing(trace_buffer);
|
||||
|
||||
err_free_desc:
|
||||
free_pages_exact(desc, desc_size);
|
||||
trace_buffer->desc = NULL;
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
|
||||
{
|
||||
struct hyp_trace_buffer *trace_buffer = priv;
|
||||
|
||||
if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
|
||||
return;
|
||||
|
||||
kvm_call_hyp_nvhe(__tracing_unload);
|
||||
hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
|
||||
trace_remote_free_buffer(desc);
|
||||
hyp_trace_buffer_free_bpages_backing(trace_buffer);
|
||||
free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
|
||||
trace_buffer->desc = NULL;
|
||||
}
|
||||
|
||||
/*
 * trace_remote callback: bring the clock synchronisation into the requested
 * state, then ask the hypervisor to enable or disable tracing. Returns the
 * hypercall's result.
 */
static int hyp_trace_enable_tracing(bool enable, void *priv)
{
	int ret;

	hyp_trace_clock_enable(&hyp_clock, enable);
	ret = kvm_call_hyp_nvhe(__tracing_enable, enable);

	return ret;
}
|
||||
|
||||
static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
|
||||
{
|
||||
return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
|
||||
}
|
||||
|
||||
static int hyp_trace_reset(unsigned int cpu, void *priv)
|
||||
{
|
||||
return kvm_call_hyp_nvhe(__tracing_reset, cpu);
|
||||
}
|
||||
|
||||
/*
 * trace_remote callback: enable or disable hypervisor event @id.
 *
 * With protected KVM the change is requested through a hypercall. Otherwise
 * the event's "enabled" flag is written directly through a temporary vmap()
 * of the page that holds it.
 *
 * Returns 0 or the hypercall's result, -ENOMEM if the mapping fails.
 */
static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
{
	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
	atomic_t *flag = &event_id->enabled;
	struct page *page;
	void *map;

	if (is_protected_kvm_enabled())
		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);

	page = virt_to_page(flag);
	map = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!map)
		return -ENOMEM;

	/* Same offset within the page, but through the temporary mapping. */
	flag = map + offset_in_page(flag);
	atomic_set(flag, enable);

	vunmap(map);

	return 0;
}
|
||||
|
||||
/* seq_file show handler of "trace_clock": the only clock offered is "boot". */
static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	seq_puts(m, "[boot]\n");

	return 0;
}
/* Generates hyp_trace_clock_fops, used by hyp_trace_init_tracefs(). */
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);
|
||||
|
||||
static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
|
||||
size_t cnt, loff_t *pos)
|
||||
{
|
||||
unsigned long val;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
kvm_call_hyp_nvhe(__tracing_write_event, val);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static const struct file_operations hyp_trace_write_event_fops = {
|
||||
.write = hyp_trace_write_event_write,
|
||||
};
|
||||
|
||||
static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
|
||||
{
|
||||
if (!tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops))
|
||||
return -ENOMEM;
|
||||
|
||||
return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ?
|
||||
0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct trace_remote_callbacks trace_remote_callbacks = {
|
||||
.init = hyp_trace_init_tracefs,
|
||||
.load_trace_buffer = hyp_trace_load,
|
||||
.unload_trace_buffer = hyp_trace_unload,
|
||||
.enable_tracing = hyp_trace_enable_tracing,
|
||||
.swap_reader_page = hyp_trace_swap_reader_page,
|
||||
.reset = hyp_trace_reset,
|
||||
.enable_event = hyp_trace_enable_event,
|
||||
};
|
||||
|
||||
static const char *__hyp_enter_exit_reason_str(u8 reason);
|
||||
|
||||
#include <asm/kvm_define_hypevents.h>
|
||||
|
||||
/*
 * Translate a hyp enter/exit @reason into a printable name.
 *
 * The lookup index is clamped to HYP_REASON_UNKNOWN, so any larger value
 * resolves to the entry stored at that index.
 * NOTE(review): presumably HYP_REASON_UNKNOWN indexes "unknown", the last
 * entry -- confirm against the enum definition.
 */
static const char *__hyp_enter_exit_reason_str(u8 reason)
{
	static const char names[][12] = {
		"smc", "hvc", "psci",
		"host_abort", "guest_exit",
		"eret_host", "eret_guest",
		"unknown",
	};

	return names[min(reason, HYP_REASON_UNKNOWN)];
}
|
||||
|
||||
/*
 * Assign consecutive IDs, starting at 0, to every entry between
 * __hyp_events_start and __hyp_events_end, and mirror each ID into the
 * matching slot of the __hyp_event_ids_start table.
 */
static void __init hyp_trace_init_events(void)
{
	struct hyp_event_id *hyp_id = __hyp_event_ids_start;
	struct remote_event *evt;
	int next_id = 0;

	/* The events are sorted on both sides, so both tables are walked in lockstep */
	for (evt = __hyp_events_start; evt < __hyp_events_end; evt++) {
		hyp_id->id = next_id;
		evt->id = hyp_id->id;

		hyp_id++;
		next_id++;
	}
}
|
||||
|
||||
/*
 * Register the "hypervisor" trace remote.
 *
 * Does nothing and returns 0 when the kernel itself runs in hyp mode.
 * Returns -EOPNOTSUPP if CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND is enabled and
 * any possible CPU has a timer erratum workaround providing read_cntvct_el0.
 * Otherwise initialises the event IDs and returns the result of
 * trace_remote_register().
 */
int __init kvm_hyp_trace_init(void)
{
	int cpu;

	if (is_kernel_in_hyp_mode())
		return 0;

	for_each_possible_cpu(cpu) {
		const struct arch_timer_erratum_workaround *wa =
			per_cpu(timer_unstable_counter_workaround, cpu);

		if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND))
			continue;

		if (!wa || !wa->read_cntvct_el0)
			continue;

		pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
		return -EOPNOTSUPP;
	}

	hyp_trace_init_events();

	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
				     __hyp_events_start,
				     __hyp_events_end - __hyp_events_start);
}
|
||||
11
arch/arm64/kvm/hyp_trace.h
Normal file
11
arch/arm64/kvm/hyp_trace.h
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef __ARM64_KVM_HYP_TRACE_H__
|
||||
#define __ARM64_KVM_HYP_TRACE_H__
|
||||
|
||||
#ifdef CONFIG_NVHE_EL2_TRACING
|
||||
int kvm_hyp_trace_init(void);
|
||||
#else
|
||||
static inline int kvm_hyp_trace_init(void) { return 0; }
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -197,7 +197,7 @@ static void hyp_dump_backtrace(unsigned long hyp_offset)
|
|||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
#ifdef CONFIG_PKVM_STACKTRACE
|
||||
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
|
||||
pkvm_stacktrace);
|
||||
|
||||
|
|
@ -225,12 +225,12 @@ static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
|||
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
|
||||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#else /* !CONFIG_PKVM_STACKTRACE */
|
||||
static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
|
||||
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PKVM_STACKTRACE\n");
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
#endif /* CONFIG_PKVM_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
|
||||
|
|
|
|||
|
|
@ -664,6 +664,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
|
|||
fsnotify_create(d_inode(dentry->d_parent), dentry);
|
||||
return tracefs_end_creating(dentry);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracefs_create_file);
|
||||
|
||||
static struct dentry *__create_dir(const char *name, struct dentry *parent,
|
||||
const struct inode_operations *ops)
|
||||
|
|
|
|||
|
|
@ -251,4 +251,62 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
|
|||
void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
|
||||
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
|
||||
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
|
||||
|
||||
struct ring_buffer_desc {
|
||||
int cpu;
|
||||
unsigned int nr_page_va; /* excludes the meta page */
|
||||
unsigned long meta_va;
|
||||
unsigned long page_va[] __counted_by(nr_page_va);
|
||||
};
|
||||
|
||||
struct trace_buffer_desc {
|
||||
int nr_cpus;
|
||||
size_t struct_len;
|
||||
char __data[]; /* list of ring_buffer_desc */
|
||||
};
|
||||
|
||||
static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
|
||||
{
|
||||
size_t len = struct_size(desc, page_va, desc->nr_page_va);
|
||||
|
||||
return (struct ring_buffer_desc *)((void *)desc + len);
|
||||
}
|
||||
|
||||
static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
|
||||
{
|
||||
return (struct ring_buffer_desc *)(&desc->__data[0]);
|
||||
}
|
||||
|
||||
/*
 * Compute the size of a trace_buffer_desc describing @nr_cpus ring-buffers
 * of @buffer_size bytes each.
 *
 * Each ring-buffer is sized for max(DIV_ROUND_UP(@buffer_size, PAGE_SIZE), 2)
 * pages plus one.
 * NOTE(review): the extra page is presumably the reader page -- confirm.
 *
 * The sum is built with size_add()/size_mul().
 */
static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
{
	struct ring_buffer_desc *rbdesc;	/* only its type is used, by struct_size() */
	unsigned int nr_pages;
	size_t percpu_len;

	nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
	percpu_len = struct_size(rbdesc, page_va, nr_pages);

	return size_add(offsetof(struct trace_buffer_desc, __data),
			size_mul(nr_cpus, percpu_len));
}
|
||||
|
||||
#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc) \
|
||||
for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0; \
|
||||
(__cpu) < (__trace_pdesc)->nr_cpus; \
|
||||
(__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
|
||||
|
||||
struct ring_buffer_remote {
|
||||
struct trace_buffer_desc *desc;
|
||||
int (*swap_reader_page)(unsigned int cpu, void *priv);
|
||||
int (*reset)(unsigned int cpu, void *priv);
|
||||
void *priv;
|
||||
};
|
||||
|
||||
int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
|
||||
|
||||
struct trace_buffer *
|
||||
__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
|
||||
struct lock_class_key *key);
|
||||
|
||||
#define ring_buffer_alloc_remote(remote) \
|
||||
({ \
|
||||
static struct lock_class_key __key; \
|
||||
__ring_buffer_alloc_remote(remote, &__key); \
|
||||
})
|
||||
#endif /* _LINUX_RING_BUFFER_H */
|
||||
|
|
|
|||
41
include/linux/ring_buffer_types.h
Normal file
41
include/linux/ring_buffer_types.h
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_RING_BUFFER_TYPES_H
|
||||
#define _LINUX_RING_BUFFER_TYPES_H
|
||||
|
||||
#include <asm/local.h>
|
||||
|
||||
#define TS_SHIFT 27
|
||||
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
|
||||
#define TS_DELTA_TEST (~TS_MASK)
|
||||
|
||||
/*
|
||||
* We need to fit the time_stamp delta into 27 bits.
|
||||
*/
|
||||
static inline bool test_time_stamp(u64 delta)
{
	/* True as soon as @delta has a bit set outside of TS_MASK */
	return (delta & TS_DELTA_TEST) != 0;
}
|
||||
|
||||
#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
|
||||
|
||||
#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
|
||||
#define RB_ALIGNMENT 4U
|
||||
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
|
||||
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
|
||||
|
||||
#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 0
|
||||
# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
|
||||
#else
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 1
|
||||
# define RB_ARCH_ALIGNMENT 8U
|
||||
#endif
|
||||
|
||||
#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
|
||||
|
||||
struct buffer_data_page {
|
||||
u64 time_stamp; /* page time stamp */
|
||||
local_t commit; /* write committed index */
|
||||
unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
|
||||
};
|
||||
#endif
|
||||
65
include/linux/simple_ring_buffer.h
Normal file
65
include/linux/simple_ring_buffer.h
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SIMPLE_RING_BUFFER_H
|
||||
#define _LINUX_SIMPLE_RING_BUFFER_H
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/ring_buffer.h>
|
||||
#include <linux/ring_buffer_types.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* Ideally these structs would stay private but the caller needs to know
|
||||
* the allocation size for simple_ring_buffer_init().
|
||||
*/
|
||||
struct simple_buffer_page {
|
||||
struct list_head link;
|
||||
struct buffer_data_page *page;
|
||||
u64 entries;
|
||||
u32 write;
|
||||
u32 id;
|
||||
};
|
||||
|
||||
struct simple_rb_per_cpu {
|
||||
struct simple_buffer_page *tail_page;
|
||||
struct simple_buffer_page *reader_page;
|
||||
struct simple_buffer_page *head_page;
|
||||
struct simple_buffer_page *bpages;
|
||||
struct trace_buffer_meta *meta;
|
||||
u32 nr_pages;
|
||||
|
||||
#define SIMPLE_RB_UNAVAILABLE 0
|
||||
#define SIMPLE_RB_READY 1
|
||||
#define SIMPLE_RB_WRITING 2
|
||||
u32 status;
|
||||
|
||||
u64 last_overrun;
|
||||
u64 write_stamp;
|
||||
|
||||
struct simple_rb_cbs *cbs;
|
||||
};
|
||||
|
||||
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc);
|
||||
|
||||
void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
|
||||
u64 timestamp);
|
||||
|
||||
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable);
|
||||
|
||||
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
|
||||
|
||||
int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc,
|
||||
void *(*load_page)(unsigned long va),
|
||||
void (*unload_page)(void *va));
|
||||
|
||||
void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
void (*unload_page)(void *));
|
||||
#endif
|
||||
48
include/linux/trace_remote.h
Normal file
48
include/linux/trace_remote.h
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _LINUX_TRACE_REMOTE_H
|
||||
#define _LINUX_TRACE_REMOTE_H
|
||||
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/ring_buffer.h>
|
||||
#include <linux/trace_remote_event.h>
|
||||
|
||||
/**
|
||||
* struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
|
||||
* @init: Called once the remote has been registered. Allows the
|
||||
* caller to extend the Tracefs remote directory
|
||||
* @load_trace_buffer: Called before Tracefs accesses the trace buffer for the first
|
||||
* time. Must return a &trace_buffer_desc
|
||||
* (most likely filled with trace_remote_alloc_buffer())
|
||||
* @unload_trace_buffer:
|
||||
* Called once Tracefs has no use for the trace buffer
|
||||
* (most likely call trace_remote_free_buffer())
|
||||
* @enable_tracing: Called on Tracefs tracing_on. It is expected from the
|
||||
* remote to allow writing.
|
||||
* @swap_reader_page: Called when Tracefs consumes a new page from a
|
||||
* ring-buffer. It is expected from the remote to isolate a
|
||||
* new reader-page from the @cpu ring-buffer.
|
||||
* @reset: Called on `echo 0 > trace`. It is expected from the
|
||||
* remote to reset all ring-buffer pages.
|
||||
* @enable_event: Called on events/event_name/enable. It is expected from
|
||||
* the remote to allow the writing event @id.
|
||||
*/
|
||||
struct trace_remote_callbacks {
|
||||
int (*init)(struct dentry *d, void *priv);
|
||||
struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
|
||||
void (*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
|
||||
int (*enable_tracing)(bool enable, void *priv);
|
||||
int (*swap_reader_page)(unsigned int cpu, void *priv);
|
||||
int (*reset)(unsigned int cpu, void *priv);
|
||||
int (*enable_event)(unsigned short id, bool enable, void *priv);
|
||||
};
|
||||
|
||||
int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
|
||||
struct remote_event *events, size_t nr_events);
|
||||
|
||||
int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
|
||||
const struct cpumask *cpumask);
|
||||
|
||||
void trace_remote_free_buffer(struct trace_buffer_desc *desc);
|
||||
|
||||
#endif
|
||||
33
include/linux/trace_remote_event.h
Normal file
33
include/linux/trace_remote_event.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _LINUX_TRACE_REMOTE_EVENTS_H
|
||||
#define _LINUX_TRACE_REMOTE_EVENTS_H
|
||||
|
||||
struct trace_remote;
|
||||
struct trace_event_fields;
|
||||
struct trace_seq;
|
||||
|
||||
struct remote_event_hdr {
|
||||
unsigned short id;
|
||||
};
|
||||
|
||||
#define REMOTE_EVENT_NAME_MAX 30
|
||||
struct remote_event {
|
||||
char name[REMOTE_EVENT_NAME_MAX];
|
||||
unsigned short id;
|
||||
bool enabled;
|
||||
struct trace_remote *remote;
|
||||
struct trace_event_fields *fields;
|
||||
char *print_fmt;
|
||||
void (*print)(void *evt, struct trace_seq *seq);
|
||||
};
|
||||
|
||||
#define RE_STRUCT(__args...) __args
|
||||
#define re_field(__type, __field) __type __field;
|
||||
|
||||
#define REMOTE_EVENT_FORMAT(__name, __struct) \
|
||||
struct remote_event_format_##__name { \
|
||||
struct remote_event_hdr hdr; \
|
||||
__struct \
|
||||
}
|
||||
#endif
|
||||
73
include/trace/define_remote_events.h
Normal file
73
include/trace/define_remote_events.h
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <linux/trace_events.h>
|
||||
#include <linux/trace_remote_event.h>
|
||||
#include <linux/trace_seq.h>
|
||||
#include <linux/stringify.h>
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE(__file) __stringify(../../__file)
|
||||
|
||||
#ifdef REMOTE_EVENT_SECTION
|
||||
# define __REMOTE_EVENT_SECTION(__name) __used __section(REMOTE_EVENT_SECTION"."#__name)
|
||||
#else
|
||||
# define __REMOTE_EVENT_SECTION(__name)
|
||||
#endif
|
||||
|
||||
#define REMOTE_PRINTK_COUNT_ARGS(__args...) \
|
||||
__COUNT_ARGS(, ##__args, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0)
|
||||
|
||||
#define __remote_printk0() \
|
||||
trace_seq_putc(seq, '\n')
|
||||
|
||||
/*
 * Single-argument form: the format string has no arguments, so it is emitted
 * directly, preceded by a space and followed by a newline.
 *
 * The last line deliberately carries no trailing backslash, so the macro
 * ends here regardless of what follows it.
 */
#define __remote_printk1(__fmt)						\
	trace_seq_puts(seq, " " __fmt "\n")
|
||||
|
||||
#define __remote_printk2(__fmt, __args...) \
|
||||
do { \
|
||||
trace_seq_putc(seq, ' '); \
|
||||
trace_seq_printf(seq, __fmt, __args); \
|
||||
trace_seq_putc(seq, '\n'); \
|
||||
} while (0)
|
||||
|
||||
/* Apply the appropriate trace_seq sequence according to the number of arguments */
|
||||
#define remote_printk(__args...) \
|
||||
CONCATENATE(__remote_printk, REMOTE_PRINTK_COUNT_ARGS(__args))(__args)
|
||||
|
||||
#define RE_PRINTK(__args...) __args
|
||||
|
||||
#define REMOTE_EVENT(__name, __id, __struct, __printk) \
|
||||
REMOTE_EVENT_FORMAT(__name, __struct); \
|
||||
static void remote_event_print_##__name(void *evt, struct trace_seq *seq) \
|
||||
{ \
|
||||
struct remote_event_format_##__name __maybe_unused *__entry = evt; \
|
||||
trace_seq_puts(seq, #__name); \
|
||||
remote_printk(__printk); \
|
||||
}
|
||||
#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
|
||||
|
||||
#undef REMOTE_EVENT
|
||||
#undef RE_PRINTK
|
||||
#undef re_field
|
||||
#define re_field(__type, __field) \
|
||||
{ \
|
||||
.type = #__type, .name = #__field, \
|
||||
.size = sizeof(__type), .align = __alignof__(__type), \
|
||||
.is_signed = is_signed_type(__type), \
|
||||
},
|
||||
#define __entry REC
|
||||
#define RE_PRINTK(__fmt, __args...) "\"" __fmt "\", " __stringify(__args)
|
||||
#define REMOTE_EVENT(__name, __id, __struct, __printk) \
|
||||
static struct trace_event_fields remote_event_fields_##__name[] = { \
|
||||
__struct \
|
||||
{} \
|
||||
}; \
|
||||
static char remote_event_print_fmt_##__name[] = __printk; \
|
||||
static struct remote_event __REMOTE_EVENT_SECTION(__name) \
|
||||
remote_event_##__name = { \
|
||||
.name = #__name, \
|
||||
.id = __id, \
|
||||
.fields = remote_event_fields_##__name, \
|
||||
.print_fmt = remote_event_print_fmt_##__name, \
|
||||
.print = remote_event_print_##__name, \
|
||||
}
|
||||
#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
|
||||
|
|
@ -17,8 +17,8 @@
|
|||
* @entries: Number of entries in the ring-buffer.
|
||||
* @overrun: Number of entries lost in the ring-buffer.
|
||||
* @read: Number of entries that have been read.
|
||||
* @Reserved1: Internal use only.
|
||||
* @Reserved2: Internal use only.
|
||||
* @pages_lost: Number of pages overwritten by the writer.
|
||||
* @pages_touched: Number of pages written by the writer.
|
||||
*/
|
||||
struct trace_buffer_meta {
|
||||
__u32 meta_page_size;
|
||||
|
|
@ -39,8 +39,8 @@ struct trace_buffer_meta {
|
|||
__u64 overrun;
|
||||
__u64 read;
|
||||
|
||||
__u64 Reserved1;
|
||||
__u64 Reserved2;
|
||||
__u64 pages_lost;
|
||||
__u64 pages_touched;
|
||||
};
|
||||
|
||||
#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)
|
||||
|
|
|
|||
|
|
@ -1281,4 +1281,18 @@ config HIST_TRIGGERS_DEBUG
|
|||
|
||||
source "kernel/trace/rv/Kconfig"
|
||||
|
||||
config TRACE_REMOTE
|
||||
bool
|
||||
|
||||
config SIMPLE_RING_BUFFER
|
||||
bool
|
||||
|
||||
config TRACE_REMOTE_TEST
|
||||
tristate "Test module for remote tracing"
|
||||
select TRACE_REMOTE
|
||||
select SIMPLE_RING_BUFFER
|
||||
help
|
||||
This trace remote includes a ring-buffer writer implementation using
|
||||
"simple_ring_buffer". This is solely intended for testing.
|
||||
|
||||
endif # FTRACE
|
||||
|
|
|
|||
|
|
@ -128,4 +128,63 @@ obj-$(CONFIG_FPROBE_EVENTS) += trace_fprobe.o
|
|||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
obj-$(CONFIG_RV) += rv/
|
||||
|
||||
obj-$(CONFIG_TRACE_REMOTE) += trace_remote.o
|
||||
obj-$(CONFIG_SIMPLE_RING_BUFFER) += simple_ring_buffer.o
|
||||
obj-$(CONFIG_TRACE_REMOTE_TEST) += remote_test.o
|
||||
|
||||
#
|
||||
# simple_ring_buffer is used by the pKVM hypervisor which does not have access
|
||||
# to all kernel symbols. Fail the build if forbidden symbols are found.
|
||||
#
|
||||
# undefsyms_base generates a set of compiler and tooling-generated symbols that can
|
||||
# safely be ignored for simple_ring_buffer.
|
||||
#
|
||||
filechk_undefsyms_base = \
|
||||
echo '$(pound)include <linux/atomic.h>'; \
|
||||
echo '$(pound)include <linux/string.h>'; \
|
||||
echo '$(pound)include <asm/page.h>'; \
|
||||
echo 'static char page[PAGE_SIZE] __aligned(PAGE_SIZE);'; \
|
||||
echo 'void undefsyms_base(void *p, int n);'; \
|
||||
echo 'void undefsyms_base(void *p, int n) {'; \
|
||||
echo ' char buffer[256] = { 0 };'; \
|
||||
echo ' u32 u = 0;'; \
|
||||
echo ' memset((char * volatile)page, 8, PAGE_SIZE);'; \
|
||||
echo ' memset((char * volatile)buffer, 8, sizeof(buffer));'; \
|
||||
echo ' memcpy((void * volatile)p, buffer, sizeof(buffer));'; \
|
||||
echo ' cmpxchg((u32 * volatile)&u, 0, 8);'; \
|
||||
echo ' WARN_ON(n == 0xdeadbeef);'; \
|
||||
echo '}'
|
||||
|
||||
$(obj)/undefsyms_base.c: FORCE
|
||||
$(call filechk,undefsyms_base)
|
||||
|
||||
clean-files += undefsyms_base.c
|
||||
|
||||
$(obj)/undefsyms_base.o: $(obj)/undefsyms_base.c
|
||||
|
||||
targets += undefsyms_base.o
|
||||
|
||||
# Ensure KASAN is enabled to avoid logic that may disable FORTIFY_SOURCE when
|
||||
# KASAN is not enabled. undefsyms_base.o does not automatically get KASAN flags
|
||||
# because it is not linked into vmlinux.
|
||||
KASAN_SANITIZE_undefsyms_base.o := y
|
||||
|
||||
UNDEFINED_ALLOWLIST = __asan __gcov __kasan __kcsan __hwasan __sancov __sanitizer __tsan __ubsan __x86_indirect_thunk \
|
||||
__msan simple_ring_buffer \
|
||||
$(shell $(NM) -u $(obj)/undefsyms_base.o 2>/dev/null | awk '{print $$2}')
|
||||
|
||||
quiet_cmd_check_undefined = NM $<
|
||||
cmd_check_undefined = \
|
||||
undefsyms=$$($(NM) -u $< | grep -v $(addprefix -e , $(UNDEFINED_ALLOWLIST)) || true); \
|
||||
if [ -n "$$undefsyms" ]; then \
|
||||
echo "Unexpected symbols in $<:" >&2; \
|
||||
echo "$$undefsyms" >&2; \
|
||||
false; \
|
||||
fi
|
||||
|
||||
$(obj)/%.o.checked: $(obj)/%.o $(obj)/undefsyms_base.o FORCE
|
||||
$(call if_changed,check_undefined)
|
||||
|
||||
always-$(CONFIG_SIMPLE_RING_BUFFER) += simple_ring_buffer.o.checked
|
||||
|
||||
libftrace-y := ftrace.o
|
||||
|
|
|
|||
261
kernel/trace/remote_test.c
Normal file
261
kernel/trace/remote_test.c
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2025 - Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/simple_ring_buffer.h>
|
||||
#include <linux/trace_remote.h>
|
||||
#include <linux/tracefs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define REMOTE_EVENT_INCLUDE_FILE kernel/trace/remote_test_events.h
|
||||
#include <trace/define_remote_events.h>
|
||||
|
||||
static DEFINE_PER_CPU(struct simple_rb_per_cpu *, simple_rbs);
|
||||
static struct trace_buffer_desc *remote_test_buffer_desc;
|
||||
|
||||
/*
|
||||
* The trace_remote lock already serializes accesses from the trace_remote_callbacks.
|
||||
* However write_event can still race with load/unload.
|
||||
*/
|
||||
static DEFINE_MUTEX(simple_rbs_lock);
|
||||
|
||||
static int remote_test_load_simple_rb(int cpu, struct ring_buffer_desc *rb_desc)
|
||||
{
|
||||
struct simple_rb_per_cpu *cpu_buffer;
|
||||
struct simple_buffer_page *bpages;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
cpu_buffer = kmalloc_obj(*cpu_buffer);
|
||||
if (!cpu_buffer)
|
||||
return ret;
|
||||
|
||||
bpages = kmalloc_objs(*bpages, rb_desc->nr_page_va);
|
||||
if (!bpages)
|
||||
goto err_free_cpu_buffer;
|
||||
|
||||
ret = simple_ring_buffer_init(cpu_buffer, bpages, rb_desc);
|
||||
if (ret)
|
||||
goto err_free_bpages;
|
||||
|
||||
scoped_guard(mutex, &simple_rbs_lock) {
|
||||
WARN_ON(*per_cpu_ptr(&simple_rbs, cpu));
|
||||
*per_cpu_ptr(&simple_rbs, cpu) = cpu_buffer;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_bpages:
|
||||
kfree(bpages);
|
||||
|
||||
err_free_cpu_buffer:
|
||||
kfree(cpu_buffer);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void remote_test_unload_simple_rb(int cpu)
|
||||
{
|
||||
struct simple_rb_per_cpu *cpu_buffer = *per_cpu_ptr(&simple_rbs, cpu);
|
||||
struct simple_buffer_page *bpages;
|
||||
|
||||
if (!cpu_buffer)
|
||||
return;
|
||||
|
||||
guard(mutex)(&simple_rbs_lock);
|
||||
|
||||
bpages = cpu_buffer->bpages;
|
||||
simple_ring_buffer_unload(cpu_buffer);
|
||||
kfree(bpages);
|
||||
kfree(cpu_buffer);
|
||||
*per_cpu_ptr(&simple_rbs, cpu) = NULL;
|
||||
}
|
||||
|
||||
static struct trace_buffer_desc *remote_test_load(unsigned long size, void *unused)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
struct trace_buffer_desc *desc;
|
||||
size_t desc_size;
|
||||
int cpu, ret;
|
||||
|
||||
if (WARN_ON(remote_test_buffer_desc))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
desc_size = trace_buffer_desc_size(size, num_possible_cpus());
|
||||
if (desc_size == SIZE_MAX) {
|
||||
ret = -E2BIG;
|
||||
goto err;
|
||||
}
|
||||
|
||||
desc = kmalloc(desc_size, GFP_KERNEL);
|
||||
if (!desc) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = trace_remote_alloc_buffer(desc, desc_size, size, cpu_possible_mask);
|
||||
if (ret)
|
||||
goto err_free_desc;
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, desc) {
|
||||
ret = remote_test_load_simple_rb(rb_desc->cpu, rb_desc);
|
||||
if (ret)
|
||||
goto err_unload;
|
||||
}
|
||||
|
||||
remote_test_buffer_desc = desc;
|
||||
|
||||
return remote_test_buffer_desc;
|
||||
|
||||
err_unload:
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, remote_test_buffer_desc)
|
||||
remote_test_unload_simple_rb(rb_desc->cpu);
|
||||
trace_remote_free_buffer(remote_test_buffer_desc);
|
||||
|
||||
err_free_desc:
|
||||
kfree(desc);
|
||||
|
||||
err:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static void remote_test_unload(struct trace_buffer_desc *desc, void *unused)
|
||||
{
|
||||
struct ring_buffer_desc *rb_desc;
|
||||
int cpu;
|
||||
|
||||
if (WARN_ON(desc != remote_test_buffer_desc))
|
||||
return;
|
||||
|
||||
for_each_ring_buffer_desc(rb_desc, cpu, desc)
|
||||
remote_test_unload_simple_rb(rb_desc->cpu);
|
||||
|
||||
remote_test_buffer_desc = NULL;
|
||||
trace_remote_free_buffer(desc);
|
||||
kfree(desc);
|
||||
}
|
||||
|
||||
/*
 * trace_remote ->enable_tracing() callback: apply @enable to the simple
 * ring-buffer of every CPU described by the loaded descriptor.
 *
 * Returns -ENODEV when no buffer is loaded, 0 otherwise. A failure on an
 * individual ring-buffer only triggers a warning. @unused is ignored.
 */
static int remote_test_enable_tracing(bool enable, void *unused)
{
	struct ring_buffer_desc *rb_desc;
	int i;

	if (!remote_test_buffer_desc)
		return -ENODEV;

	for_each_ring_buffer_desc(rb_desc, i, remote_test_buffer_desc) {
		struct simple_rb_per_cpu *rb = *per_cpu_ptr(&simple_rbs, rb_desc->cpu);

		WARN_ON(simple_ring_buffer_enable_tracing(rb, enable));
	}

	return 0;
}
|
||||
|
||||
static int remote_test_swap_reader_page(unsigned int cpu, void *unused)
|
||||
{
|
||||
struct simple_rb_per_cpu *cpu_buffer;
|
||||
|
||||
if (cpu >= NR_CPUS)
|
||||
return -EINVAL;
|
||||
|
||||
cpu_buffer = *per_cpu_ptr(&simple_rbs, cpu);
|
||||
if (!cpu_buffer)
|
||||
return -EINVAL;
|
||||
|
||||
return simple_ring_buffer_swap_reader_page(cpu_buffer);
|
||||
}
|
||||
|
||||
static int remote_test_reset(unsigned int cpu, void *unused)
|
||||
{
|
||||
struct simple_rb_per_cpu *cpu_buffer;
|
||||
|
||||
if (cpu >= NR_CPUS)
|
||||
return -EINVAL;
|
||||
|
||||
cpu_buffer = *per_cpu_ptr(&simple_rbs, cpu);
|
||||
if (!cpu_buffer)
|
||||
return -EINVAL;
|
||||
|
||||
return simple_ring_buffer_reset(cpu_buffer);
|
||||
}
|
||||
|
||||
/*
 * trace_remote ->enable_event() callback.
 *
 * Only validates @id: the test module keeps no enable state of its own and
 * relies on the struct remote_event 'enabled' field, which trace_remote
 * turns on and off. This is a bit racy but good enough for a simple test
 * module.
 *
 * Returns 0 for REMOTE_TEST_EVENT_ID, -EINVAL for any other @id.
 */
static int remote_test_enable_event(unsigned short id, bool enable, void *unused)
{
	return id == REMOTE_TEST_EVENT_ID ? 0 : -EINVAL;
}
|
||||
|
||||
static ssize_t
|
||||
write_event_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *pos)
|
||||
{
|
||||
struct remote_event_format_selftest *evt_test;
|
||||
struct simple_rb_per_cpu *cpu_buffer;
|
||||
unsigned long val;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
guard(mutex)(&simple_rbs_lock);
|
||||
|
||||
if (!remote_event_selftest.enabled)
|
||||
return -ENODEV;
|
||||
|
||||
guard(preempt)();
|
||||
|
||||
cpu_buffer = *this_cpu_ptr(&simple_rbs);
|
||||
if (!cpu_buffer)
|
||||
return -ENODEV;
|
||||
|
||||
evt_test = simple_ring_buffer_reserve(cpu_buffer,
|
||||
sizeof(struct remote_event_format_selftest),
|
||||
trace_clock_global());
|
||||
if (!evt_test)
|
||||
return -ENODEV;
|
||||
|
||||
evt_test->hdr.id = REMOTE_TEST_EVENT_ID;
|
||||
evt_test->id = val;
|
||||
|
||||
simple_ring_buffer_commit(cpu_buffer);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static const struct file_operations write_event_fops = {
|
||||
.write = write_event_write,
|
||||
};
|
||||
|
||||
static int remote_test_init_tracefs(struct dentry *d, void *unused)
|
||||
{
|
||||
return tracefs_create_file("write_event", 0200, d, NULL, &write_event_fops) ?
|
||||
0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static struct trace_remote_callbacks trace_remote_callbacks = {
|
||||
.init = remote_test_init_tracefs,
|
||||
.load_trace_buffer = remote_test_load,
|
||||
.unload_trace_buffer = remote_test_unload,
|
||||
.enable_tracing = remote_test_enable_tracing,
|
||||
.swap_reader_page = remote_test_swap_reader_page,
|
||||
.reset = remote_test_reset,
|
||||
.enable_event = remote_test_enable_event,
|
||||
};
|
||||
|
||||
/* Module init: register the "test" trace remote with its single selftest event */
static int __init remote_test_init(void)
{
	struct remote_event *events = &remote_event_selftest;

	return trace_remote_register("test", &trace_remote_callbacks, NULL, events, 1);
}
|
||||
|
||||
module_init(remote_test_init);
|
||||
|
||||
MODULE_DESCRIPTION("Test module for the trace remote interface");
|
||||
MODULE_AUTHOR("Vincent Donnefort");
|
||||
MODULE_LICENSE("GPL");
|
||||
10
kernel/trace/remote_test_events.h
Normal file
10
kernel/trace/remote_test_events.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#define REMOTE_TEST_EVENT_ID 1
|
||||
|
||||
REMOTE_EVENT(selftest, REMOTE_TEST_EVENT_ID,
|
||||
RE_STRUCT(
|
||||
re_field(u64, id)
|
||||
),
|
||||
RE_PRINTK("id=%llu", __entry->id)
|
||||
);
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
*
|
||||
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
|
||||
*/
|
||||
#include <linux/ring_buffer_types.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/trace_recursion.h>
|
||||
#include <linux/trace_events.h>
|
||||
|
|
@ -157,23 +158,6 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
|
|||
/* Used for individual buffers (after the counter) */
|
||||
#define RB_BUFFER_OFF (1 << 20)
|
||||
|
||||
#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
|
||||
|
||||
#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
|
||||
#define RB_ALIGNMENT 4U
|
||||
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
|
||||
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
|
||||
|
||||
#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 0
|
||||
# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
|
||||
#else
|
||||
# define RB_FORCE_8BYTE_ALIGNMENT 1
|
||||
# define RB_ARCH_ALIGNMENT 8U
|
||||
#endif
|
||||
|
||||
#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
|
||||
|
||||
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
|
||||
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
|
||||
|
||||
|
|
@ -316,10 +300,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
|
|||
#define for_each_online_buffer_cpu(buffer, cpu) \
|
||||
for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
|
||||
|
||||
#define TS_SHIFT 27
|
||||
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
|
||||
#define TS_DELTA_TEST (~TS_MASK)
|
||||
|
||||
static u64 rb_event_time_stamp(struct ring_buffer_event *event)
|
||||
{
|
||||
u64 ts;
|
||||
|
|
@ -338,12 +318,6 @@ static u64 rb_event_time_stamp(struct ring_buffer_event *event)
|
|||
|
||||
#define RB_MISSED_MASK (3 << 30)
|
||||
|
||||
struct buffer_data_page {
|
||||
u64 time_stamp; /* page time stamp */
|
||||
local_t commit; /* write committed index */
|
||||
unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
|
||||
};
|
||||
|
||||
struct buffer_data_read_page {
|
||||
unsigned order; /* order of the page */
|
||||
struct buffer_data_page *data; /* actual data, stored in this page */
|
||||
|
|
@ -437,14 +411,6 @@ static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
|
|||
return dpage;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to fit the time_stamp delta into 27 bits.
|
||||
*/
|
||||
static inline bool test_time_stamp(u64 delta)
|
||||
{
|
||||
return !!(delta & TS_DELTA_TEST);
|
||||
}
|
||||
|
||||
struct rb_irq_work {
|
||||
struct irq_work work;
|
||||
wait_queue_head_t waiters;
|
||||
|
|
@ -555,10 +521,12 @@ struct ring_buffer_per_cpu {
|
|||
unsigned int mapped;
|
||||
unsigned int user_mapped; /* user space mapping */
|
||||
struct mutex mapping_lock;
|
||||
unsigned long *subbuf_ids; /* ID to subbuf VA */
|
||||
struct buffer_page **subbuf_ids; /* ID to subbuf VA */
|
||||
struct trace_buffer_meta *meta_page;
|
||||
struct ring_buffer_cpu_meta *ring_meta;
|
||||
|
||||
struct ring_buffer_remote *remote;
|
||||
|
||||
/* ring buffer pages to update, > 0 to add, < 0 to remove */
|
||||
long nr_pages_to_update;
|
||||
struct list_head new_pages; /* new pages to add */
|
||||
|
|
@ -581,6 +549,8 @@ struct trace_buffer {
|
|||
|
||||
struct ring_buffer_per_cpu **buffers;
|
||||
|
||||
struct ring_buffer_remote *remote;
|
||||
|
||||
struct hlist_node node;
|
||||
u64 (*clock)(void);
|
||||
|
||||
|
|
@ -636,7 +606,8 @@ int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq
|
|||
trace_seq_printf(s, "\tfield: char data;\t"
|
||||
"offset:%u;\tsize:%u;\tsigned:%u;\n",
|
||||
(unsigned int)offsetof(typeof(field), data),
|
||||
(unsigned int)buffer->subbuf_size,
|
||||
(unsigned int)(buffer ? buffer->subbuf_size :
|
||||
PAGE_SIZE - BUF_PAGE_HDR_SIZE),
|
||||
(unsigned int)is_signed_type(char));
|
||||
|
||||
return !trace_seq_has_overflowed(s);
|
||||
|
|
@ -2238,6 +2209,40 @@ static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
static struct ring_buffer_desc *ring_buffer_desc(struct trace_buffer_desc *trace_desc, int cpu)
|
||||
{
|
||||
struct ring_buffer_desc *desc, *end;
|
||||
size_t len;
|
||||
int i;
|
||||
|
||||
if (!trace_desc)
|
||||
return NULL;
|
||||
|
||||
if (cpu >= trace_desc->nr_cpus)
|
||||
return NULL;
|
||||
|
||||
end = (struct ring_buffer_desc *)((void *)trace_desc + trace_desc->struct_len);
|
||||
desc = __first_ring_buffer_desc(trace_desc);
|
||||
len = struct_size(desc, page_va, desc->nr_page_va);
|
||||
desc = (struct ring_buffer_desc *)((void *)desc + (len * cpu));
|
||||
|
||||
if (desc < end && desc->cpu == cpu)
|
||||
return desc;
|
||||
|
||||
/* Missing CPUs, need to linear search */
|
||||
for_each_ring_buffer_desc(desc, i, trace_desc) {
|
||||
if (desc->cpu == cpu)
|
||||
return desc;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *ring_buffer_desc_page(struct ring_buffer_desc *desc, int page_id)
|
||||
{
|
||||
return page_id > desc->nr_page_va ? NULL : (void *)desc->page_va[page_id];
|
||||
}
|
||||
|
||||
static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
long nr_pages, struct list_head *pages)
|
||||
{
|
||||
|
|
@ -2245,6 +2250,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
struct ring_buffer_cpu_meta *meta = NULL;
|
||||
struct buffer_page *bpage, *tmp;
|
||||
bool user_thread = current->mm != NULL;
|
||||
struct ring_buffer_desc *desc = NULL;
|
||||
long i;
|
||||
|
||||
/*
|
||||
|
|
@ -2273,6 +2279,12 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
if (buffer->range_addr_start)
|
||||
meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
|
||||
|
||||
if (buffer->remote) {
|
||||
desc = ring_buffer_desc(buffer->remote->desc, cpu_buffer->cpu);
|
||||
if (!desc || WARN_ON(desc->nr_page_va != (nr_pages + 1)))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
|
||||
bpage = alloc_cpu_page(cpu_buffer->cpu);
|
||||
|
|
@ -2297,6 +2309,16 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
rb_meta_buffer_update(cpu_buffer, bpage);
|
||||
bpage->range = 1;
|
||||
bpage->id = i + 1;
|
||||
} else if (desc) {
|
||||
void *p = ring_buffer_desc_page(desc, i + 1);
|
||||
|
||||
if (WARN_ON(!p))
|
||||
goto free_pages;
|
||||
|
||||
bpage->page = p;
|
||||
bpage->range = 1; /* bpage->page can't be freed */
|
||||
bpage->id = i + 1;
|
||||
cpu_buffer->subbuf_ids[i + 1] = bpage;
|
||||
} else {
|
||||
int order = cpu_buffer->buffer->subbuf_order;
|
||||
bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
|
||||
|
|
@ -2394,6 +2416,30 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
|
|||
if (cpu_buffer->ring_meta->head_buffer)
|
||||
rb_meta_buffer_update(cpu_buffer, bpage);
|
||||
bpage->range = 1;
|
||||
} else if (buffer->remote) {
|
||||
struct ring_buffer_desc *desc = ring_buffer_desc(buffer->remote->desc, cpu);
|
||||
|
||||
if (!desc)
|
||||
goto fail_free_reader;
|
||||
|
||||
cpu_buffer->remote = buffer->remote;
|
||||
cpu_buffer->meta_page = (struct trace_buffer_meta *)(void *)desc->meta_va;
|
||||
cpu_buffer->nr_pages = nr_pages;
|
||||
cpu_buffer->subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1,
|
||||
sizeof(*cpu_buffer->subbuf_ids), GFP_KERNEL);
|
||||
if (!cpu_buffer->subbuf_ids)
|
||||
goto fail_free_reader;
|
||||
|
||||
/* Remote buffers are read-only and immutable */
|
||||
atomic_inc(&cpu_buffer->record_disabled);
|
||||
atomic_inc(&cpu_buffer->resize_disabled);
|
||||
|
||||
bpage->page = ring_buffer_desc_page(desc, cpu_buffer->meta_page->reader.id);
|
||||
if (!bpage->page)
|
||||
goto fail_free_reader;
|
||||
|
||||
bpage->range = 1;
|
||||
cpu_buffer->subbuf_ids[0] = bpage;
|
||||
} else {
|
||||
int order = cpu_buffer->buffer->subbuf_order;
|
||||
bpage->page = alloc_cpu_data(cpu, order);
|
||||
|
|
@ -2453,6 +2499,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
|
|||
|
||||
irq_work_sync(&cpu_buffer->irq_work.work);
|
||||
|
||||
if (cpu_buffer->remote)
|
||||
kfree(cpu_buffer->subbuf_ids);
|
||||
|
||||
free_buffer_page(cpu_buffer->reader_page);
|
||||
|
||||
if (head) {
|
||||
|
|
@ -2475,7 +2524,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
|
|||
int order, unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long scratch_size,
|
||||
struct lock_class_key *key)
|
||||
struct lock_class_key *key,
|
||||
struct ring_buffer_remote *remote)
|
||||
{
|
||||
struct trace_buffer *buffer __free(kfree) = NULL;
|
||||
long nr_pages;
|
||||
|
|
@ -2515,6 +2565,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
|
|||
if (!buffer->buffers)
|
||||
goto fail_free_cpumask;
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
|
||||
/* If start/end are specified, then that overrides size */
|
||||
if (start && end) {
|
||||
unsigned long buffers_start;
|
||||
|
|
@ -2570,6 +2622,15 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
|
|||
buffer->range_addr_end = end;
|
||||
|
||||
rb_range_meta_init(buffer, nr_pages, scratch_size);
|
||||
} else if (remote) {
|
||||
struct ring_buffer_desc *desc = ring_buffer_desc(remote->desc, cpu);
|
||||
|
||||
buffer->remote = remote;
|
||||
/* The writer is remote. This ring-buffer is read-only */
|
||||
atomic_inc(&buffer->record_disabled);
|
||||
nr_pages = desc->nr_page_va - 1;
|
||||
if (nr_pages < 2)
|
||||
goto fail_free_buffers;
|
||||
} else {
|
||||
|
||||
/* need at least two pages */
|
||||
|
|
@ -2578,7 +2639,6 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
|
|||
nr_pages = 2;
|
||||
}
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
cpumask_set_cpu(cpu, buffer->cpumask);
|
||||
buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
|
||||
if (!buffer->buffers[cpu])
|
||||
|
|
@ -2620,7 +2680,7 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
|
|||
struct lock_class_key *key)
|
||||
{
|
||||
/* Default buffer page size - one system page */
|
||||
return alloc_buffer(size, flags, 0, 0, 0, 0, key);
|
||||
return alloc_buffer(size, flags, 0, 0, 0, 0, key, NULL);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
|
||||
|
|
@ -2647,7 +2707,18 @@ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flag
|
|||
struct lock_class_key *key)
|
||||
{
|
||||
return alloc_buffer(size, flags, order, start, start + range_size,
|
||||
scratch_size, key);
|
||||
scratch_size, key, NULL);
|
||||
}
|
||||
|
||||
/**
 * __ring_buffer_alloc_remote - allocate a new ring_buffer from a remote
 * @remote: Description of the ring-buffer pages along with the remote callbacks.
 * @key: The reader_lock_key of the ring buffer.
 *
 * Every other alloc_buffer() argument (size, flags, order, start, end and
 * scratch size) is passed as zero.
 */
struct trace_buffer *__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
						struct lock_class_key *key)
{
	struct trace_buffer *buffer;

	buffer = alloc_buffer(0, 0, 0, 0, 0, 0, key, remote);

	return buffer;
}
|
||||
|
||||
void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size)
|
||||
|
|
@ -5274,10 +5345,61 @@ unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(ring_buffer_overruns);
|
||||
|
||||
static bool rb_read_remote_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
local_set(&cpu_buffer->entries, READ_ONCE(cpu_buffer->meta_page->entries));
|
||||
local_set(&cpu_buffer->overrun, READ_ONCE(cpu_buffer->meta_page->overrun));
|
||||
local_set(&cpu_buffer->pages_touched, READ_ONCE(cpu_buffer->meta_page->pages_touched));
|
||||
local_set(&cpu_buffer->pages_lost, READ_ONCE(cpu_buffer->meta_page->pages_lost));
|
||||
|
||||
return rb_num_of_entries(cpu_buffer);
|
||||
}
|
||||
|
||||
static void rb_update_remote_head(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct buffer_page *next, *orig;
|
||||
int retry = 3;
|
||||
|
||||
orig = next = cpu_buffer->head_page;
|
||||
rb_inc_page(&next);
|
||||
|
||||
/* Run after the writer */
|
||||
while (cpu_buffer->head_page->page->time_stamp > next->page->time_stamp) {
|
||||
rb_inc_page(&next);
|
||||
|
||||
rb_list_head_clear(cpu_buffer->head_page->list.prev);
|
||||
rb_inc_page(&cpu_buffer->head_page);
|
||||
rb_set_list_to_head(cpu_buffer->head_page->list.prev);
|
||||
|
||||
if (cpu_buffer->head_page == orig) {
|
||||
if (WARN_ON_ONCE(!(--retry)))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
orig = cpu_buffer->commit_page = cpu_buffer->head_page;
|
||||
retry = 3;
|
||||
|
||||
while (cpu_buffer->commit_page->page->time_stamp < next->page->time_stamp) {
|
||||
rb_inc_page(&next);
|
||||
rb_inc_page(&cpu_buffer->commit_page);
|
||||
|
||||
if (cpu_buffer->commit_page == orig) {
|
||||
if (WARN_ON_ONCE(!(--retry)))
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void rb_iter_reset(struct ring_buffer_iter *iter)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
|
||||
|
||||
if (cpu_buffer->remote) {
|
||||
rb_read_remote_meta_page(cpu_buffer);
|
||||
rb_update_remote_head(cpu_buffer);
|
||||
}
|
||||
|
||||
/* Iterator usage is expected to have record disabled */
|
||||
iter->head_page = cpu_buffer->reader_page;
|
||||
iter->head = cpu_buffer->reader_page->read;
|
||||
|
|
@ -5428,7 +5550,65 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
|
|||
}
|
||||
|
||||
/*
 * Get the reader page of a ring-buffer whose writer is remote.
 *
 * The local counters are first refreshed from the meta-page; NULL is
 * returned if rb_read_remote_meta_page() reports no entries. If the current
 * reader page still has unread data, it is returned as-is. Otherwise the
 * remote is asked to swap its reader page (remote->swap_reader_page()) and
 * the local page list is rewired to mirror that swap.
 *
 * Returns the reader page, or NULL if the reader id found in the meta-page
 * is above nr_pages or if the new reader page has a zero rb_page_size().
 *
 * NOTE(review): the "rb_get_reader_page(...)" line right below looks like a
 * removed line left behind by the diff rendering -- confirm.
 */
static struct buffer_page *
|
||||
rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
__rb_get_reader_page_from_remote(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct buffer_page *new_reader, *prev_reader, *prev_head, *new_head, *last;
|
||||
|
||||
if (!rb_read_remote_meta_page(cpu_buffer))
|
||||
return NULL;
|
||||
|
||||
/* More to read on the reader page */
|
||||
if (cpu_buffer->reader_page->read < rb_page_size(cpu_buffer->reader_page)) {
|
||||
/* Nothing consumed from this page yet: start from the page time stamp */
if (!cpu_buffer->reader_page->read)
|
||||
cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
|
||||
return cpu_buffer->reader_page;
|
||||
}
|
||||
|
||||
/* Page designated as reader by the meta-page before asking for a swap */
prev_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
|
||||
|
||||
/*
 * A failing swap only triggers a warning here.
 * NOTE(review): execution continues with whatever reader.id the
 * meta-page holds -- confirm this is intended.
 */
WARN_ON_ONCE(cpu_buffer->remote->swap_reader_page(cpu_buffer->cpu,
|
||||
cpu_buffer->remote->priv));
|
||||
/* nr_pages doesn't include the reader page */
|
||||
if (WARN_ON_ONCE(cpu_buffer->meta_page->reader.id > cpu_buffer->nr_pages))
|
||||
return NULL;
|
||||
|
||||
/* Page designated as reader by the meta-page after the swap */
new_reader = cpu_buffer->subbuf_ids[cpu_buffer->meta_page->reader.id];
|
||||
|
||||
WARN_ON_ONCE(prev_reader == new_reader);
|
||||
|
||||
prev_head = new_reader; /* New reader was also the previous head */
|
||||
/* new_head is the page following the previous head */
new_head = prev_head;
|
||||
rb_inc_page(&new_head);
|
||||
/* last is the page preceding the previous head */
last = prev_head;
|
||||
rb_dec_page(&last);
|
||||
|
||||
/* Clear the old HEAD flag */
|
||||
rb_list_head_clear(cpu_buffer->head_page->list.prev);
|
||||
|
||||
/* The previous reader takes over the neighbours of the previous head */
prev_reader->list.next = prev_head->list.next;
|
||||
prev_reader->list.prev = prev_head->list.prev;
|
||||
|
||||
/* Swap prev_reader with new_reader */
|
||||
last->list.next = &prev_reader->list;
|
||||
new_head->list.prev = &prev_reader->list;
|
||||
|
||||
/* The new reader's prev points to itself, its next points to new_head */
new_reader->list.prev = &new_reader->list;
|
||||
new_reader->list.next = &new_head->list;
|
||||
|
||||
/* Reactivate the HEAD flag */
|
||||
rb_set_list_to_head(&last->list);
|
||||
|
||||
cpu_buffer->head_page = new_head;
|
||||
cpu_buffer->reader_page = new_reader;
|
||||
cpu_buffer->pages = &new_head->list;
|
||||
cpu_buffer->read_stamp = new_reader->page->time_stamp;
|
||||
/* Lost events are taken as reported in the meta-page */
cpu_buffer->lost_events = cpu_buffer->meta_page->reader.lost_events;
|
||||
|
||||
return rb_page_size(cpu_buffer->reader_page) ? cpu_buffer->reader_page : NULL;
|
||||
}
|
||||
|
||||
static struct buffer_page *
|
||||
__rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct buffer_page *reader = NULL;
|
||||
unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
|
||||
|
|
@ -5598,6 +5778,13 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
|||
return reader;
|
||||
}
|
||||
|
||||
static struct buffer_page *
|
||||
rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
return cpu_buffer->remote ? __rb_get_reader_page_from_remote(cpu_buffer) :
|
||||
__rb_get_reader_page(cpu_buffer);
|
||||
}
|
||||
|
||||
static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
|
|
@ -6154,6 +6341,8 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
|
|||
meta->entries = local_read(&cpu_buffer->entries);
|
||||
meta->overrun = local_read(&cpu_buffer->overrun);
|
||||
meta->read = cpu_buffer->read;
|
||||
meta->pages_lost = local_read(&cpu_buffer->pages_lost);
|
||||
meta->pages_touched = local_read(&cpu_buffer->pages_touched);
|
||||
|
||||
/* Some archs do not have data cache coherency between kernel and user-space */
|
||||
flush_kernel_vmap_range(cpu_buffer->meta_page, PAGE_SIZE);
|
||||
|
|
@ -6164,6 +6353,23 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
|
|||
{
|
||||
struct buffer_page *page;
|
||||
|
||||
if (cpu_buffer->remote) {
|
||||
if (!cpu_buffer->remote->reset)
|
||||
return;
|
||||
|
||||
cpu_buffer->remote->reset(cpu_buffer->cpu, cpu_buffer->remote->priv);
|
||||
rb_read_remote_meta_page(cpu_buffer);
|
||||
|
||||
/* Read related values, not covered by the meta-page */
|
||||
local_set(&cpu_buffer->pages_read, 0);
|
||||
cpu_buffer->read = 0;
|
||||
cpu_buffer->read_bytes = 0;
|
||||
cpu_buffer->last_overrun = 0;
|
||||
cpu_buffer->reader_page->read = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
rb_head_page_deactivate(cpu_buffer);
|
||||
|
||||
cpu_buffer->head_page
|
||||
|
|
@ -6394,6 +6600,46 @@ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
|
||||
|
||||
int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
|
||||
if (cpu != RING_BUFFER_ALL_CPUS) {
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask))
|
||||
return -EINVAL;
|
||||
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
|
||||
guard(raw_spinlock)(&cpu_buffer->reader_lock);
|
||||
if (rb_read_remote_meta_page(cpu_buffer))
|
||||
rb_wakeups(buffer, cpu_buffer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
guard(cpus_read_lock)();
|
||||
|
||||
/*
|
||||
* Make sure all the ring buffers are up to date before we start reading
|
||||
* them.
|
||||
*/
|
||||
for_each_buffer_cpu(buffer, cpu) {
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
|
||||
guard(raw_spinlock)(&cpu_buffer->reader_lock);
|
||||
rb_read_remote_meta_page(cpu_buffer);
|
||||
}
|
||||
|
||||
for_each_buffer_cpu(buffer, cpu) {
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
|
||||
if (rb_num_of_entries(cpu_buffer))
|
||||
rb_wakeups(buffer, cpu_buffer);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
|
||||
/**
|
||||
* ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
|
||||
|
|
@ -6632,6 +6878,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
|
|||
unsigned int commit;
|
||||
unsigned int read;
|
||||
u64 save_timestamp;
|
||||
bool force_memcpy;
|
||||
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask))
|
||||
return -1;
|
||||
|
|
@ -6669,6 +6916,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
|
|||
/* Check if any events were dropped */
|
||||
missed_events = cpu_buffer->lost_events;
|
||||
|
||||
force_memcpy = cpu_buffer->mapped || cpu_buffer->remote;
|
||||
|
||||
/*
|
||||
* If this page has been partially read or
|
||||
* if len is not big enough to read the rest of the page or
|
||||
|
|
@ -6678,7 +6927,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
|
|||
*/
|
||||
if (read || (len < (commit - read)) ||
|
||||
cpu_buffer->reader_page == cpu_buffer->commit_page ||
|
||||
cpu_buffer->mapped) {
|
||||
force_memcpy) {
|
||||
struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
|
||||
unsigned int rpos = read;
|
||||
unsigned int pos = 0;
|
||||
|
|
@ -7034,7 +7283,7 @@ static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
|
|||
}
|
||||
|
||||
static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
unsigned long *subbuf_ids)
|
||||
struct buffer_page **subbuf_ids)
|
||||
{
|
||||
struct trace_buffer_meta *meta = cpu_buffer->meta_page;
|
||||
unsigned int nr_subbufs = cpu_buffer->nr_pages + 1;
|
||||
|
|
@ -7043,7 +7292,7 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
int id = 0;
|
||||
|
||||
id = rb_page_id(cpu_buffer, cpu_buffer->reader_page, id);
|
||||
subbuf_ids[id++] = (unsigned long)cpu_buffer->reader_page->page;
|
||||
subbuf_ids[id++] = cpu_buffer->reader_page;
|
||||
cnt++;
|
||||
|
||||
first_subbuf = subbuf = rb_set_head_page(cpu_buffer);
|
||||
|
|
@ -7053,7 +7302,7 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
if (WARN_ON(id >= nr_subbufs))
|
||||
break;
|
||||
|
||||
subbuf_ids[id] = (unsigned long)subbuf->page;
|
||||
subbuf_ids[id] = subbuf;
|
||||
|
||||
rb_inc_page(&subbuf);
|
||||
id++;
|
||||
|
|
@ -7062,7 +7311,7 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
|
||||
WARN_ON(cnt != nr_subbufs);
|
||||
|
||||
/* install subbuf ID to kern VA translation */
|
||||
/* install subbuf ID to bpage translation */
|
||||
cpu_buffer->subbuf_ids = subbuf_ids;
|
||||
|
||||
meta->meta_struct_len = sizeof(*meta);
|
||||
|
|
@ -7218,13 +7467,15 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
|
|||
}
|
||||
|
||||
while (p < nr_pages) {
|
||||
struct buffer_page *subbuf;
|
||||
struct page *page;
|
||||
int off = 0;
|
||||
|
||||
if (WARN_ON_ONCE(s >= nr_subbufs))
|
||||
return -EINVAL;
|
||||
|
||||
page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
|
||||
subbuf = cpu_buffer->subbuf_ids[s];
|
||||
page = virt_to_page((void *)subbuf->page);
|
||||
|
||||
for (; off < (1 << (subbuf_order)); off++, page++) {
|
||||
if (p >= nr_pages)
|
||||
|
|
@ -7251,10 +7502,11 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
|
|||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
unsigned long flags, *subbuf_ids;
|
||||
struct buffer_page **subbuf_ids;
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask))
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask) || buffer->remote)
|
||||
return -EINVAL;
|
||||
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
|
|
@ -7275,7 +7527,7 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
/* subbuf_ids include the reader while nr_pages does not */
|
||||
/* subbuf_ids includes the reader while nr_pages does not */
|
||||
subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1, sizeof(*subbuf_ids), GFP_KERNEL);
|
||||
if (!subbuf_ids) {
|
||||
rb_free_meta_page(cpu_buffer);
|
||||
|
|
|
|||
517
kernel/trace/simple_ring_buffer.c
Normal file
517
kernel/trace/simple_ring_buffer.c
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2025 - Google LLC
|
||||
* Author: Vincent Donnefort <vdonnefort@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/simple_ring_buffer.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
/*
 * Flag kept in the low bits of a simple_buffer_page's link.next pointer.
 */
enum simple_rb_link_type {
	SIMPLE_RB_LINK_NORMAL		= 0,	/* No flag set */
	SIMPLE_RB_LINK_HEAD		= 1,	/* Link found in front of the head page */
	SIMPLE_RB_LINK_HEAD_MOVING	= 2,	/* The writer caught the head */
};
|
||||
|
||||
/* AND-ing a link value with this mask clears the HEAD and HEAD_MOVING flag bits */
#define SIMPLE_RB_LINK_MASK ~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING)
|
||||
|
||||
static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
|
||||
{
|
||||
unsigned long link = (unsigned long)bpage->link.next;
|
||||
|
||||
link &= SIMPLE_RB_LINK_MASK;
|
||||
link |= SIMPLE_RB_LINK_HEAD;
|
||||
|
||||
/*
|
||||
* Paired with simple_rb_find_head() to order access between the head
|
||||
* link and overrun. It ensures we always report an up-to-date value
|
||||
* after swapping the reader page.
|
||||
*/
|
||||
smp_store_release(&bpage->link.next, (struct list_head *)link);
|
||||
}
|
||||
|
||||
static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
|
||||
struct simple_buffer_page *dst,
|
||||
enum simple_rb_link_type new_type)
|
||||
{
|
||||
unsigned long *link = (unsigned long *)(&bpage->link.next);
|
||||
unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
|
||||
unsigned long new = (unsigned long)(&dst->link) | new_type;
|
||||
|
||||
return try_cmpxchg(link, &old, new);
|
||||
}
|
||||
|
||||
static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
|
||||
{
|
||||
unsigned long link = (unsigned long)bpage->link.next;
|
||||
|
||||
WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
|
||||
}
|
||||
|
||||
static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
|
||||
{
|
||||
unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
|
||||
|
||||
return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
|
||||
}
|
||||
|
||||
static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
|
||||
{
|
||||
return simple_bpage_from_link(bpage->link.next);
|
||||
}
|
||||
|
||||
static void simple_bpage_reset(struct simple_buffer_page *bpage)
|
||||
{
|
||||
bpage->write = 0;
|
||||
bpage->entries = 0;
|
||||
|
||||
local_set(&bpage->page->commit, 0);
|
||||
}
|
||||
|
||||
static void simple_bpage_init(struct simple_buffer_page *bpage, void *page)
|
||||
{
|
||||
INIT_LIST_HEAD(&bpage->link);
|
||||
bpage->page = (struct buffer_data_page *)page;
|
||||
|
||||
simple_bpage_reset(bpage);
|
||||
}
|
||||
|
||||
/*
 * Add @__inc to the meta-page field @__meta. The new value is published with
 * WRITE_ONCE(); the current value is read with a plain load. Both arguments
 * are parenthesised so that compound expressions expand as expected.
 */
#define simple_rb_meta_inc(__meta, __inc)		\
	WRITE_ONCE((__meta), ((__meta) + (__inc)))
|
||||
|
||||
static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
return !!cpu_buffer->bpages;
|
||||
}
|
||||
|
||||
static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
int retry = cpu_buffer->nr_pages * 2;
|
||||
struct simple_buffer_page *head;
|
||||
|
||||
head = cpu_buffer->head_page;
|
||||
|
||||
while (retry--) {
|
||||
unsigned long link;
|
||||
|
||||
spin:
|
||||
/* See smp_store_release in simple_bpage_set_head_link() */
|
||||
link = (unsigned long)smp_load_acquire(&head->link.prev->next);
|
||||
|
||||
switch (link & ~SIMPLE_RB_LINK_MASK) {
|
||||
/* Found the head */
|
||||
case SIMPLE_RB_LINK_HEAD:
|
||||
cpu_buffer->head_page = head;
|
||||
return 0;
|
||||
/* The writer caught the head, we can spin, that won't be long */
|
||||
case SIMPLE_RB_LINK_HEAD_MOVING:
|
||||
goto spin;
|
||||
}
|
||||
|
||||
head = simple_bpage_next_page(head);
|
||||
}
|
||||
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
|
||||
* @cpu_buffer: A simple_rb_per_cpu
|
||||
*
|
||||
* This function enables consuming reading. It ensures the current head page will not be overwritten
|
||||
* and can be safely read.
|
||||
*
|
||||
* Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the
|
||||
* head page.
|
||||
*/
|
||||
int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct simple_buffer_page *last, *head, *reader;
|
||||
unsigned long overrun;
|
||||
int retry = 8;
|
||||
int ret;
|
||||
|
||||
if (!simple_rb_loaded(cpu_buffer))
|
||||
return -ENODEV;
|
||||
|
||||
reader = cpu_buffer->reader_page;
|
||||
|
||||
do {
|
||||
/* Run after the writer to find the head */
|
||||
ret = simple_rb_find_head(cpu_buffer);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
head = cpu_buffer->head_page;
|
||||
|
||||
/* Connect the reader page around the header page */
|
||||
reader->link.next = head->link.next;
|
||||
reader->link.prev = head->link.prev;
|
||||
|
||||
/* The last page before the head */
|
||||
last = simple_bpage_from_link(head->link.prev);
|
||||
|
||||
/* The reader page points to the new header page */
|
||||
simple_bpage_set_head_link(reader);
|
||||
|
||||
overrun = cpu_buffer->meta->overrun;
|
||||
} while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
|
||||
|
||||
if (!retry)
|
||||
return -EINVAL;
|
||||
|
||||
cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
|
||||
cpu_buffer->head_page->link.prev = &reader->link;
|
||||
cpu_buffer->reader_page = head;
|
||||
cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
|
||||
cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
|
||||
cpu_buffer->last_overrun = overrun;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
|
||||
|
||||
static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct simple_buffer_page *tail, *new_tail;
|
||||
|
||||
tail = cpu_buffer->tail_page;
|
||||
new_tail = simple_bpage_next_page(tail);
|
||||
|
||||
if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
|
||||
/*
|
||||
* Oh no! we've caught the head. There is none anymore and
|
||||
* swap_reader will spin until we set the new one. Overrun must
|
||||
* be written first, to make sure we report the correct number
|
||||
* of lost events.
|
||||
*/
|
||||
simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
|
||||
simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);
|
||||
|
||||
simple_bpage_set_head_link(new_tail);
|
||||
simple_bpage_set_normal_link(tail);
|
||||
}
|
||||
|
||||
simple_bpage_reset(new_tail);
|
||||
cpu_buffer->tail_page = new_tail;
|
||||
|
||||
simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);
|
||||
|
||||
return new_tail;
|
||||
}
|
||||
|
||||
static unsigned long rb_event_size(unsigned long length)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
|
||||
return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
|
||||
}
|
||||
|
||||
static struct ring_buffer_event *
|
||||
rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
|
||||
{
|
||||
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
|
||||
event->time_delta = delta & TS_MASK;
|
||||
event->array[0] = delta >> TS_SHIFT;
|
||||
|
||||
return (struct ring_buffer_event *)((unsigned long)event + 8);
|
||||
}
|
||||
|
||||
static struct ring_buffer_event *
|
||||
simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
|
||||
{
|
||||
unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
|
||||
struct simple_buffer_page *tail = cpu_buffer->tail_page;
|
||||
struct ring_buffer_event *event;
|
||||
u32 write, prev_write;
|
||||
u64 time_delta;
|
||||
|
||||
time_delta = timestamp - cpu_buffer->write_stamp;
|
||||
|
||||
if (test_time_stamp(time_delta))
|
||||
ts_ext_size = 8;
|
||||
|
||||
prev_write = tail->write;
|
||||
write = prev_write + event_size + ts_ext_size;
|
||||
|
||||
if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
|
||||
tail = simple_rb_move_tail(cpu_buffer);
|
||||
|
||||
if (!tail->entries) {
|
||||
tail->page->time_stamp = timestamp;
|
||||
time_delta = 0;
|
||||
ts_ext_size = 0;
|
||||
write = event_size;
|
||||
prev_write = 0;
|
||||
}
|
||||
|
||||
tail->write = write;
|
||||
tail->entries++;
|
||||
|
||||
cpu_buffer->write_stamp = timestamp;
|
||||
|
||||
event = (struct ring_buffer_event *)(tail->page->data + prev_write);
|
||||
if (ts_ext_size) {
|
||||
event = rb_event_add_ts_extend(event, time_delta);
|
||||
time_delta = 0;
|
||||
}
|
||||
|
||||
event->type_len = 0;
|
||||
event->time_delta = time_delta;
|
||||
event->array[0] = event_size - RB_EVNT_HDR_SIZE;
|
||||
|
||||
return event;
|
||||
}
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
|
||||
* @cpu_buffer: A simple_rb_per_cpu
|
||||
* @length: Size of the entry in bytes
|
||||
* @timestamp: Timestamp of the entry
|
||||
*
|
||||
* Returns the address of the entry where to write data or NULL
|
||||
*/
|
||||
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
|
||||
u64 timestamp)
|
||||
{
|
||||
struct ring_buffer_event *rb_event;
|
||||
|
||||
if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY)
|
||||
return NULL;
|
||||
|
||||
rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp);
|
||||
|
||||
return &rb_event->array[1];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
|
||||
* @cpu_buffer: The simple_rb_per_cpu where the entry has been reserved
|
||||
*/
|
||||
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
local_set(&cpu_buffer->tail_page->page->commit,
|
||||
cpu_buffer->tail_page->write);
|
||||
simple_rb_meta_inc(cpu_buffer->meta->entries, 1);
|
||||
|
||||
/*
|
||||
* Paired with simple_rb_enable_tracing() to ensure data is
|
||||
* written to the ring-buffer before teardown.
|
||||
*/
|
||||
smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
|
||||
|
||||
/*
 * Enable or disable writing to @cpu_buffer.
 *
 * Enabling moves the status from SIMPLE_RB_UNAVAILABLE to SIMPLE_RB_READY.
 * Disabling moves it from SIMPLE_RB_READY to SIMPLE_RB_UNAVAILABLE, retrying
 * for as long as the status is found to be SIMPLE_RB_WRITING.
 *
 * Returns the status observed by the (last) exchange.
 */
static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
{
	u32 seen;

	if (enable)
		return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);

	/* Wait for the buffer to be released */
	for (;;) {
		seen = cmpxchg_acquire(&cpu_buffer->status,
				       SIMPLE_RB_READY,
				       SIMPLE_RB_UNAVAILABLE);
		if (seen != SIMPLE_RB_WRITING)
			break;
	}

	return seen;
}
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_reset - Reset @cpu_buffer
|
||||
* @cpu_buffer: A simple_rb_per_cpu
|
||||
*
|
||||
* This will not clear the content of the data, only reset counters and pointers
|
||||
*
|
||||
* Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
|
||||
*/
|
||||
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
struct simple_buffer_page *bpage;
|
||||
u32 prev_status;
|
||||
int ret;
|
||||
|
||||
if (!simple_rb_loaded(cpu_buffer))
|
||||
return -ENODEV;
|
||||
|
||||
prev_status = simple_rb_enable_tracing(cpu_buffer, false);
|
||||
|
||||
ret = simple_rb_find_head(cpu_buffer);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
|
||||
do {
|
||||
simple_bpage_reset(bpage);
|
||||
bpage = simple_bpage_next_page(bpage);
|
||||
} while (bpage != cpu_buffer->head_page);
|
||||
|
||||
simple_bpage_reset(cpu_buffer->reader_page);
|
||||
|
||||
cpu_buffer->last_overrun = 0;
|
||||
cpu_buffer->write_stamp = 0;
|
||||
|
||||
cpu_buffer->meta->reader.read = 0;
|
||||
cpu_buffer->meta->reader.lost_events = 0;
|
||||
cpu_buffer->meta->entries = 0;
|
||||
cpu_buffer->meta->overrun = 0;
|
||||
cpu_buffer->meta->read = 0;
|
||||
cpu_buffer->meta->pages_lost = 0;
|
||||
cpu_buffer->meta->pages_touched = 0;
|
||||
|
||||
if (prev_status == SIMPLE_RB_READY)
|
||||
simple_rb_enable_tracing(cpu_buffer, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
|
||||
|
||||
int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc,
|
||||
void *(*load_page)(unsigned long va),
|
||||
void (*unload_page)(void *va))
|
||||
{
|
||||
struct simple_buffer_page *bpage = bpages;
|
||||
int ret = 0;
|
||||
void *page;
|
||||
int i;
|
||||
|
||||
/* At least 1 reader page and two pages in the ring-buffer */
|
||||
if (desc->nr_page_va < 3)
|
||||
return -EINVAL;
|
||||
|
||||
memset(cpu_buffer, 0, sizeof(*cpu_buffer));
|
||||
|
||||
cpu_buffer->meta = load_page(desc->meta_va);
|
||||
if (!cpu_buffer->meta)
|
||||
return -EINVAL;
|
||||
|
||||
memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
|
||||
cpu_buffer->meta->meta_page_size = PAGE_SIZE;
|
||||
cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
|
||||
|
||||
/* The reader page is not part of the ring initially */
|
||||
page = load_page(desc->page_va[0]);
|
||||
if (!page) {
|
||||
unload_page(cpu_buffer->meta);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
simple_bpage_init(bpage, page);
|
||||
bpage->id = 0;
|
||||
|
||||
cpu_buffer->nr_pages = 1;
|
||||
|
||||
cpu_buffer->reader_page = bpage;
|
||||
cpu_buffer->tail_page = bpage + 1;
|
||||
cpu_buffer->head_page = bpage + 1;
|
||||
|
||||
for (i = 1; i < desc->nr_page_va; i++) {
|
||||
page = load_page(desc->page_va[i]);
|
||||
if (!page) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
simple_bpage_init(++bpage, page);
|
||||
|
||||
bpage->link.next = &(bpage + 1)->link;
|
||||
bpage->link.prev = &(bpage - 1)->link;
|
||||
bpage->id = i;
|
||||
|
||||
cpu_buffer->nr_pages = i + 1;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
for (i--; i >= 0; i--)
|
||||
unload_page((void *)desc->page_va[i]);
|
||||
unload_page(cpu_buffer->meta);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Close the ring */
|
||||
bpage->link.next = &cpu_buffer->tail_page->link;
|
||||
cpu_buffer->tail_page->link.prev = &bpage->link;
|
||||
|
||||
/* The last init'ed page points to the head page */
|
||||
simple_bpage_set_head_link(bpage);
|
||||
|
||||
cpu_buffer->bpages = bpages;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Default load callback: the address is used as the page pointer as is */
static void *__load_page(unsigned long va)
{
	void *page = (void *)va;

	return page;
}
|
||||
|
||||
/* Default unload callback: nothing to undo for __load_page() */
static void __unload_page(void *page)
{
}
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_init - Init @cpu_buffer based on @desc
|
||||
* @cpu_buffer: A simple_rb_per_cpu buffer to init, allocated by the caller.
|
||||
* @bpages: Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
|
||||
* @desc: A ring_buffer_desc
|
||||
*
|
||||
* Returns 0 on success or -EINVAL if the content of @desc is invalid
|
||||
*/
|
||||
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
|
||||
const struct ring_buffer_desc *desc)
|
||||
{
|
||||
return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
|
||||
|
||||
void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
|
||||
void (*unload_page)(void *))
|
||||
{
|
||||
int p;
|
||||
|
||||
if (!simple_rb_loaded(cpu_buffer))
|
||||
return;
|
||||
|
||||
simple_rb_enable_tracing(cpu_buffer, false);
|
||||
|
||||
unload_page(cpu_buffer->meta);
|
||||
for (p = 0; p < cpu_buffer->nr_pages; p++)
|
||||
unload_page(cpu_buffer->bpages[p].page);
|
||||
|
||||
cpu_buffer->bpages = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
|
||||
* @cpu_buffer: A simple_rb_per_cpu that will be deleted.
|
||||
*/
|
||||
void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
|
||||
{
|
||||
return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
|
||||
|
||||
/**
|
||||
* simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
|
||||
* @cpu_buffer: A simple_rb_per_cpu
|
||||
* @enable: True to enable tracing, False to disable it
|
||||
*
|
||||
* Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
|
||||
*/
|
||||
int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
|
||||
{
|
||||
if (!simple_rb_loaded(cpu_buffer))
|
||||
return -ENODEV;
|
||||
|
||||
simple_rb_enable_tracing(cpu_buffer, enable);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
|
||||
|
|
@ -3856,7 +3856,7 @@ static int s_show(struct seq_file *m, void *v)
|
|||
* Should be used after trace_array_get(), trace_types_lock
|
||||
* ensures that i_cdev was already initialized.
|
||||
*/
|
||||
static inline int tracing_get_cpu(struct inode *inode)
|
||||
int tracing_get_cpu(struct inode *inode)
|
||||
{
|
||||
if (inode->i_cdev) /* See trace_create_cpu_file() */
|
||||
return (long)inode->i_cdev - 1;
|
||||
|
|
@ -8589,7 +8589,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
|
|||
return tr->percpu_dir;
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
struct dentry *
|
||||
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
|
||||
void *data, long cpu, const struct file_operations *fops)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -689,6 +689,13 @@ struct dentry *trace_create_file(const char *name,
|
|||
struct dentry *parent,
|
||||
void *data,
|
||||
const struct file_operations *fops);
|
||||
struct dentry *trace_create_cpu_file(const char *name,
|
||||
umode_t mode,
|
||||
struct dentry *parent,
|
||||
void *data,
|
||||
long cpu,
|
||||
const struct file_operations *fops);
|
||||
int tracing_get_cpu(struct inode *inode);
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
|||
1384
kernel/trace/trace_remote.c
Normal file
1384
kernel/trace/trace_remote.c
Normal file
File diff suppressed because it is too large
Load Diff
25
tools/testing/selftests/ftrace/test.d/remotes/buffer_size.tc
Normal file
25
tools/testing/selftests/ftrace/test.d/remotes/buffer_size.tc
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote buffer size
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
test_buffer_size()
{
	# With tracing off, buffer_size_kb must report "(unloaded)"
	echo 0 > tracing_on
	assert_unloaded

	# Pick a new size, then turn tracing on: buffer_size_kb must now
	# report "(loaded)"
	echo 4096 > buffer_size_kb
	echo 1 > tracing_on
	assert_loaded

	# Stop tracing and write the size used by setup_remote back
	echo 0 > tracing_on
	echo 7 > buffer_size_kb
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
setup_remote_test
|
||||
test_buffer_size
|
||||
fi
|
||||
99
tools/testing/selftests/ftrace/test.d/remotes/functions
Normal file
99
tools/testing/selftests/ftrace/test.d/remotes/functions
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# setup_remote <name>
#
# Enter remotes/<name>/ and leave it with tracing on, an empty trace and only
# the <name> selftest event enabled. Exits as unresolved when the remote has no
# write_event file.
setup_remote()
{
	local name=$1

	if [ ! -e $TRACING_DIR/remotes/$name/write_event ]; then
		exit_unresolved
	fi

	cd remotes/$name/

	# Stop tracing before touching the buffer
	echo 0 > tracing_on
	clear_trace
	echo 7 > buffer_size_kb

	# Disable every event, then enable the selftest one only
	echo 0 > events/enable
	echo 1 > events/$name/selftest/enable

	echo 1 > tracing_on
}
|
||||
|
||||
# Set up the "test" remote, loading the remote_test module first when the
# remote directory is missing. Exits as unresolved if the module cannot be
# loaded.
setup_remote_test()
{
	if [ ! -d $TRACING_DIR/remotes/test/ ]; then
		modprobe remote_test || exit_unresolved
	fi

	setup_remote "test"
}
|
||||
|
||||
# Succeed only if buffer_size_kb contains the "(loaded)" marker
assert_loaded()
{
	if ! grep -q "(loaded)" buffer_size_kb; then
		return 1
	fi
}
|
||||
|
||||
# Succeed only if buffer_size_kb contains the "(unloaded)" marker
assert_unloaded()
{
	if ! grep -q "(unloaded)" buffer_size_kb; then
		return 1
	fi
}
|
||||
|
||||
# Take the remote buffer through an unloaded -> loaded cycle
reload_remote()
{
	# Tracing off and trace cleared: buffer_size_kb must report "(unloaded)"
	echo 0 > tracing_on
	clear_trace
	assert_unloaded

	# Tracing back on: buffer_size_kb must report "(loaded)"
	echo 1 > tracing_on
	assert_loaded
}
|
||||
|
||||
# Read trace_pipe for one second into a temporary file and print the path of
# that file. Removing the file is left to the caller.
dump_trace_pipe()
{
	output=$(mktemp $TMPDIR/remote_test.XXXXXX)

	# Background reader, stopped with SIGHUP after a second
	cat trace_pipe > $output &
	pid=$!
	sleep 1
	kill -HUP $pid

	echo $output
}
|
||||
|
||||
# check_trace <start_id> <end_id> <file>
#
# Verify that <file> is not empty, that timestamps strictly increase from one
# line to the next, that event ids count up one by one from <start_id> and that
# the last id read is <end_id>. Each check is a plain test command, so a
# mismatch makes the function fail under "set -e".
check_trace()
{
	start_id="$1"
	end_id="$2"
	file="$3"

	# Ensure the file is not empty
	[ -n "$(head $file)" ]

	prev_ts=0
	id=0

	# Only keep <timestamp> <id>
	tmp=$(mktemp $TMPDIR/remote_test.XXXXXX)
	sed -e 's/\[[0-9]*\]\s*\([0-9]*.[0-9]*\): [a-z]* id=\([0-9]*\)/\1 \2/' $file > $tmp

	while IFS= read -r line; do
		ts=$(echo $line | cut -d ' ' -f 1)
		id=$(echo $line | cut -d ' ' -f 2)

		# bc prints 1 when the comparison holds
		[ $(echo "$ts>$prev_ts" | bc) -eq 1 ]
		[ $id -eq $start_id ]

		prev_ts=$ts
		start_id=$((start_id + 1))
	done < $tmp

	# The last id seen must be the expected final one
	[ $id -eq $end_id ]
	rm $tmp
}
|
||||
|
||||
# Print the number of every "processor : N" line of /proc/cpuinfo, one per line
get_cpu_ids()
{
	sed -n -e 's/^processor\s*:\s*\([0-9]\+\).*/\1/p' < /proc/cpuinfo
}
|
||||
|
||||
# Print the "size:" value found on the "data" line of events/header_page
get_page_size()
{
	sed -n -e 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' < events/header_page
}
|
||||
|
||||
# Print the sum of the "size:" values of all "field:" lines found in
# events/*/selftest/format
get_selftest_event_size()
{
	sed -n -e 's/^.*field:.*;.*size:\([0-9][0-9]*\);.*/\1/p' events/*/selftest/format |
		awk '{s+=$1} END {print s}'
}
|
||||
88
tools/testing/selftests/ftrace/test.d/remotes/hotplug.tc
Normal file
88
tools/testing/selftests/ftrace/test.d/remotes/hotplug.tc
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote read with an offline CPU
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
# Take one CPU offline and print its id.
#
# Returns 1, printing nothing, if fewer than two CPUs are listed or if no CPU
# could be taken offline.
hotunplug_one_cpu()
{
	[ "$(get_cpu_ids | wc -l)" -ge 2 ] || return 1

	# Writing 0 to "online" can fail for a given CPU (the file may not even
	# exist for it), so keep trying the next ones instead of giving up on
	# the first.
	for cpu in $(get_cpu_ids); do
		if { echo 0 > /sys/devices/system/cpu/cpu$cpu/online; } 2>/dev/null; then
			echo $cpu
			return 0
		fi
	done

	return 1
}
|
||||
|
||||
# Check non-consuming and consuming read: write events 1 to 8, then validate
# them first through "trace" and then through a trace_pipe dump.
check_read()
{
	for i in 1 2 3 4 5 6 7 8; do
		echo $i > write_event
	done

	# Non-consuming read
	check_trace 1 8 trace

	# Consuming read
	output=$(dump_trace_pipe)
	check_trace 1 8 $output
	rm $output
}
|
||||
|
||||
# Exercise trace reads while a CPU is offline, after reloading the remote
# without that CPU, and after bringing the CPU back online.
test_hotplug()
{
	echo 0 > trace
	assert_loaded

	#
	# Test a trace buffer containing an offline CPU
	#

	cpu=$(hotunplug_one_cpu) || exit_unsupported
	# Bring the CPU back even if one of the checks below aborts the test
	trap "echo 1 > /sys/devices/system/cpu/cpu$cpu/online" EXIT

	check_read

	#
	# Test a trace buffer with a missing CPU
	#

	reload_remote

	check_read

	#
	# Test a trace buffer with a CPU added later
	#

	echo 1 > /sys/devices/system/cpu/cpu$cpu/online
	trap "" EXIT
	assert_loaded

	check_read

	# Test if the ring-buffer for the newly added CPU is both writable and
	# readable
	for i in $(seq 1 8); do
		taskset -c $cpu echo $i > write_event
	done

	cd per_cpu/cpu$cpu/

	check_trace 1 8 trace

	output=$(dump_trace_pipe)
	check_trace 1 8 $output
	rm $output

	# Silence the directory name "cd -" prints, as the other remote tests do
	cd - > /dev/null
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
|
||||
setup_remote_test
|
||||
test_hotplug
|
||||
fi
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor trace buffer size
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/buffer_size.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_buffer_size
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor trace read with an offline CPU
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/hotplug.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_hotplug
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor trace buffer reset
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/reset.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_reset
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor non-consuming trace read
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/trace.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_trace
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor consuming trace read
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/trace_pipe.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_trace_pipe
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test hypervisor trace buffer unloading
|
||||
# requires: remotes/hypervisor/write_event
|
||||
|
||||
SOURCE_REMOTE_TEST=1
|
||||
. $TEST_DIR/remotes/unloading.tc
|
||||
|
||||
set -e
|
||||
setup_remote "hypervisor"
|
||||
test_unloading
|
||||
90
tools/testing/selftests/ftrace/test.d/remotes/reset.tc
Normal file
90
tools/testing/selftests/ftrace/test.d/remotes/reset.tc
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote reset
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
# Clear the trace of the current directory, check that trace_pipe is then
# empty, and check that exactly one freshly written event (id=7890) can be read
# back. When run from a per_cpu/cpuN directory, the event is written through
# ../../write_event with the writer pinned to CPU N.
check_reset()
{
	write_event_path="write_event"
	taskset=""

	clear_trace

	# Is the buffer empty?
	output=$(dump_trace_pipe)
	[ $(wc -l < $output) -eq 0 ]

	case "$(pwd)" in
	*per_cpu/cpu*)
		write_event_path="../../write_event"
		cpu_id=$(pwd | sed -e 's/.*per_cpu\/cpu//')
		taskset="taskset -c $cpu_id"
		;;
	esac
	rm $output

	# Can we properly write a new event?
	$taskset echo 7890 > $write_event_path
	output=$(dump_trace_pipe)
	[ $(wc -l < $output) -eq 1 ]
	grep -q "id=7890" $output
	rm $output
}
|
||||
|
||||
# Check reset through the top-level interface of the remote: after a single
# event, and after a burst of 10000 events.
test_global_interface()
{
	# Confidence check
	#
	# dump_trace_pipe creates the capture file itself; creating another one
	# here first would leave it behind in $TMPDIR.
	echo 123456 > write_event
	output=$(dump_trace_pipe)
	grep -q "id=123456" $output
	rm $output

	# Reset single event
	echo 1 > write_event
	check_reset

	# Reset lost events
	for i in $(seq 1 10000); do
		echo 1 > write_event
	done
	check_reset
}
|
||||
|
||||
# Check that resetting through one per_cpu/cpuN directory only resets that CPU.
# Skipped (returns 0) when fewer than two CPUs are listed.
test_percpu_interface()
{
	[ "$(get_cpu_ids | wc -l)" -ge 2 ] || return 0

	# One event per CPU
	for cpu in $(get_cpu_ids); do
		taskset -c $cpu echo 1 > write_event
	done

	check_non_empty=0
	for cpu in $(get_cpu_ids); do
		cd per_cpu/cpu$cpu/

		if [ $check_non_empty -eq 0 ]; then
			# Reset the first CPU only
			check_reset
			check_non_empty=1
		else
			# Check we have only reset 1 CPU
			output=$(dump_trace_pipe)
			test $(wc -l $output | cut -d ' ' -f1) -eq 1
			rm $output
		fi

		# Silence the directory name "cd -" prints, as the other remote
		# tests do
		cd - > /dev/null
	done
}
|
||||
|
||||
test_reset()
|
||||
{
|
||||
test_global_interface
|
||||
test_percpu_interface
|
||||
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
setup_remote_test
|
||||
test_reset
|
||||
fi
|
||||
102
tools/testing/selftests/ftrace/test.d/remotes/trace.tc
Normal file
102
tools/testing/selftests/ftrace/test.d/remotes/trace.tc
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote non-consuming read
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
# Exercise the non-consuming "trace" file: plain reads, then its interaction
# with a consuming reader, with reset, with lost events, and finally the
# per-CPU files.
test_trace()
{
	echo 0 > tracing_on
	assert_unloaded

	echo 7 > buffer_size_kb
	echo 1 > tracing_on
	assert_loaded

	# Simple test: Emit few events and try to read them
	for i in $(seq 1 8); do
		echo $i > write_event
	done

	check_trace 1 8 trace

	#
	# Test interaction with consuming read
	#

	# Let a trace_pipe reader run for a second
	cat trace_pipe > /dev/null &
	pid=$!

	sleep 1
	kill $pid

	# "trace" must now be empty
	[ $(wc -l < trace) -eq 0 ]

	for i in $(seq 16 32); do
		echo $i > write_event
	done

	check_trace 16 32 trace

	#
	# Test interaction with reset
	#

	echo 0 > trace

	[ $(wc -l < trace) -eq 0 ]

	for i in $(seq 1 8); do
		echo $i > write_event
	done

	check_trace 1 8 trace

	#
	# Test interaction with lost events
	#

	# Ensure the writer is not on the reader page by reloading the buffer
	reload_remote

	# Ensure ring-buffer overflow by emitting events from the same CPU
	# (the first one listed)
	for cpu in $(get_cpu_ids); do
		break
	done

	# Approx: does not take TS into account
	events_per_page=$(($(get_page_size) / $(get_selftest_event_size)))
	nr_events=$((events_per_page * 2))
	for i in $(seq 1 $nr_events); do
		taskset -c $cpu echo $i > write_event
	done

	# The id on the first line of "trace" must no longer be 1
	id=$(sed -n -e '1s/\[[0-9]*\]\s*[0-9]*.[0-9]*: [a-z]* id=\([0-9]*\)/\1/p' trace)
	[ $id -ne 1 ]

	check_trace $id $nr_events trace

	#
	# Test per-CPU interface
	#
	echo 0 > trace

	# Each CPU writes its own id as the event id
	for cpu in $(get_cpu_ids) ; do
		taskset -c $cpu echo $cpu > write_event
	done

	for cpu in $(get_cpu_ids); do
		cd per_cpu/cpu$cpu/

		check_trace $cpu $cpu trace

		cd - > /dev/null
	done
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
|
||||
setup_remote_test
|
||||
test_trace
|
||||
fi
|
||||
102
tools/testing/selftests/ftrace/test.d/remotes/trace_pipe.tc
Normal file
102
tools/testing/selftests/ftrace/test.d/remotes/trace_pipe.tc
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote consuming read
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
# Exercise the consuming "trace_pipe" file: a reader running while a few pages
# worth of events are written, then lost events, and finally the per-CPU files.
test_trace_pipe()
{
	echo 0 > tracing_on
	assert_unloaded

	# Emit events from the same CPU (the first one listed)
	for cpu in $(get_cpu_ids); do
		break
	done

	#
	# Simple test: Emit enough events to fill few pages
	#

	echo 1024 > buffer_size_kb
	echo 1 > tracing_on
	assert_loaded

	events_per_page=$(($(get_page_size) / $(get_selftest_event_size)))
	nr_events=$((events_per_page * 4))

	output=$(mktemp $TMPDIR/remote_test.XXXXXX)

	# The reader runs while the events are being written
	cat trace_pipe > $output &
	pid=$!

	for i in $(seq 1 $nr_events); do
		taskset -c $cpu echo $i > write_event
	done

	echo 0 > tracing_on
	sleep 1
	kill $pid

	check_trace 1 $nr_events $output

	rm $output

	#
	# Test interaction with lost events
	#

	assert_unloaded
	echo 7 > buffer_size_kb
	echo 1 > tracing_on
	assert_loaded

	nr_events=$((events_per_page * 2))
	for i in $(seq 1 $nr_events); do
		taskset -c $cpu echo $i > write_event
	done

	output=$(dump_trace_pipe)

	# The first line must report the number of lost events
	lost_events=$(sed -n -e '1s/CPU:.*\[LOST \([0-9]*\) EVENTS\]/\1/p' $output)
	[ -n "$lost_events" ]

	# The first id read must directly follow the lost events
	id=$(sed -n -e '2s/\[[0-9]*\]\s*[0-9]*.[0-9]*: [a-z]* id=\([0-9]*\)/\1/p' $output)
	[ "$id" -eq $((lost_events + 1)) ]

	# Drop [LOST EVENTS] line
	sed -i '1d' $output

	check_trace $id $nr_events $output

	rm $output

	#
	# Test per-CPU interface
	#

	echo 0 > trace
	echo 1 > tracing_on

	# Each CPU writes its own id as the event id
	for cpu in $(get_cpu_ids); do
		taskset -c $cpu echo $cpu > write_event
	done

	for cpu in $(get_cpu_ids); do
		cd per_cpu/cpu$cpu/
		output=$(dump_trace_pipe)

		check_trace $cpu $cpu $output

		rm $output
		cd - > /dev/null
	done
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
|
||||
setup_remote_test
|
||||
test_trace_pipe
|
||||
fi
|
||||
41
tools/testing/selftests/ftrace/test.d/remotes/unloading.tc
Normal file
41
tools/testing/selftests/ftrace/test.d/remotes/unloading.tc
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# description: Test trace remote unloading
|
||||
# requires: remotes/test
|
||||
|
||||
. $TEST_DIR/remotes/functions
|
||||
|
||||
# Check when buffer_size_kb reports "(loaded)" / "(unloaded)" depending on
# whether tracing is on, a trace_pipe reader is running, or events are still
# held in the buffer.
test_unloading()
{
	# No reader, writing
	assert_loaded

	# No reader, no writing
	echo 0 > tracing_on
	assert_unloaded

	# 1 reader, no writing
	cat trace_pipe &
	pid=$!
	sleep 1
	assert_loaded
	kill $pid
	# kill only sends the signal: wait for the reader to be gone before
	# checking. wait reports the reader's termination status, which must
	# not abort the test under "set -e".
	wait $pid 2>/dev/null || true
	assert_unloaded

	# No reader, no writing, events
	echo 1 > tracing_on
	echo 1 > write_event
	echo 0 > tracing_on
	assert_loaded

	# Test reset
	clear_trace
	assert_unloaded
}
|
||||
|
||||
if [ -z "$SOURCE_REMOTE_TEST" ]; then
|
||||
set -e
|
||||
|
||||
setup_remote_test
|
||||
test_unloading
|
||||
fi
|
||||
Loading…
Reference in New Issue
Block a user