mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
drm/xe: Convert GT stats to per-cpu counters
Current GT statistics use atomic64_t counters. Atomic operations incur a global coherency penalty. Transition to dynamic per-cpu counters using alloc_percpu(). This allows stats to be incremented via this_cpu_add(), which compiles to a single non-locking instruction. This approach keeps the hot-path updates local to the CPU, avoiding expensive cross-core cache invalidation traffic. Use for_each_possible_cpu() during aggregation and clear operations to ensure data consistency across CPU hotplug events. Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Stuart Summers <stuart.summers@intel.com> Link: https://patch.msgid.link/20260217200552.596718-1-matthew.brost@intel.com
This commit is contained in:
parent
48eb073c7d
commit
9ff885ef8b
|
|
@ -33,6 +33,7 @@
|
|||
#include "xe_gt_printk.h"
|
||||
#include "xe_gt_sriov_pf.h"
|
||||
#include "xe_gt_sriov_vf.h"
|
||||
#include "xe_gt_stats.h"
|
||||
#include "xe_gt_sysfs.h"
|
||||
#include "xe_gt_topology.h"
|
||||
#include "xe_guc_exec_queue_types.h"
|
||||
|
|
@ -455,6 +456,10 @@ int xe_gt_init_early(struct xe_gt *gt)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
err = xe_gt_stats_init(gt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
|
||||
if (!fw_ref.domains)
|
||||
return -ETIMEDOUT;
|
||||
|
|
|
|||
|
|
@ -3,12 +3,37 @@
|
|||
* Copyright © 2024 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
|
||||
#include "xe_device.h"
|
||||
#include "xe_gt_stats.h"
|
||||
#include "xe_gt_types.h"
|
||||
|
||||
static void xe_gt_stats_fini(struct drm_device *drm, void *arg)
|
||||
{
|
||||
struct xe_gt *gt = arg;
|
||||
|
||||
free_percpu(gt->stats);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_gt_stats_init() - Initialize GT statistics
|
||||
* @gt: GT structure
|
||||
*
|
||||
* Allocate per-CPU GT statistics. Using per-CPU stats allows increments
|
||||
* to occur without cross-CPU atomics.
|
||||
*
|
||||
* Return: 0 on success, -ENOMEM on failure.
|
||||
*/
|
||||
int xe_gt_stats_init(struct xe_gt *gt)
|
||||
{
|
||||
gt->stats = alloc_percpu(struct xe_gt_stats);
|
||||
if (!gt->stats)
|
||||
return -ENOMEM;
|
||||
|
||||
return drmm_add_action_or_reset(>_to_xe(gt)->drm, xe_gt_stats_fini,
|
||||
gt);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_gt_stats_incr - Increments the specified stats counter
|
||||
|
|
@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
|
|||
if (id >= __XE_GT_STATS_NUM_IDS)
|
||||
return;
|
||||
|
||||
atomic64_add(incr, &gt->stats.counters[id]);
|
||||
this_cpu_add(gt->stats->counters[id], incr);
|
||||
}
|
||||
|
||||
#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
|
||||
|
|
@ -94,23 +119,37 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
|
|||
{
|
||||
enum xe_gt_stats_id id;
|
||||
|
||||
for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
|
||||
drm_printf(p, "%s: %lld\n", stat_description[id],
|
||||
atomic64_read(&gt->stats.counters[id]));
|
||||
for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) {
|
||||
u64 total = 0;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
|
||||
|
||||
total += s->counters[id];
|
||||
}
|
||||
|
||||
drm_printf(p, "%s: %lld\n", stat_description[id], total);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_gt_stats_clear - Clear the GT stats
|
||||
* xe_gt_stats_clear() - Clear the GT stats
|
||||
* @gt: GT structure
|
||||
*
|
||||
* This clear (zeros) all the available GT stats.
|
||||
* Clear (zero) all available GT stats. Note that if the stats are being
|
||||
* updated while this function is running, the results may be unpredictable.
|
||||
* Intended to be called on an idle GPU.
|
||||
*/
|
||||
void xe_gt_stats_clear(struct xe_gt *gt)
|
||||
{
|
||||
int id;
|
||||
int cpu;
|
||||
|
||||
for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
|
||||
atomic64_set(&gt->stats.counters[id], 0);
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu);
|
||||
|
||||
memset(s, 0, sizeof(*s));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,10 +14,16 @@ struct xe_gt;
|
|||
struct drm_printer;
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
int xe_gt_stats_init(struct xe_gt *gt);
|
||||
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
|
||||
void xe_gt_stats_clear(struct xe_gt *gt);
|
||||
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
|
||||
#else
|
||||
/* Stub used when CONFIG_DEBUG_FS is not defined: nothing to set up. */
static inline int xe_gt_stats_init(struct xe_gt *gt)
{
	return 0;
}
|
||||
|
||||
static inline void
|
||||
xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id,
|
||||
int incr)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef _XE_GT_STATS_TYPES_H_
|
||||
#define _XE_GT_STATS_TYPES_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
enum xe_gt_stats_id {
|
||||
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
|
||||
XE_GT_STATS_ID_TLB_INVAL,
|
||||
|
|
@ -58,4 +60,21 @@ enum xe_gt_stats_id {
|
|||
__XE_GT_STATS_NUM_IDS,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xe_gt_stats - Per-CPU GT statistics counters
|
||||
* @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id
|
||||
*
|
||||
* This structure is used for high-frequency, per-CPU statistics collection
|
||||
* in the Xe driver. By using a per-CPU allocation and ensuring the structure
|
||||
* is cache-line aligned, we avoid the performance-heavy atomics and cache
|
||||
* coherency traffic.
|
||||
*
|
||||
* Updates to these counters should be performed using the this_cpu_add()
|
||||
* macro to ensure they are atomic with respect to local interrupts and
|
||||
* preemption-safe without the overhead of explicit locking.
|
||||
*/
|
||||
struct xe_gt_stats {
|
||||
u64 counters[__XE_GT_STATS_NUM_IDS];
|
||||
} ____cacheline_aligned;
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -158,10 +158,7 @@ struct xe_gt {
|
|||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
||||
/** @stats: GT stats */
|
||||
struct {
|
||||
/** @stats.counters: counters for various GT stats */
|
||||
atomic64_t counters[__XE_GT_STATS_NUM_IDS];
|
||||
} stats;
|
||||
struct xe_gt_stats __percpu *stats;
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user