A few resctrl features and hooks need to be provided, but aren't needed or supported on MPAM platforms.

resctrl has individual hooks to separately enable and disable the closid/partid and rmid/pmg context switching code. For MPAM this is all the same thing, as the value in struct task_struct is used to cache the value that should be written to hardware. arm64's context switching code is enabled once MPAM is usable, but doesn't touch the hardware unless the value has changed.

For now event configuration is not supported, and can be turned off by returning 'false' from resctrl_arch_is_evt_configurable(). The new io_alloc feature is not supported either; fail the enable helper and report the feature as disabled.

Add this, and empty definitions for the other hooks.

Tested-by: Gavin Shan <gshan@redhat.com>
Tested-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Tested-by: Peter Newman <peternewman@google.com>
Tested-by: Zeng Heng <zengheng4@huawei.com>
Tested-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Tested-by: Jesse Chick <jessechick@os.amperecomputing.com>
Reviewed-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Co-developed-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2025 Arm Ltd.
|
|
|
|
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
|
|
|
|
#include <linux/arm_mpam.h>
|
|
#include <linux/cacheinfo.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/limits.h>
|
|
#include <linux/list.h>
|
|
#include <linux/math.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/rculist.h>
|
|
#include <linux/resctrl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/types.h>
|
|
#include <linux/wait.h>
|
|
|
|
#include <asm/mpam.h>
|
|
|
|
#include "mpam_internal.h"
|
|
|
|
DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters);
|
|
|
|
/*
 * The classes we've picked to map to resctrl resources, wrapped
 * up with their resctrl structure.
 * Class pointer may be NULL.
 */
|
|
static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES];
|
|
|
|
#define for_each_mpam_resctrl_control(res, rid) \
|
|
for (rid = 0, res = &mpam_resctrl_controls[rid]; \
|
|
rid < RDT_NUM_RESOURCES; \
|
|
rid++, res = &mpam_resctrl_controls[rid])
|
|
|
|
/*
 * The classes we've picked to map to resctrl events.
 * Resctrl believes all the world's a Xeon, and these are all on the L3. This
 * array lets us find the actual class backing the event counters, e.g.
 * the only memory bandwidth counters may be on the memory controller, but to
 * make use of them, we pretend they are on the L3. Restrict the events
 * considered to those supported by MPAM.
 * Class pointer may be NULL.
 */
|
|
#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID
|
|
static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1];
|
|
|
|
#define for_each_mpam_resctrl_mon(mon, eventid) \
|
|
for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \
|
|
eventid <= MPAM_MAX_EVENT; \
|
|
eventid++, mon = &mpam_resctrl_counters[eventid])
|
|
|
|
/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */
|
|
static DEFINE_MUTEX(domain_list_lock);
|
|
|
|
/*
|
|
* MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1.
|
|
* This applies globally to all traffic the CPU generates.
|
|
*/
|
|
static bool cdp_enabled;
|
|
|
|
/*
|
|
* We use cacheinfo to discover the size of the caches and their id. cacheinfo
|
|
* populates this from a device_initcall(). mpam_resctrl_setup() must wait.
|
|
*/
|
|
static bool cacheinfo_ready;
|
|
static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready);
|
|
|
|
bool resctrl_arch_alloc_capable(void)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
if (res->resctrl_res.alloc_capable)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool resctrl_arch_mon_capable(void)
|
|
{
|
|
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
struct rdt_resource *l3 = &res->resctrl_res;
|
|
|
|
/* All monitors are presented as being on the L3 cache */
|
|
return l3->mon_capable;
|
|
}
|
|
|
|
bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
void resctrl_arch_mon_event_config_read(void *info)
|
|
{
|
|
}
|
|
|
|
void resctrl_arch_mon_event_config_write(void *info)
|
|
{
|
|
}
|
|
|
|
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
|
|
{
|
|
}
|
|
|
|
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
|
|
u32 closid, u32 rmid, enum resctrl_event_id eventid)
|
|
{
|
|
}
|
|
|
|
void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
|
|
u32 closid, u32 rmid, int cntr_id,
|
|
enum resctrl_event_id eventid)
|
|
{
|
|
}
|
|
|
|
void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
|
|
enum resctrl_event_id evtid, u32 rmid, u32 closid,
|
|
u32 cntr_id, bool assign)
|
|
{
|
|
}
|
|
|
|
int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
|
|
u32 unused, u32 rmid, int cntr_id,
|
|
enum resctrl_event_id eventid, u64 *val)
|
|
{
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable)
|
|
{
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
void resctrl_arch_pre_mount(void)
|
|
{
|
|
}
|
|
|
|
bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
|
|
{
|
|
return mpam_resctrl_controls[rid].cdp_enabled;
|
|
}
|
|
|
|
/**
|
|
* resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
|
|
*
|
|
* At boot, all existing tasks use partid zero for D and I.
|
|
* To enable/disable CDP emulation, all these tasks need relabelling.
|
|
*/
|
|
static void resctrl_reset_task_closids(void)
|
|
{
|
|
struct task_struct *p, *t;
|
|
|
|
read_lock(&tasklist_lock);
|
|
for_each_process_thread(p, t) {
|
|
resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
|
|
RESCTRL_RESERVED_RMID);
|
|
}
|
|
read_unlock(&tasklist_lock);
|
|
}
|
|
|
|
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable)
|
|
{
|
|
u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID;
|
|
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
struct rdt_resource *l3 = &res->resctrl_res;
|
|
int cpu;
|
|
|
|
if (!IS_ENABLED(CONFIG_EXPERT) && enable) {
|
|
/*
|
|
* If the resctrl fs is mounted more than once, sequentially,
|
|
* then CDP can lead to the use of out of range PARTIDs.
|
|
*/
|
|
pr_warn("CDP not supported\n");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (enable)
|
|
pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n");
|
|
|
|
/*
|
|
* resctrl_arch_set_cdp_enabled() is only called with enable set to
|
|
* false on error and unmount.
|
|
*/
|
|
cdp_enabled = enable;
|
|
mpam_resctrl_controls[rid].cdp_enabled = enable;
|
|
|
|
if (enable)
|
|
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2;
|
|
else
|
|
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
|
|
|
|
/* The mbw_max feature can't hide cdp as it's a per-partid maximum. */
|
|
if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled)
|
|
mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false;
|
|
|
|
if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled &&
|
|
mpam_resctrl_controls[RDT_RESOURCE_MBA].class)
|
|
mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true;
|
|
|
|
if (enable) {
|
|
if (mpam_partid_max < 1)
|
|
return -EINVAL;
|
|
|
|
partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA);
|
|
partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE);
|
|
}
|
|
|
|
mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0);
|
|
WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current));
|
|
|
|
resctrl_reset_task_closids();
|
|
|
|
for_each_possible_cpu(cpu)
|
|
mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0);
|
|
on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
|
|
{
|
|
return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
|
|
}
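/*
 * In other words: cdp_enabled is the global flag meaning MPAM0_EL1 carries
 * separate I/D partids, while the per-resource flag records whether resctrl
 * presented CODE/DATA schemata for this resource. A resource that "hides"
 * CDP must keep both of its partids identically configured - see
 * resctrl_arch_get_config() and resctrl_arch_update_one().
 */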
|
|
|
|
/*
 * An MSC may raise an error interrupt if it sees an out-of-range partid/pmg,
 * and go on to truncate the value. Regardless of what the hardware supports,
 * only the system-wide safe value can be used.
 */
|
|
u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored)
|
|
{
|
|
return mpam_partid_max + 1;
|
|
}
|
|
|
|
u32 resctrl_arch_system_num_rmid_idx(void)
|
|
{
|
|
return (mpam_pmg_max + 1) * (mpam_partid_max + 1);
|
|
}
|
|
|
|
u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
|
|
{
|
|
return closid * (mpam_pmg_max + 1) + rmid;
|
|
}
|
|
|
|
void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
|
|
{
|
|
*closid = idx / (mpam_pmg_max + 1);
|
|
*rmid = idx % (mpam_pmg_max + 1);
|
|
}
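/*
 * Worked example of the index scheme above (illustrative values): with
 * mpam_partid_max = 63 and mpam_pmg_max = 3 there are 64 closids and
 * 64 * 4 = 256 rmid indexes in total. Encoding closid = 5, rmid = 2 gives
 * idx = 5 * 4 + 2 = 22, and decoding 22 returns closid = 22 / 4 = 5,
 * rmid = 22 % 4 = 2.
 */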
|
|
|
|
void resctrl_arch_sched_in(struct task_struct *tsk)
|
|
{
|
|
lockdep_assert_preemption_disabled();
|
|
|
|
mpam_thread_switch(tsk);
|
|
}
|
|
|
|
void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid)
|
|
{
|
|
WARN_ON_ONCE(closid > U16_MAX);
|
|
WARN_ON_ONCE(rmid > U8_MAX);
|
|
|
|
if (!cdp_enabled) {
|
|
mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid);
|
|
} else {
|
|
/*
|
|
* When CDP is enabled, resctrl halves the closid range and we
|
|
* use odd/even partid for one closid.
|
|
*/
|
|
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
|
|
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
|
|
|
|
mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid);
|
|
}
|
|
}
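/*
 * Example of the CDP mapping above, assuming resctrl's usual even/odd
 * packing in resctrl_get_config_index(): with CDP enabled, closid 3 is
 * written to hardware as partid 6 for data and partid 7 for code, so
 * half the partid space is consumed per closid while the pmg value is
 * programmed unchanged for both.
 */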
|
|
|
|
void resctrl_arch_sync_cpu_closid_rmid(void *info)
|
|
{
|
|
struct resctrl_cpu_defaults *r = info;
|
|
|
|
lockdep_assert_preemption_disabled();
|
|
|
|
if (r) {
|
|
resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(),
|
|
r->closid, r->rmid);
|
|
}
|
|
|
|
resctrl_arch_sched_in(current);
|
|
}
|
|
|
|
void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
|
|
{
|
|
WARN_ON_ONCE(closid > U16_MAX);
|
|
WARN_ON_ONCE(rmid > U8_MAX);
|
|
|
|
if (!cdp_enabled) {
|
|
mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid);
|
|
} else {
|
|
u32 partid_d = resctrl_get_config_index(closid, CDP_DATA);
|
|
u32 partid_i = resctrl_get_config_index(closid, CDP_CODE);
|
|
|
|
mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid);
|
|
}
|
|
}
|
|
|
|
bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid)
|
|
{
|
|
u64 regval = mpam_get_regval(tsk);
|
|
u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
|
|
|
|
if (cdp_enabled)
|
|
tsk_closid >>= 1;
|
|
|
|
return tsk_closid == closid;
|
|
}
|
|
|
|
/* The task's pmg is not unique, the partid must be considered too */
|
|
bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid)
|
|
{
|
|
u64 regval = mpam_get_regval(tsk);
|
|
u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval);
|
|
u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval);
|
|
|
|
if (cdp_enabled)
|
|
tsk_closid >>= 1;
|
|
|
|
return (tsk_closid == closid) && (tsk_rmid == rmid);
|
|
}
|
|
|
|
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
|
|
{
|
|
if (l >= RDT_NUM_RESOURCES)
|
|
return NULL;
|
|
|
|
return &mpam_resctrl_controls[l].resctrl_res;
|
|
}
|
|
|
|
static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid)
|
|
{
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
|
|
|
|
if (!mon->class)
|
|
return -EINVAL;
|
|
|
|
switch (evtid) {
|
|
case QOS_L3_OCCUP_EVENT_ID:
|
|
/* With CDP, one monitor gets used for both code/data reads */
|
|
return mpam_alloc_csu_mon(mon->class);
|
|
case QOS_L3_MBM_LOCAL_EVENT_ID:
|
|
case QOS_L3_MBM_TOTAL_EVENT_ID:
|
|
return USE_PRE_ALLOCATED;
|
|
default:
|
|
return -EOPNOTSUPP;
|
|
}
|
|
}
|
|
|
|
void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r,
|
|
enum resctrl_event_id evtid)
|
|
{
|
|
DEFINE_WAIT(wait);
|
|
int *ret;
|
|
|
|
ret = kmalloc_obj(*ret);
|
|
if (!ret)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
do {
|
|
prepare_to_wait(&resctrl_mon_ctx_waiters, &wait,
|
|
TASK_INTERRUPTIBLE);
|
|
*ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid);
|
|
if (*ret == -ENOSPC)
|
|
schedule();
|
|
} while (*ret == -ENOSPC && !signal_pending(current));
|
|
finish_wait(&resctrl_mon_ctx_waiters, &wait);
|
|
|
|
return ret;
|
|
}
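/*
 * Note on the waiting above: while resctrl_arch_mon_ctx_alloc_no_wait()
 * reports -ENOSPC (no free monitor), the caller sleeps interruptibly on
 * resctrl_mon_ctx_waiters. The matching wake_up() is issued by
 * resctrl_arch_mon_ctx_free_no_wait(), so allocation blocks until another
 * user frees a monitor or a signal is delivered.
 */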
|
|
|
|
static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid,
|
|
u32 mon_idx)
|
|
{
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid];
|
|
|
|
if (!mon->class)
|
|
return;
|
|
|
|
if (evtid == QOS_L3_OCCUP_EVENT_ID)
|
|
mpam_free_csu_mon(mon->class, mon_idx);
|
|
|
|
wake_up(&resctrl_mon_ctx_waiters);
|
|
}
|
|
|
|
void resctrl_arch_mon_ctx_free(struct rdt_resource *r,
|
|
enum resctrl_event_id evtid, void *arch_mon_ctx)
|
|
{
|
|
u32 mon_idx = *(u32 *)arch_mon_ctx;
|
|
|
|
kfree(arch_mon_ctx);
|
|
|
|
resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx);
|
|
}
|
|
|
|
static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
|
|
enum mpam_device_features mon_type,
|
|
int mon_idx,
|
|
enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val)
|
|
{
|
|
struct mon_cfg cfg;
|
|
|
|
if (!mpam_is_enabled())
|
|
return -EINVAL;
|
|
|
|
/* Shift closid to account for CDP */
|
|
closid = resctrl_get_config_index(closid, cdp_type);
|
|
|
|
if (irqs_disabled()) {
|
|
/* Check if we can access this domain without an IPI */
|
|
return -EIO;
|
|
}
|
|
|
|
cfg = (struct mon_cfg) {
|
|
.mon = mon_idx,
|
|
.match_pmg = true,
|
|
.partid = closid,
|
|
.pmg = rmid,
|
|
};
|
|
|
|
return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
|
|
}
|
|
|
|
static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
|
|
enum mpam_device_features mon_type,
|
|
int mon_idx, u32 closid, u32 rmid, u64 *val)
|
|
{
|
|
if (cdp_enabled) {
|
|
u64 code_val = 0, data_val = 0;
|
|
int err;
|
|
|
|
err = __read_mon(mon, mon_comp, mon_type, mon_idx,
|
|
CDP_CODE, closid, rmid, &code_val);
|
|
if (err)
|
|
return err;
|
|
|
|
err = __read_mon(mon, mon_comp, mon_type, mon_idx,
|
|
CDP_DATA, closid, rmid, &data_val);
|
|
if (err)
|
|
return err;
|
|
|
|
*val += code_val + data_val;
|
|
return 0;
|
|
}
|
|
|
|
return __read_mon(mon, mon_comp, mon_type, mon_idx,
|
|
CDP_NONE, closid, rmid, val);
|
|
}
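/*
 * With emulated CDP each monitoring group spans two partids, so the read
 * above is performed twice - once with the code partid and once with the
 * data partid - and the two counts are summed into *val so that a single
 * resctrl counter file covers the whole group.
 */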
|
|
|
|
/* MBWU when not in ABMC mode (not supported), and CSU counters. */
|
|
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
|
|
u32 closid, u32 rmid, enum resctrl_event_id eventid,
|
|
void *arch_priv, u64 *val, void *arch_mon_ctx)
|
|
{
|
|
struct mpam_resctrl_dom *l3_dom;
|
|
struct mpam_component *mon_comp;
|
|
u32 mon_idx = *(u32 *)arch_mon_ctx;
|
|
enum mpam_device_features mon_type;
|
|
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
|
|
|
|
resctrl_arch_rmid_read_context_check();
|
|
|
|
if (eventid >= QOS_NUM_EVENTS || !mon->class)
|
|
return -EINVAL;
|
|
|
|
l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr);
|
|
mon_comp = l3_dom->mon_comp[eventid];
|
|
|
|
if (eventid != QOS_L3_OCCUP_EVENT_ID)
|
|
return -EINVAL;
|
|
|
|
mon_type = mpam_feat_msmon_csu;
|
|
|
|
return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx,
|
|
closid, rmid, val);
|
|
}
|
|
|
|
/*
|
|
* The rmid realloc threshold should be for the smallest cache exposed to
|
|
* resctrl.
|
|
*/
|
|
static int update_rmid_limits(struct mpam_class *class)
|
|
{
|
|
u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx();
|
|
struct mpam_props *cprops = &class->props;
|
|
struct cacheinfo *ci;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
return 0;
|
|
|
|
/*
|
|
* Assume cache levels are the same size for all CPUs...
|
|
* The check just requires any online CPU and it can't go offline as we
|
|
* hold the cpu lock.
|
|
*/
|
|
ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level);
|
|
if (!ci || ci->size == 0) {
|
|
pr_debug("Could not read cache size for class %u\n",
|
|
class->level);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!resctrl_rmid_realloc_limit ||
|
|
ci->size < resctrl_rmid_realloc_limit) {
|
|
resctrl_rmid_realloc_limit = ci->size;
|
|
resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg;
|
|
}
|
|
|
|
return 0;
|
|
}
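/*
 * Worked example (illustrative values): a 32MB L3 with 256 rmid indexes
 * gives resctrl_rmid_realloc_threshold = 32MB / 256 = 128KB and
 * resctrl_rmid_realloc_limit = 32MB. A smaller monitored cache seen later
 * replaces both values, keeping the threshold based on the smallest cache
 * exposed to resctrl.
 */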
|
|
|
|
static bool cache_has_usable_cpor(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_cpor_part, cprops))
|
|
return false;
|
|
|
|
/* resctrl uses u32 for all bitmap configurations */
|
|
return class->props.cpbm_wd <= 32;
|
|
}
|
|
|
|
static bool mba_class_use_mbw_max(struct mpam_props *cprops)
|
|
{
|
|
return (mpam_has_feature(mpam_feat_mbw_max, cprops) &&
|
|
cprops->bwa_wd);
|
|
}
|
|
|
|
static bool class_has_usable_mba(struct mpam_props *cprops)
|
|
{
|
|
return mba_class_use_mbw_max(cprops);
|
|
}
|
|
|
|
static bool cache_has_usable_csu(struct mpam_class *class)
|
|
{
|
|
struct mpam_props *cprops;
|
|
|
|
if (!class)
|
|
return false;
|
|
|
|
cprops = &class->props;
|
|
|
|
if (!mpam_has_feature(mpam_feat_msmon_csu, cprops))
|
|
return false;
|
|
|
|
/*
|
|
* CSU counters settle on the value, so we can get away with
|
|
* having only one.
|
|
*/
|
|
if (!cprops->num_csu_mon)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Calculate the worst-case percentage change from each implemented step
|
|
* in the control.
|
|
*/
|
|
static u32 get_mba_granularity(struct mpam_props *cprops)
|
|
{
|
|
if (!mba_class_use_mbw_max(cprops))
|
|
return 0;
|
|
|
|
/*
|
|
* bwa_wd is the number of bits implemented in the 0.xxx
|
|
* fixed point fraction. 1 bit is 50%, 2 is 25% etc.
|
|
*/
|
|
return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd);
|
|
}
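/*
 * e.g. bwa_wd = 2 implements four steps, so the worst-case change per step
 * is DIV_ROUND_UP(MAX_MBA_BW, 4), i.e. 25% on the 0-100 scale resctrl uses.
 */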
|
|
|
|
/*
 * Each fixed-point hardware value architecturally represents a range
 * of values: the full range 0% - 100% is split contiguously into
 * (1 << cprops->bwa_wd) equal bands.
 *
 * Although the bwa_wd fields have 6 bits, the maximum valid value is 16
 * as it reports the width of fields that are at most 16 bits. When
 * fewer than 16 bits are valid, the least significant bits are
 * ignored. The implied binary point is kept between bits 15 and 16 and
 * so the valid bits are leftmost.
 *
 * See ARM IHI0099B.a "MPAM system component specification", Section 9.3,
 * "The fixed-point fractional format" for more information.
 *
 * Find the nearest percentage value to the upper bound of the selected band:
 */
|
|
static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
|
|
{
|
|
u32 val = mbw_max;
|
|
|
|
val >>= 16 - cprops->bwa_wd;
|
|
val += 1;
|
|
val *= MAX_MBA_BW;
|
|
val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd);
|
|
|
|
return val;
|
|
}
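/*
 * Worked example (bwa_wd = 8): mbw_max = 0x7f00 selects band 127, whose
 * upper bound is 128/256 of the scale, so the value reported to resctrl is
 * DIV_ROUND_CLOSEST(128 * 100, 256) = 50%.
 */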
|
|
|
|
/*
|
|
* Find the band whose upper bound is closest to the specified percentage.
|
|
*
|
|
* A round-to-nearest policy is followed here as a balanced compromise
|
|
* between unexpected under-commit of the resource (where the total of
|
|
* a set of resource allocations after conversion is less than the
|
|
* expected total, due to rounding of the individual converted
|
|
* percentages) and over-commit (where the total of the converted
|
|
* allocations is greater than expected).
|
|
*/
|
|
static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
|
|
{
|
|
u32 val = pc;
|
|
|
|
val <<= cprops->bwa_wd;
|
|
val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW);
|
|
val = max(val, 1) - 1;
|
|
val <<= 16 - cprops->bwa_wd;
|
|
|
|
return val;
|
|
}
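/*
 * Worked example (bwa_wd = 8): pc = 50 gives (50 << 8) / 100 = 128, clamped
 * and decremented to band 127, i.e. mbw_max = 0x7f00 - which
 * mbw_max_to_percent() converts straight back to 50%.
 */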
|
|
|
|
static u32 get_mba_min(struct mpam_props *cprops)
|
|
{
|
|
if (!mba_class_use_mbw_max(cprops)) {
|
|
WARN_ON_ONCE(1);
|
|
return 0;
|
|
}
|
|
|
|
return mbw_max_to_percent(0, cprops);
|
|
}
|
|
|
|
/* Find the L3 cache that has affinity with this CPU */
|
|
static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask)
|
|
{
|
|
u32 cache_id = get_cpu_cacheinfo_id(cpu, 3);
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask);
|
|
}
|
|
|
|
/*
|
|
* topology_matches_l3() - Is the provided class the same shape as L3
|
|
* @victim: The class we'd like to pretend is L3.
|
|
*
|
|
 * resctrl expects all the world's a Xeon, and all counters are on the
 * L3. We allow mapping some counters onto other classes. This requires
 * that the CPU->domain mapping is the same kind of shape.
|
|
*
|
|
* Using cacheinfo directly would make this work even if resctrl can't
|
|
* use the L3 - but cacheinfo can't tell us anything about offline CPUs.
|
|
* Using the L3 resctrl domain list also depends on CPUs being online.
|
|
* Using the mpam_class we picked for L3 so we can use its domain list
|
|
* assumes that there are MPAM controls on the L3.
|
|
* Instead, this path eventually uses the mpam_get_cpumask_from_cache_id()
|
|
* helper which can tell us about offline CPUs ... but getting the cache_id
|
|
* to start with relies on at least one CPU per L3 cache being online at
|
|
* boot.
|
|
*
|
|
* Walk the victim component list and compare the affinity mask with the
|
|
* corresponding L3. The topology matches if each victim:component's affinity
|
|
* mask is the same as the CPU's corresponding L3's. These lists/masks are
|
|
* computed from firmware tables so don't change at runtime.
|
|
*/
|
|
static bool topology_matches_l3(struct mpam_class *victim)
|
|
{
|
|
int cpu, err;
|
|
struct mpam_component *victim_iter;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
|
|
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL))
|
|
return false;
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(victim_iter, &victim->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_empty(&victim_iter->affinity)) {
|
|
pr_debug("class %u has CPU-less component %u - can't match L3!\n",
|
|
victim->level, victim_iter->comp_id);
|
|
return false;
|
|
}
|
|
|
|
cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask);
|
|
if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
|
|
return false;
|
|
|
|
cpumask_clear(tmp_cpumask);
|
|
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
|
|
if (err) {
|
|
pr_debug("Failed to find L3's equivalent component to class %u component %u\n",
|
|
victim->level, victim_iter->comp_id);
|
|
return false;
|
|
}
|
|
|
|
/* Any differing bits in the affinity mask? */
|
|
if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) {
|
|
pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n"
|
|
"L3:%*pbl != victim:%*pbl\n",
|
|
victim->level, victim_iter->comp_id,
|
|
cpumask_pr_args(tmp_cpumask),
|
|
cpumask_pr_args(&victim_iter->affinity));
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Test if the traffic for a class matches that at egress from the L3. For
 * an MSC at a memory controller this is only possible if there is a single
 * L3, as otherwise the counters at the memory controller can include
 * bandwidth from a non-local L3.
 */
|
|
static bool traffic_matches_l3(struct mpam_class *class)
|
|
{
|
|
int err, cpu;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (class->type == MPAM_CLASS_CACHE && class->level == 3)
|
|
return true;
|
|
|
|
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
|
|
pr_debug("class %u is a different cache from L3\n", class->level);
|
|
return false;
|
|
}
|
|
|
|
if (class->type != MPAM_CLASS_MEMORY) {
|
|
pr_debug("class %u is neither of type cache or memory\n", class->level);
|
|
return false;
|
|
}
|
|
|
|
cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL;
|
|
if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) {
|
|
pr_debug("cpumask allocation failed\n");
|
|
return false;
|
|
}
|
|
|
|
cpu = cpumask_any_and(&class->affinity, cpu_online_mask);
|
|
err = find_l3_equivalent_bitmask(cpu, tmp_cpumask);
|
|
if (err) {
|
|
pr_debug("Failed to find L3 downstream to cpu %d\n", cpu);
|
|
return false;
|
|
}
|
|
|
|
if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) {
|
|
pr_debug("There is more than one L3\n");
|
|
return false;
|
|
}
|
|
|
|
/* Be strict; the traffic might stop in the intermediate cache. */
|
|
if (get_cpu_cacheinfo_id(cpu, 4) != -1) {
|
|
pr_debug("L3 isn't the last level of cache\n");
|
|
return false;
|
|
}
|
|
|
|
if (num_possible_nodes() > 1) {
|
|
pr_debug("There is more than one numa node\n");
|
|
return false;
|
|
}
|
|
|
|
#ifdef CONFIG_HMEM_REPORTING
|
|
if (node_devices[cpu_to_node(cpu)]->cache_dev) {
|
|
pr_debug("There is a memory side cache\n");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */
|
|
static void mpam_resctrl_pick_caches(void)
|
|
{
|
|
struct mpam_class *class;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (class->type != MPAM_CLASS_CACHE) {
|
|
pr_debug("class %u is not a cache\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (class->level != 2 && class->level != 3) {
|
|
pr_debug("class %u is not L2 or L3\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cache_has_usable_cpor(class)) {
|
|
pr_debug("class %u cache misses CPOR\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level,
|
|
cpumask_pr_args(&class->affinity),
|
|
cpumask_pr_args(cpu_possible_mask));
|
|
continue;
|
|
}
|
|
|
|
if (class->level == 2)
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_L2];
|
|
else
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
res->class = class;
|
|
}
|
|
}
|
|
|
|
static void mpam_resctrl_pick_mba(void)
|
|
{
|
|
struct mpam_class *class, *candidate_class = NULL;
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
struct mpam_props *cprops = &class->props;
|
|
|
|
if (class->level != 3 && class->type == MPAM_CLASS_CACHE) {
|
|
pr_debug("class %u is a cache but not the L3\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!class_has_usable_mba(cprops)) {
|
|
pr_debug("class %u has no bandwidth control\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u has missing CPUs\n", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!topology_matches_l3(class)) {
|
|
pr_debug("class %u topology doesn't match L3\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!traffic_matches_l3(class)) {
|
|
pr_debug("class %u traffic doesn't match L3 egress\n",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
		/*
		 * Pick a class to be MBA that is as close as possible to
		 * the L3. mbm_total counts the bandwidth leaving the L3
		 * cache, and MBA should correspond as closely as possible
		 * for proper operation of mba_sc.
		 */
|
|
if (!candidate_class || class->level < candidate_class->level)
|
|
candidate_class = class;
|
|
}
|
|
|
|
if (candidate_class) {
|
|
pr_debug("selected class %u to back MBA\n",
|
|
candidate_class->level);
|
|
res = &mpam_resctrl_controls[RDT_RESOURCE_MBA];
|
|
res->class = candidate_class;
|
|
}
|
|
}
|
|
|
|
static void counter_update_class(enum resctrl_event_id evt_id,
|
|
struct mpam_class *class)
|
|
{
|
|
struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class;
|
|
|
|
if (existing_class) {
|
|
if (class->level == 3) {
|
|
pr_debug("Existing class is L3 - L3 wins\n");
|
|
return;
|
|
}
|
|
|
|
if (existing_class->level < class->level) {
|
|
pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n",
|
|
existing_class->level, class->level);
|
|
return;
|
|
}
|
|
}
|
|
|
|
mpam_resctrl_counters[evt_id].class = class;
|
|
}
|
|
|
|
static void mpam_resctrl_pick_counters(void)
|
|
{
|
|
struct mpam_class *class;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(class, &mpam_classes, classes_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
/* The name of the resource is L3... */
|
|
if (class->type == MPAM_CLASS_CACHE && class->level != 3) {
|
|
pr_debug("class %u is a cache but not the L3", class->level);
|
|
continue;
|
|
}
|
|
|
|
if (!cpumask_equal(&class->affinity, cpu_possible_mask)) {
|
|
pr_debug("class %u does not cover all CPUs",
|
|
class->level);
|
|
continue;
|
|
}
|
|
|
|
if (cache_has_usable_csu(class)) {
|
|
pr_debug("class %u has usable CSU",
|
|
class->level);
|
|
|
|
/* CSU counters only make sense on a cache. */
|
|
switch (class->type) {
|
|
case MPAM_CLASS_CACHE:
|
|
if (update_rmid_limits(class))
|
|
break;
|
|
|
|
counter_update_class(QOS_L3_OCCUP_EVENT_ID, class);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int mpam_resctrl_control_init(struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_class *class = res->class;
|
|
struct mpam_props *cprops = &class->props;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
r->schema_fmt = RESCTRL_SCHEMA_BITMAP;
|
|
r->cache.arch_has_sparse_bitmasks = true;
|
|
|
|
r->cache.cbm_len = class->props.cpbm_wd;
|
|
/* mpam_devices will reject empty bitmaps */
|
|
r->cache.min_cbm_bits = 1;
|
|
|
|
if (r->rid == RDT_RESOURCE_L2) {
|
|
r->name = "L2";
|
|
r->ctrl_scope = RESCTRL_L2_CACHE;
|
|
r->cdp_capable = true;
|
|
} else {
|
|
r->name = "L3";
|
|
r->ctrl_scope = RESCTRL_L3_CACHE;
|
|
r->cdp_capable = true;
|
|
}
|
|
|
|
		/*
		 * Which bits are shared with other ...things... Unknown
		 * devices use partid-0, which uses all the bitmap fields. Until
		 * we have configured the SMMU and GIC not to do this, 'all the
		 * bits' is the correct answer here.
		 */
|
|
r->cache.shareable_bits = resctrl_get_default_ctrl(r);
|
|
r->alloc_capable = true;
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
r->schema_fmt = RESCTRL_SCHEMA_RANGE;
|
|
r->ctrl_scope = RESCTRL_L3_CACHE;
|
|
|
|
r->membw.delay_linear = true;
|
|
r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
|
|
r->membw.min_bw = get_mba_min(cprops);
|
|
r->membw.max_bw = MAX_MBA_BW;
|
|
r->membw.bw_gran = get_mba_granularity(cprops);
|
|
|
|
r->name = "MB";
|
|
r->alloc_capable = true;
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp)
|
|
{
|
|
struct mpam_class *class = comp->class;
|
|
|
|
if (class->type == MPAM_CLASS_CACHE)
|
|
return comp->comp_id;
|
|
|
|
if (topology_matches_l3(class)) {
|
|
/* Use the corresponding L3 component ID as the domain ID */
|
|
int id = get_cpu_cacheinfo_id(cpu, 3);
|
|
|
|
/* Implies topology_matches_l3() made a mistake */
|
|
if (WARN_ON_ONCE(id == -1))
|
|
return comp->comp_id;
|
|
|
|
return id;
|
|
}
|
|
|
|
/* Otherwise, expose the ID used by the firmware table code. */
|
|
return comp->comp_id;
|
|
}
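/*
 * e.g. an MSC on a memory controller whose component covers the same CPUs
 * as the L3 with cache-id 1 is exposed with domain id 1, so its resctrl
 * domains line up with (or stand in for) the L3 domains.
 */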
|
|
|
|
static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon,
|
|
enum resctrl_event_id type)
|
|
{
|
|
struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
struct rdt_resource *l3 = &res->resctrl_res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
/*
|
|
* There also needs to be an L3 cache present.
|
|
* The check just requires any online CPU and it can't go offline as we
|
|
* hold the cpu lock.
|
|
*/
|
|
if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1)
|
|
return 0;
|
|
|
|
/*
|
|
* If there are no MPAM resources on L3, force it into existence.
|
|
* topology_matches_l3() already ensures this looks like the L3.
|
|
* The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init().
|
|
*/
|
|
if (!res->class) {
|
|
pr_warn_once("Faking L3 MSC to enable counters.\n");
|
|
res->class = mpam_resctrl_counters[type].class;
|
|
}
|
|
|
|
	/*
	 * Called multiple times, once per event type that has a
	 * monitoring class.
	 * Setting the name is necessary on monitor-only platforms.
	 */
|
|
l3->name = "L3";
|
|
l3->mon_scope = RESCTRL_L3_CACHE;
|
|
|
|
/*
|
|
* num-rmid is the upper bound for the number of monitoring groups that
|
|
* can exist simultaneously, including the default monitoring group for
|
|
* each control group. Hence, advertise the whole rmid_idx space even
|
|
* though each control group has its own pmg/rmid space. Unfortunately,
|
|
* this does mean userspace needs to know the architecture to correctly
|
|
* interpret this value.
|
|
*/
|
|
l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx();
|
|
|
|
if (resctrl_enable_mon_event(type, false, 0, NULL))
|
|
l3->mon_capable = true;
|
|
|
|
return 0;
|
|
}
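/*
 * e.g. with mpam_partid_max = 63 and mpam_pmg_max = 3, num_rmid is reported
 * as 256 even though each control group only has 4 pmg values of its own;
 * as noted above, user-space needs to know it is on MPAM to interpret the
 * value this way.
 */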
|
|
|
|
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
|
|
u32 closid, enum resctrl_conf_type type)
|
|
{
|
|
u32 partid;
|
|
struct mpam_config *cfg;
|
|
struct mpam_props *cprops;
|
|
struct mpam_resctrl_res *res;
|
|
struct mpam_resctrl_dom *dom;
|
|
enum mpam_device_features configured_by;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_is_enabled())
|
|
return resctrl_get_default_ctrl(r);
|
|
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
|
|
cprops = &res->class->props;
|
|
|
|
/*
|
|
* When CDP is enabled, but the resource doesn't support it,
|
|
* the control is cloned across both partids.
|
|
* Pick one at random to read:
|
|
*/
|
|
if (mpam_resctrl_hide_cdp(r->rid))
|
|
type = CDP_DATA;
|
|
|
|
partid = resctrl_get_config_index(closid, type);
|
|
cfg = &dom->ctrl_comp->cfg[partid];
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
configured_by = mpam_feat_cpor_part;
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
|
|
configured_by = mpam_feat_mbw_max;
|
|
break;
|
|
}
|
|
fallthrough;
|
|
default:
|
|
return resctrl_get_default_ctrl(r);
|
|
}
|
|
|
|
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) ||
|
|
!mpam_has_feature(configured_by, cfg))
|
|
return resctrl_get_default_ctrl(r);
|
|
|
|
switch (configured_by) {
|
|
case mpam_feat_cpor_part:
|
|
return cfg->cpbm;
|
|
case mpam_feat_mbw_max:
|
|
return mbw_max_to_percent(cfg->mbw_max, cprops);
|
|
default:
|
|
return resctrl_get_default_ctrl(r);
|
|
}
|
|
}
|
|
|
|
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
|
|
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
|
|
{
|
|
int err;
|
|
u32 partid;
|
|
struct mpam_config cfg;
|
|
struct mpam_props *cprops;
|
|
struct mpam_resctrl_res *res;
|
|
struct mpam_resctrl_dom *dom;
|
|
|
|
lockdep_assert_cpus_held();
|
|
lockdep_assert_irqs_enabled();
|
|
|
|
/*
|
|
* No need to check the CPU as mpam_apply_config() doesn't care, and
|
|
* resctrl_arch_update_domains() relies on this.
|
|
*/
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom);
|
|
cprops = &res->class->props;
|
|
|
|
if (mpam_resctrl_hide_cdp(r->rid))
|
|
t = CDP_DATA;
|
|
|
|
partid = resctrl_get_config_index(closid, t);
|
|
if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) {
|
|
pr_debug("Not alloc capable or computed PARTID out of range\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* Copy the current config to avoid clearing other resources when the
|
|
* same component is exposed multiple times through resctrl.
|
|
*/
|
|
cfg = dom->ctrl_comp->cfg[partid];
|
|
|
|
switch (r->rid) {
|
|
case RDT_RESOURCE_L2:
|
|
case RDT_RESOURCE_L3:
|
|
cfg.cpbm = cfg_val;
|
|
mpam_set_feature(mpam_feat_cpor_part, &cfg);
|
|
break;
|
|
case RDT_RESOURCE_MBA:
|
|
if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
|
|
cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
|
|
mpam_set_feature(mpam_feat_mbw_max, &cfg);
|
|
break;
|
|
}
|
|
fallthrough;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* When CDP is enabled, but the resource doesn't support it, we need to
|
|
* apply the same configuration to the other partid.
|
|
*/
|
|
if (mpam_resctrl_hide_cdp(r->rid)) {
|
|
partid = resctrl_get_config_index(closid, CDP_CODE);
|
|
err = mpam_apply_config(dom->ctrl_comp, partid, &cfg);
|
|
if (err)
|
|
return err;
|
|
|
|
partid = resctrl_get_config_index(closid, CDP_DATA);
|
|
return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
|
|
}
|
|
|
|
return mpam_apply_config(dom->ctrl_comp, partid, &cfg);
|
|
}
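/*
 * e.g. writing "MB:1=50" via resctrl's schemata file reaches this function
 * with cfg_val = 50 for the MBA resource: percent_to_mbw_max() converts it
 * to a fixed-point mbw_max, and mpam_apply_config() programs the component
 * for the partid derived from the closid (twice, if CDP is being hidden).
 */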
|
|
|
|
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
|
|
{
|
|
int err;
|
|
struct rdt_ctrl_domain *d;
|
|
|
|
lockdep_assert_cpus_held();
|
|
lockdep_assert_irqs_enabled();
|
|
|
|
list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) {
|
|
for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) {
|
|
struct resctrl_staged_config *cfg = &d->staged_config[t];
|
|
|
|
if (!cfg->have_new_ctrl)
|
|
continue;
|
|
|
|
err = resctrl_arch_update_one(r, d, closid, t,
|
|
cfg->new_ctrl);
|
|
if (err)
|
|
return err;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!mpam_is_enabled())
|
|
return;
|
|
|
|
res = container_of(r, struct mpam_resctrl_res, resctrl_res);
|
|
mpam_reset_class_locked(res->class);
|
|
}
|
|
|
|
static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp,
|
|
enum resctrl_res_level rid,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_cpus_held();
|
|
|
|
INIT_LIST_HEAD(&hdr->list);
|
|
hdr->id = mpam_resctrl_pick_domain_id(cpu, comp);
|
|
hdr->rid = rid;
|
|
cpumask_set_cpu(cpu, &hdr->cpu_mask);
|
|
}
|
|
|
|
static void mpam_resctrl_online_domain_hdr(unsigned int cpu,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_cpus_held();
|
|
|
|
cpumask_set_cpu(cpu, &hdr->cpu_mask);
|
|
}
|
|
|
|
/**
|
|
* mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU.
|
|
* @cpu: The CPU to remove from the domain.
|
|
* @hdr: The domain's header.
|
|
*
|
|
* Removes @cpu from the header mask. If this was the last CPU in the domain,
|
|
* the domain header is removed from its parent list and true is returned,
|
|
* indicating the parent structure can be freed.
|
|
* If there are other CPUs in the domain, returns false.
|
|
*/
|
|
static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu,
|
|
struct rdt_domain_hdr *hdr)
|
|
{
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
cpumask_clear_cpu(cpu, &hdr->cpu_mask);
|
|
if (cpumask_empty(&hdr->cpu_mask)) {
|
|
list_del_rcu(&hdr->list);
|
|
synchronize_rcu();
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void mpam_resctrl_domain_insert(struct list_head *list,
|
|
struct rdt_domain_hdr *new)
|
|
{
|
|
struct rdt_domain_hdr *err;
|
|
struct list_head *pos = NULL;
|
|
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
err = resctrl_find_domain(list, new->id, &pos);
|
|
if (WARN_ON_ONCE(err))
|
|
return;
|
|
|
|
list_add_tail_rcu(&new->list, pos);
|
|
}
|
|
|
|
static struct mpam_component *find_component(struct mpam_class *class, int cpu)
|
|
{
|
|
struct mpam_component *comp;
|
|
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(comp, &class->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_test_cpu(cpu, &comp->affinity))
|
|
return comp;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct mpam_resctrl_dom *
|
|
mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
int err;
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_l3_mon_domain *mon_d;
|
|
struct rdt_ctrl_domain *ctrl_d;
|
|
struct mpam_class *class = res->class;
|
|
struct mpam_component *comp_iter, *ctrl_comp;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
lockdep_assert_held(&domain_list_lock);
|
|
|
|
ctrl_comp = NULL;
|
|
guard(srcu)(&mpam_srcu);
|
|
list_for_each_entry_srcu(comp_iter, &class->components, class_list,
|
|
srcu_read_lock_held(&mpam_srcu)) {
|
|
if (cpumask_test_cpu(cpu, &comp_iter->affinity)) {
|
|
ctrl_comp = comp_iter;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* class has no component for this CPU */
|
|
if (WARN_ON_ONCE(!ctrl_comp))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu));
|
|
if (!dom)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (r->alloc_capable) {
|
|
dom->ctrl_comp = ctrl_comp;
|
|
|
|
ctrl_d = &dom->resctrl_ctrl_dom;
|
|
mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr);
|
|
ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN;
|
|
err = resctrl_online_ctrl_domain(r, ctrl_d);
|
|
if (err)
|
|
goto free_domain;
|
|
|
|
mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr);
|
|
} else {
|
|
pr_debug("Skipped control domain online - no controls\n");
|
|
}
|
|
|
|
if (r->mon_capable) {
|
|
		struct mpam_component *any_mon_comp = NULL;
|
|
struct mpam_resctrl_mon *mon;
|
|
enum resctrl_event_id eventid;
|
|
|
|
/*
|
|
* Even if the monitor domain is backed by a different
|
|
* component, the L3 component IDs need to be used... only
|
|
* there may be no ctrl_comp for the L3.
|
|
* Search each event's class list for a component with
|
|
* overlapping CPUs and set up the dom->mon_comp array.
|
|
*/
|
|
|
|
for_each_mpam_resctrl_mon(mon, eventid) {
|
|
struct mpam_component *mon_comp;
|
|
|
|
if (!mon->class)
|
|
continue; // dummy resource
|
|
|
|
mon_comp = find_component(mon->class, cpu);
|
|
dom->mon_comp[eventid] = mon_comp;
|
|
if (mon_comp)
|
|
any_mon_comp = mon_comp;
|
|
}
|
|
if (!any_mon_comp) {
|
|
			WARN_ON_ONCE(1);
|
|
err = -EFAULT;
|
|
goto offline_ctrl_domain;
|
|
}
|
|
|
|
mon_d = &dom->resctrl_mon_dom;
|
|
mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr);
|
|
mon_d->hdr.type = RESCTRL_MON_DOMAIN;
|
|
err = resctrl_online_mon_domain(r, &mon_d->hdr);
|
|
if (err)
|
|
goto offline_ctrl_domain;
|
|
|
|
mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr);
|
|
} else {
|
|
pr_debug("Skipped monitor domain online - no monitors\n");
|
|
}
|
|
|
|
return dom;
|
|
|
|
offline_ctrl_domain:
|
|
if (r->alloc_capable) {
|
|
mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
|
|
resctrl_offline_ctrl_domain(r, ctrl_d);
|
|
}
|
|
free_domain:
|
|
kfree(dom);
|
|
dom = ERR_PTR(err);
|
|
|
|
return dom;
|
|
}
|
|
|
|
/*
|
|
* We know all the monitors are associated with the L3, even if there are no
|
|
* controls and therefore no control component. Find the cache-id for the CPU
|
|
* and use that to search for existing resctrl domains.
|
|
* This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id
|
|
* for anything that is not a cache.
|
|
*/
|
|
static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu)
|
|
{
|
|
int cache_id;
|
|
struct mpam_resctrl_dom *dom;
|
|
struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3];
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (!l3->class)
|
|
return NULL;
|
|
cache_id = get_cpu_cacheinfo_id(cpu, 3);
|
|
if (cache_id < 0)
|
|
return NULL;
|
|
|
|
list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) {
|
|
if (dom->resctrl_mon_dom.hdr.id == cache_id)
|
|
return dom;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct mpam_resctrl_dom *
|
|
mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res)
|
|
{
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) {
|
|
if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity))
|
|
return dom;
|
|
}
|
|
|
|
if (r->rid != RDT_RESOURCE_L3)
|
|
return NULL;
|
|
|
|
/* Search the mon domain list too - needed on monitor only platforms. */
|
|
return mpam_resctrl_get_mon_domain_from_cpu(cpu);
|
|
}
|
|
|
|
int mpam_resctrl_online_cpu(unsigned int cpu)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
guard(mutex)(&domain_list_lock);
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
if (!res->class)
|
|
continue; // dummy_resource;
|
|
|
|
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
|
|
if (!dom) {
|
|
dom = mpam_resctrl_alloc_domain(cpu, res);
|
|
if (IS_ERR(dom))
|
|
return PTR_ERR(dom);
|
|
} else {
|
|
if (r->alloc_capable) {
|
|
struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom;
|
|
|
|
mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr);
|
|
}
|
|
if (r->mon_capable) {
|
|
struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom;
|
|
|
|
mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr);
|
|
}
|
|
}
|
|
}
|
|
|
|
resctrl_online_cpu(cpu);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void mpam_resctrl_offline_cpu(unsigned int cpu)
|
|
{
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
|
|
resctrl_offline_cpu(cpu);
|
|
|
|
guard(mutex)(&domain_list_lock);
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
struct mpam_resctrl_dom *dom;
|
|
struct rdt_l3_mon_domain *mon_d;
|
|
struct rdt_ctrl_domain *ctrl_d;
|
|
bool ctrl_dom_empty, mon_dom_empty;
|
|
struct rdt_resource *r = &res->resctrl_res;
|
|
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
dom = mpam_resctrl_get_domain_from_cpu(cpu, res);
|
|
if (WARN_ON_ONCE(!dom))
|
|
continue;
|
|
|
|
if (r->alloc_capable) {
|
|
ctrl_d = &dom->resctrl_ctrl_dom;
|
|
ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr);
|
|
if (ctrl_dom_empty)
|
|
resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);
|
|
} else {
|
|
ctrl_dom_empty = true;
|
|
}
|
|
|
|
if (r->mon_capable) {
|
|
mon_d = &dom->resctrl_mon_dom;
|
|
mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr);
|
|
if (mon_dom_empty)
|
|
resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr);
|
|
} else {
|
|
mon_dom_empty = true;
|
|
}
|
|
|
|
if (ctrl_dom_empty && mon_dom_empty)
|
|
kfree(dom);
|
|
}
|
|
}
|
|
|
|
int mpam_resctrl_setup(void)
|
|
{
|
|
int err = 0;
|
|
struct mpam_resctrl_res *res;
|
|
enum resctrl_res_level rid;
|
|
struct mpam_resctrl_mon *mon;
|
|
enum resctrl_event_id eventid;
|
|
|
|
wait_event(wait_cacheinfo_ready, cacheinfo_ready);
|
|
|
|
cpus_read_lock();
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains);
|
|
INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains);
|
|
res->resctrl_res.rid = rid;
|
|
}
|
|
|
|
/* Find some classes to use for controls */
|
|
mpam_resctrl_pick_caches();
|
|
mpam_resctrl_pick_mba();
|
|
|
|
/* Initialise the resctrl structures from the classes */
|
|
for_each_mpam_resctrl_control(res, rid) {
|
|
if (!res->class)
|
|
continue; // dummy resource
|
|
|
|
err = mpam_resctrl_control_init(res);
|
|
if (err) {
|
|
pr_debug("Failed to initialise rid %u\n", rid);
|
|
goto internal_error;
|
|
}
|
|
}
|
|
|
|
/* Find some classes to use for monitors */
|
|
mpam_resctrl_pick_counters();
|
|
|
|
for_each_mpam_resctrl_mon(mon, eventid) {
|
|
if (!mon->class)
|
|
continue; // dummy resource
|
|
|
|
err = mpam_resctrl_monitor_init(mon, eventid);
|
|
if (err) {
|
|
pr_debug("Failed to initialise event %u\n", eventid);
|
|
goto internal_error;
|
|
}
|
|
}
|
|
|
|
cpus_read_unlock();
|
|
|
|
if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) {
|
|
pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
|
|
resctrl_arch_alloc_capable(), resctrl_arch_mon_capable());
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
/* TODO: call resctrl_init() */
|
|
|
|
return 0;
|
|
|
|
internal_error:
|
|
cpus_read_unlock();
|
|
pr_debug("Internal error %d - resctrl not supported\n", err);
|
|
return err;
|
|
}
|
|
|
|
static int __init __cacheinfo_ready(void)
|
|
{
|
|
cacheinfo_ready = true;
|
|
wake_up(&wait_cacheinfo_ready);
|
|
|
|
return 0;
|
|
}
|
|
device_initcall_sync(__cacheinfo_ready);
|
|
|
|
#ifdef CONFIG_MPAM_KUNIT_TEST
|
|
#include "test_mpam_resctrl.c"
|
|
#endif
|