- Extend resctrl with the capability of total memory bandwidth monitoring,

thus accomodating systems which support only total but not local memory
   bandwidth monitoring. Add the respective new mount options
 
 - The usual cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmeOU2QACgkQEsHwGGHe
 VUoqTw/+LOt/36tCMbqUEIhUIvPciqdhAk+gBzP9XzGEWRJDcYLmgmBvlrFcYgIL
 lZVZ/tBsYqljycMFaJ4K4vishpkJr9s8GtDCOItkMA4cYOJcXSaGSpNojctC2Qs6
 42J+qABENRZYWFmWWcCkf8jTG0QWebmVRJwV9Q/4uYRicLFW/B+Em0sOTFn9n7OX
 39ZyCXKlmu2wsTW/DrQ8wX0KkW5hevLkJk/NFN3CDuWg7LOs09IotALs/U7ayj+C
 BIU5ZEAvan8LAPLX4Nrhb5HArsP6eCmB0Kbr+Mm+X9lRBLSbMmThXy58WlRDT0v6
 LEx+IbKUnEVoYJnC2QiqxPB4FghhZs9RE6cDnCQivwkigFhIau9krPZUc2klnVtv
 AmjUx8BporU3rcvrtKycInQQCVdzi9Q8ai7WRrpCqNEItHSOtNk9BipnFGqcnFDr
 va0SCZ3N1vIiVnZkTZ0CObA2F8RRLMiBCAB9XNfD0TzY+K0p2KCNqS6UnxmrOtR+
 8Fbk+C624u7LhmjbrPr7Hj30wIu2b/CqncIDHrh8O+jK2awvTEl6T0Tryd+jXxR1
 ERy0OSUhHEcnW73ckdfdn3/6fsL/9bnBZrTufRWxPzaM+3E9YR9KGmyc7hf5fkSX
 WE/ZRMse4QwELmSajdv9NK7oO48VDOije+cgUzPgme0i4Lsg4c0=
 =i4Yw
 -----END PGP SIGNATURE-----

Merge tag 'x86_cache_for_v6.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 resource control updates from Borislav Petkov:

 - Extend resctrl with the capability of total memory bandwidth
   monitoring, thus accomodating systems which support only total but
   not local memory bandwidth monitoring. Add the respective new mount
   options

 - The usual cleanups

* tag 'x86_cache_for_v6.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Document the new "mba_MBps_event" file
  x86/resctrl: Add write option to "mba_MBps_event" file
  x86/resctrl: Add "mba_MBps_event" file to CTRL_MON directories
  x86/resctrl: Make mba_sc use total bandwidth if local is not supported
  x86/resctrl: Compute memory bandwidth for all supported events
  x86/resctrl: Modify update_mba_bw() to use per CTRL_MON group event
  x86/resctrl: Prepare for per-CTRL_MON group mba_MBps control
  x86/resctrl: Introduce resctrl_file_fflags_init() to initialize fflags
  x86/resctrl: Use kthread_run_on_cpu()
This commit is contained in:
Linus Torvalds 2025-01-21 08:31:04 -08:00
commit 3357d1d1f9
7 changed files with 178 additions and 87 deletions

View File

@ -384,6 +384,16 @@ When monitoring is enabled all MON groups will also contain:
Available only with debug option. The identifier used by hardware
for the monitor group. On x86 this is the RMID.
When the "mba_MBps" mount option is used all CTRL_MON groups will also contain:
"mba_MBps_event":
Reading this file shows which memory bandwidth event is used
as input to the software feedback loop that keeps memory bandwidth
below the value specified in the schemata file. Writing the
name of one of the supported memory bandwidth events found in
/sys/fs/resctrl/info/L3_MON/mon_features changes the input
event.
Resource allocation rules
-------------------------

View File

@ -234,7 +234,9 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
else
r->membw.throttle_mode = THREAD_THROTTLE_MAX;
thread_throttle_mode_init();
resctrl_file_fflags_init("thread_throttle_mode",
RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
r->alloc_capable = true;
@ -961,6 +963,11 @@ static __init bool get_rdt_mon_resources(void)
if (!rdt_mon_features)
return false;
if (is_mbm_local_enabled())
mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
else if (is_mbm_total_enabled())
mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
return !rdt_get_mon_l3_config(r);
}

View File

@ -518,6 +518,76 @@ static int smp_mon_event_count(void *arg)
return 0;
}
ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct rdtgroup *rdtgrp;
int ret = 0;
/* Valid input requires a trailing newline */
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
buf[nbytes - 1] = '\0';
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
return -ENOENT;
}
rdt_last_cmd_clear();
if (!strcmp(buf, "mbm_local_bytes")) {
if (is_mbm_local_enabled())
rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
else
ret = -EINVAL;
} else if (!strcmp(buf, "mbm_total_bytes")) {
if (is_mbm_total_enabled())
rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
else
ret = -EINVAL;
} else {
ret = -EINVAL;
}
if (ret)
rdt_last_cmd_printf("Unsupported event id '%s'\n", buf);
rdtgroup_kn_unlock(of->kn);
return ret ?: nbytes;
}
int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
switch (rdtgrp->mba_mbps_event) {
case QOS_L3_MBM_LOCAL_EVENT_ID:
seq_puts(s, "mbm_local_bytes\n");
break;
case QOS_L3_MBM_TOTAL_EVENT_ID:
seq_puts(s, "mbm_total_bytes\n");
break;
default:
pr_warn_once("Bad event %d\n", rdtgrp->mba_mbps_event);
ret = -EINVAL;
break;
}
} else {
ret = -ENOENT;
}
rdtgroup_kn_unlock(of->kn);
return ret;
}
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
cpumask_t *cpumask, int evtid, int first)

View File

@ -283,6 +283,7 @@ struct pseudo_lock_region {
* monitor only or ctrl_mon group
* @mon: mongroup related data
* @mode: mode of resource group
* @mba_mbps_event: input monitoring event id when mba_sc is enabled
* @plr: pseudo-locked region
*/
struct rdtgroup {
@ -295,6 +296,7 @@ struct rdtgroup {
enum rdt_group_type type;
struct mongroup mon;
enum rdtgrp_mode mode;
enum resctrl_event_id mba_mbps_event;
struct pseudo_lock_region *plr;
};
@ -508,6 +510,7 @@ extern struct mutex rdtgroup_mutex;
extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
extern struct dentry *debugfs_resctrl;
extern enum resctrl_event_id mba_mbps_default_event;
enum resctrl_res_level {
RDT_RESOURCE_L3,
@ -607,6 +610,10 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
@ -647,10 +654,8 @@ void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_mon_domain *d);
void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
void resctrl_file_fflags_init(const char *config, unsigned long fflags);
void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
int resctrl_find_cleanest_closid(void);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */

View File

@ -663,9 +663,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;
struct mbm_state *m;
m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
if (WARN_ON_ONCE(!m))
return;
cur_bytes = rr->val;
bytes = cur_bytes - m->prev_bw_bytes;
@ -752,20 +755,20 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
struct rdt_ctrl_domain *dom_mba;
enum resctrl_event_id evt_id;
struct rdt_resource *r_mba;
u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
if (!is_mbm_local_enabled())
return;
u32 cur_bw, user_bw;
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
evt_id = rgrp->mba_mbps_event;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
idx = resctrl_arch_rmid_idx_encode(closid, rmid);
pmbm_data = &dom_mbm->mbm_local[idx];
pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
if (WARN_ON_ONCE(!pmbm_data))
return;
dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@ -784,7 +787,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
if (WARN_ON_ONCE(!cmbm_data))
return;
cur_bw += cmbm_data->prev_bw;
}
@ -813,54 +818,45 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid)
static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id evtid)
{
struct rmid_read rr = {0};
rr.r = r;
rr.d = d;
rr.evtid = evtid;
rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
if (IS_ERR(rr.arch_mon_ctx)) {
pr_warn_ratelimited("Failed to allocate monitor context: %ld",
PTR_ERR(rr.arch_mon_ctx));
return;
}
__mon_event_count(closid, rmid, &rr);
/*
* This is protected from concurrent reads from user
* as both the user and we hold the global mutex.
* If the software controller is enabled, compute the
* bandwidth for this event id.
*/
if (is_mbm_total_enabled()) {
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
if (IS_ERR(rr.arch_mon_ctx)) {
pr_warn_ratelimited("Failed to allocate monitor context: %ld",
PTR_ERR(rr.arch_mon_ctx));
return;
}
if (is_mba_sc(NULL))
mbm_bw_count(closid, rmid, &rr);
__mon_event_count(closid, rmid, &rr);
resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
if (IS_ERR(rr.arch_mon_ctx)) {
pr_warn_ratelimited("Failed to allocate monitor context: %ld",
PTR_ERR(rr.arch_mon_ctx));
return;
}
static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid)
{
/*
* This is protected from concurrent reads from user as both
* the user and overflow handler hold the global mutex.
*/
if (is_mbm_total_enabled())
mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
__mon_event_count(closid, rmid, &rr);
/*
* Call the MBA software controller only for the
* control groups and when user has enabled
* the software controller explicitly.
*/
if (is_mba_sc(NULL))
mbm_bw_count(closid, rmid, &rr);
resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
if (is_mbm_local_enabled())
mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
}
/*
@ -1224,11 +1220,13 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
mbm_total_event.configurable = true;
mbm_config_rftype_init("mbm_total_bytes_config");
resctrl_file_fflags_init("mbm_total_bytes_config",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
}
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
mbm_local_event.configurable = true;
mbm_config_rftype_init("mbm_local_bytes_config");
resctrl_file_fflags_init("mbm_local_bytes_config",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
}
}

View File

@ -1205,20 +1205,14 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
plr->cpu = cpu;
if (sel == 1)
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
thread = kthread_run_on_cpu(measure_cycles_lat_fn, plr,
cpu, "pseudo_lock_measure/%u");
else if (sel == 2)
thread = kthread_create_on_node(measure_l2_residency, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
thread = kthread_run_on_cpu(measure_l2_residency, plr,
cpu, "pseudo_lock_measure/%u");
else if (sel == 3)
thread = kthread_create_on_node(measure_l3_residency, plr,
cpu_to_node(cpu),
"pseudo_lock_measure/%u",
cpu);
thread = kthread_run_on_cpu(measure_l3_residency, plr,
cpu, "pseudo_lock_measure/%u");
else
goto out;
@ -1226,8 +1220,6 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
ret = PTR_ERR(thread);
goto out;
}
kthread_bind(thread, cpu);
wake_up_process(thread);
ret = wait_event_interruptible(plr->lock_thread_wq,
plr->thread_done == 1);
@ -1315,18 +1307,14 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
plr->thread_done = 0;
thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
cpu_to_node(plr->cpu),
"pseudo_lock/%u", plr->cpu);
thread = kthread_run_on_cpu(pseudo_lock_fn, rdtgrp,
plr->cpu, "pseudo_lock/%u");
if (IS_ERR(thread)) {
ret = PTR_ERR(thread);
rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
goto out_cstates;
}
kthread_bind(thread, plr->cpu);
wake_up_process(thread);
ret = wait_event_interruptible(plr->lock_thread_wq,
plr->thread_done == 1);
if (ret < 0) {

View File

@ -65,6 +65,15 @@ static void rdtgroup_destroy_root(void);
struct dentry *debugfs_resctrl;
/*
* Memory bandwidth monitoring event to use for the default CTRL_MON group
* and each new CTRL_MON group created by the user. Only relevant when
* the filesystem is mounted with the "mba_MBps" option so it does not
* matter that it remains uninitialized on systems that do not support
* the "mba_MBps" option.
*/
enum resctrl_event_id mba_mbps_default_event;
static bool resctrl_debug;
void rdt_last_cmd_clear(void)
@ -1941,6 +1950,13 @@ static struct rftype res_common_files[] = {
.seq_show = rdtgroup_schemata_show,
.fflags = RFTYPE_CTRL_BASE,
},
{
.name = "mba_MBps_event",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
.write = rdtgroup_mba_mbps_event_write,
.seq_show = rdtgroup_mba_mbps_event_show,
},
{
.name = "mode",
.mode = 0644,
@ -2020,24 +2036,13 @@ static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
return NULL;
}
void __init thread_throttle_mode_init(void)
{
struct rftype *rft;
rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
if (!rft)
return;
rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
}
void __init mbm_config_rftype_init(const char *config)
void resctrl_file_fflags_init(const char *config, unsigned long fflags)
{
struct rftype *rft;
rft = rdtgroup_get_rftype_by_name(config);
if (rft)
rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
rft->fflags = fflags;
}
/**
@ -2343,7 +2348,7 @@ static bool supports_mba_mbps(void)
struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
return (is_mbm_local_enabled() &&
return (is_mbm_enabled() &&
r->alloc_capable && is_mba_linear() &&
r->ctrl_scope == rmbm->mon_scope);
}
@ -2357,6 +2362,7 @@ static int set_mba_sc(bool mba_sc)
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
struct rdt_ctrl_domain *d;
unsigned long fflags;
int i;
if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@ -2364,11 +2370,16 @@ static int set_mba_sc(bool mba_sc)
r->membw.mba_sc = mba_sc;
rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
resctrl_file_fflags_init("mba_MBps_event", fflags);
return 0;
}
@ -2768,7 +2779,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope";
msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
@ -3622,6 +3633,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
}
if (is_mba_sc(NULL))
rdtgrp->mba_mbps_event = mba_mbps_default_event;
}
goto out_unlock;