cgroup: Increment nr_dying_subsys_* from rmdir context

Incrementing nr_dying_subsys_* in offline_css(), which is executed by
cgroup_offline_wq worker, leads to a race where user can see the value
to be 0 if he reads cgroup.stat after calling rmdir and before the worker
executes. This makes the user wrongly expect resources released by the
removed cgroup to be available for a new assignment.

Increment nr_dying_subsys_* from kill_css(), which is called from the
cgroup_rmdir() context.

Fixes: ab03125268 ("cgroup: Show # of subsystem CSSes in cgroup.stat")
Signed-off-by: Petr Malat <oss@malat.biz>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
Petr Malat 2026-04-23 04:48:26 -05:00 committed by Tejun Heo
parent 41d701ddc3
commit 13e786b64b

View File

@ -5724,16 +5724,6 @@ static void offline_css(struct cgroup_subsys_state *css)
RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
wake_up_all(&css->cgroup->offline_waitq);
css->cgroup->nr_dying_subsys[ss->id]++;
/*
* Parent css and cgroup cannot be freed until after the freeing
* of child css, see css_free_rwork_fn().
*/
while ((css = css->parent)) {
css->nr_descendants--;
css->cgroup->nr_dying_subsys[ss->id]++;
}
}
/**
@ -6045,6 +6035,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
*/
static void kill_css(struct cgroup_subsys_state *css)
{
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
if (css->flags & CSS_DYING)
@ -6081,6 +6073,16 @@ static void kill_css(struct cgroup_subsys_state *css)
* css is confirmed to be seen as killed on all CPUs.
*/
percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
css->cgroup->nr_dying_subsys[ss->id]++;
/*
* Parent css and cgroup cannot be freed until after the freeing
* of child css, see css_free_rwork_fn().
*/
while ((css = css->parent)) {
css->nr_descendants--;
css->cgroup->nr_dying_subsys[ss->id]++;
}
}
/**