linux/mm/memcontrol-v1.h
Qi Zheng 8285917d6f mm: memcontrol: prepare for reparenting non-hierarchical stats
To resolve the dying memcg issue, we need to reparent LRU folios of child
memcg to its parent memcg.  This could cause problems for non-hierarchical
stats.

As Yosry Ahmed pointed out:

In short, if memory is charged to a dying cgroup at the time of
reparenting, when the memory gets uncharged the stats updates will occur
at the parent. This will update both hierarchical and non-hierarchical
stats of the parent, which would corrupt the parent's non-hierarchical
stats (because those counters were never incremented when the memory was
charged).

Now we have the following two types of non-hierarchical stats, and they
are only used in CONFIG_MEMCG_V1:

a. memcg->vmstats->state_local[i]
b. pn->lruvec_stats->state_local[i]

To ensure that these non-hierarchical stats work properly, we need to
reparent these non-hierarchical stats after reparenting LRU folios. To
this end, this commit makes the following preparations:

1. implement reparent_state_local() to reparent non-hierarchical stats
2. make css_killed_work_fn() to be called in rcu work, and implement
   get_non_dying_memcg_start() and get_non_dying_memcg_end() to avoid race
   between mod_memcg_state()/mod_memcg_lruvec_state()
   and reparent_state_local()

Link: https://lore.kernel.org/e862995c45a7101a541284b6ebee5e5c32c89066.1772711148.git.zhengqi.arch@bytedance.com
Co-developed-by: Yosry Ahmed <yosry@kernel.org>
Signed-off-by: Yosry Ahmed <yosry@kernel.org>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-04-18 00:10:47 -07:00

132 lines
4.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __MM_MEMCONTROL_V1_H
#define __MM_MEMCONTROL_V1_H
#include <linux/cgroup-defs.h>
/* Cgroup v1 and v2 common declarations */
/*
* Iteration constructs for visiting all cgroups (under a tree). If
* loops are exited prematurely (break), mem_cgroup_iter_break() must
* be used for reference counting.
*/
#define for_each_mem_cgroup_tree(iter, root) \
for (iter = mem_cgroup_iter(root, NULL, NULL); \
iter != NULL; \
iter = mem_cgroup_iter(root, iter, NULL))
#define for_each_mem_cgroup(iter) \
for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
iter != NULL; \
iter = mem_cgroup_iter(NULL, iter, NULL))
void drain_all_stock(struct mem_cgroup *root_memcg);
unsigned long memcg_events(struct mem_cgroup *memcg, int event);
int memory_stat_show(struct seq_file *m, void *v);
struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg,
unsigned int n);
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
/* Whether legacy memory+swap accounting is active */
static inline bool do_memsw_account(void)
{
return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
}
unsigned long memcg_events_local(struct mem_cgroup *memcg, int event);
unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx);
unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
bool memcg1_alloc_events(struct mem_cgroup *memcg);
void memcg1_free_events(struct mem_cgroup *memcg);
void memcg1_memcg_init(struct mem_cgroup *memcg);
void memcg1_remove_from_trees(struct mem_cgroup *memcg);
static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg)
{
WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
}
struct cgroup_taskset;
void memcg1_css_offline(struct mem_cgroup *memcg);
/* for encoding cft->private value on file */
enum res_type {
_MEM,
_MEMSWAP,
_KMEM,
_TCP,
};
bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked);
void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
void memcg1_oom_recover(struct mem_cgroup *memcg);
void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
unsigned long nr_memory, int nid);
void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
void reparent_memcg1_state_local(struct mem_cgroup *memcg, struct mem_cgroup *parent);
void reparent_memcg1_lruvec_state_local(struct mem_cgroup *memcg, struct mem_cgroup *parent);
void reparent_memcg_state_local(struct mem_cgroup *memcg,
struct mem_cgroup *parent, int idx);
void reparent_memcg_lruvec_state_local(struct mem_cgroup *memcg,
struct mem_cgroup *parent, int idx);
void memcg1_account_kmem(struct mem_cgroup *memcg, int nr_pages);
static inline bool memcg1_tcpmem_active(struct mem_cgroup *memcg)
{
return memcg->tcpmem_active;
}
bool memcg1_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
gfp_t gfp_mask);
static inline void memcg1_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
{
page_counter_uncharge(&memcg->tcpmem, nr_pages);
}
extern struct cftype memsw_files[];
extern struct cftype mem_cgroup_legacy_files[];
#else /* CONFIG_MEMCG_V1 */
static inline bool do_memsw_account(void) { return false; }
static inline bool memcg1_alloc_events(struct mem_cgroup *memcg) { return true; }
static inline void memcg1_free_events(struct mem_cgroup *memcg) {}
static inline void memcg1_memcg_init(struct mem_cgroup *memcg) {}
static inline void memcg1_remove_from_trees(struct mem_cgroup *memcg) {}
static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg) {}
static inline void memcg1_css_offline(struct mem_cgroup *memcg) {}
static inline bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked) { return true; }
static inline void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) {}
static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
static inline void memcg1_commit_charge(struct folio *folio,
struct mem_cgroup *memcg) {}
static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg,
unsigned long pgpgout,
unsigned long nr_memory, int nid) {}
static inline void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) {}
static inline void memcg1_account_kmem(struct mem_cgroup *memcg, int nr_pages) {}
static inline bool memcg1_tcpmem_active(struct mem_cgroup *memcg) { return false; }
static inline bool memcg1_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
gfp_t gfp_mask) { return true; }
static inline void memcg1_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) {}
#endif /* CONFIG_MEMCG_V1 */
#endif /* __MM_MEMCONTROL_V1_H */