mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 08:08:03 +02:00
To resolve the dying memcg issue, we need to reparent LRU folios of child
memcg to its parent memcg.  This could cause problems for non-hierarchical
stats.

As Yosry Ahmed pointed out:

    In short, if memory is charged to a dying cgroup at the time of
    reparenting, when the memory gets uncharged the stats updates will occur
    at the parent.  This will update both hierarchical and non-hierarchical
    stats of the parent, which would corrupt the parent's non-hierarchical
    stats (because those counters were never incremented when the memory was
    charged).

Now we have the following two types of non-hierarchical stats, and they are
only used in CONFIG_MEMCG_V1:

a. memcg->vmstats->state_local[i]
b. pn->lruvec_stats->state_local[i]

To ensure that these non-hierarchical stats work properly, we need to
reparent these non-hierarchical stats after reparenting LRU folios.  To this
end, this commit makes the following preparations:

1. implement reparent_state_local() to reparent non-hierarchical stats
2. make css_killed_work_fn() to be called in rcu work, and implement
   get_non_dying_memcg_start() and get_non_dying_memcg_end() to avoid race
   between mod_memcg_state()/mod_memcg_lruvec_state() and
   reparent_state_local()

Link: https://lore.kernel.org/e862995c45a7101a541284b6ebee5e5c32c89066.1772711148.git.zhengqi.arch@bytedance.com
Co-developed-by: Yosry Ahmed <yosry@kernel.org>
Signed-off-by: Yosry Ahmed <yosry@kernel.org>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Allen Pais <apais@linux.microsoft.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Imran Khan <imran.f.khan@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
132 lines
4.7 KiB
C
132 lines
4.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
#ifndef __MM_MEMCONTROL_V1_H
|
|
#define __MM_MEMCONTROL_V1_H
|
|
|
|
#include <linux/cgroup-defs.h>
|
|
|
|
/* Cgroup v1 and v2 common declarations */
|
|
|
|
/*
 * Iteration constructs for visiting all cgroups (under a tree).  If
 * loops are exited prematurely (break), mem_cgroup_iter_break() must
 * be used for reference counting.
 *
 * for_each_mem_cgroup_tree(iter, root) starts the walk with
 * mem_cgroup_iter(root, NULL, NULL), feeds the previous @iter back into
 * mem_cgroup_iter() on every step and stops as soon as it returns NULL.
 * @iter is assigned by the macro, so it must be an lvalue; @root is
 * evaluated once per step.
 */
#define for_each_mem_cgroup_tree(iter, root)		\
	for (iter = mem_cgroup_iter(root, NULL, NULL);	\
	     iter != NULL;				\
	     iter = mem_cgroup_iter(root, iter, NULL))
|
|
|
|
/*
 * for_each_mem_cgroup(iter) - like for_each_mem_cgroup_tree(), but every
 * mem_cgroup_iter() call receives NULL as its root argument.
 * NOTE(review): a NULL root presumably means "walk every memcg in the
 * system" - confirm against mem_cgroup_iter().
 *
 * @iter is assigned by the macro and is NULL once the walk has finished.
 * Leaving the loop early requires mem_cgroup_iter_break(), as for any
 * mem_cgroup_iter() based walk.
 */
#define for_each_mem_cgroup(iter)			\
	for (iter = mem_cgroup_iter(NULL, NULL, NULL);	\
	     iter != NULL;				\
	     iter = mem_cgroup_iter(NULL, iter, NULL))
|
|
|
|
/*
 * Helpers shared by the cgroup v1 and v2 code.  All of them are defined
 * outside this header.
 */

/*
 * NOTE(review): presumably flushes cached charges for @root_memcg and its
 * descendants - confirm against the definition.
 */
void drain_all_stock(struct mem_cgroup *root_memcg);

/*
 * NOTE(review): presumably returns the counter for @event in @memcg -
 * confirm against the definition.
 */
unsigned long memcg_events(struct mem_cgroup *memcg, int event);
/* seq_file show callback (signature: struct seq_file *, void *). */
int memory_stat_show(struct seq_file *m, void *v);

/*
 * NOTE(review): judging by the name, takes @n private-ID references on an
 * online memcg and returns it; which memcg is returned when @memcg itself
 * is offline must be confirmed against the definition.
 */
struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg,
						    unsigned int n);
|
|
|
|
/* Cgroup v1-specific declarations */
|
|
#ifdef CONFIG_MEMCG_V1
|
|
|
|
/* Whether legacy memory+swap accounting is active */
|
|
static inline bool do_memsw_account(void)
|
|
{
|
|
return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
|
|
}
|
|
|
|
/*
 * "Local" counter accessors, only declared when CONFIG_MEMCG_V1 is set.
 * NOTE(review): "local" presumably means non-hierarchical, i.e. excluding
 * descendants - confirm against the definitions.
 */
unsigned long memcg_events_local(struct mem_cgroup *memcg, int event);
unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx);
unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
/*
 * Allocation/teardown pair for the v1 per-memcg event state.
 * NOTE(review): the !CONFIG_MEMCG_V1 stub of memcg1_alloc_events() returns
 * true, so true presumably means success - confirm.
 */
bool memcg1_alloc_events(struct mem_cgroup *memcg);
void memcg1_free_events(struct mem_cgroup *memcg);

/* v1-specific per-memcg setup and teardown hooks; defined outside this header. */
void memcg1_memcg_init(struct mem_cgroup *memcg);
void memcg1_remove_from_trees(struct mem_cgroup *memcg);
|
|
|
|
static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg)
|
|
{
|
|
WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
|
|
}
|
|
|
|
/*
 * Forward declaration.  NOTE(review): no declaration visible in this
 * header takes a struct cgroup_taskset; possibly a leftover - confirm
 * before removing.
 */
struct cgroup_taskset;
/*
 * v1 hook for taking @memcg offline.
 * NOTE(review): presumably called from the css_offline path - confirm.
 */
void memcg1_css_offline(struct mem_cgroup *memcg);
|
|
|
|
/*
 * for encoding cft->private value on file
 *
 * The enumerators rely on implicit numbering (_MEM == 0 ... _TCP == 3).
 * NOTE(review): presumably one value per v1 counter family (memory,
 * memory+swap, kernel memory, TCP memory) - confirm against the users.
 */
enum res_type {
	_MEM,
	_MEMSWAP,
	_KMEM,
	_TCP,
};
|
|
|
|
/*
 * v1 OOM handling hooks; defined outside this header.
 * NOTE(review): memcg1_oom_prepare() reports state through @locked and
 * memcg1_oom_finish() takes it back by value, so the two are presumably
 * meant to be called as a pair - confirm against the callers.
 */
bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked);
void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
void memcg1_oom_recover(struct mem_cgroup *memcg);
|
|
|
|
/*
 * v1 hooks for the charge and uncharge paths; defined outside this header.
 * NOTE(review): @pgpgout and @nr_memory are presumably page counts and
 * @nid a NUMA node id - confirm against the definition.
 */
void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
			   unsigned long nr_memory, int nid);
|
|
|
|
/*
 * NOTE(review): presumably writes the v1 statistics of @memcg into @s -
 * confirm against the definition.
 */
void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
/*
 * Reparenting of non-hierarchical ("local") statistics from @memcg to
 * @parent.  Per the commit description accompanying this file, these
 * cover memcg->vmstats->state_local[] and pn->lruvec_stats->state_local[]
 * and are meant to run after the LRU folios have been reparented.
 */
void reparent_memcg1_state_local(struct mem_cgroup *memcg, struct mem_cgroup *parent);
void reparent_memcg1_lruvec_state_local(struct mem_cgroup *memcg, struct mem_cgroup *parent);

/*
 * Per-item variants of the reparenting helpers.
 * NOTE(review): @idx presumably selects a single stat item - confirm
 * against the definitions.
 */
void reparent_memcg_state_local(struct mem_cgroup *memcg,
				struct mem_cgroup *parent, int idx);
void reparent_memcg_lruvec_state_local(struct mem_cgroup *memcg,
				       struct mem_cgroup *parent, int idx);
|
|
|
|
/*
 * v1 kernel-memory accounting hook; defined outside this header.
 * NOTE(review): @nr_pages is signed, so it presumably carries both charges
 * and uncharges - confirm against the definition.
 */
void memcg1_account_kmem(struct mem_cgroup *memcg,
			 int nr_pages);
|
|
static inline bool memcg1_tcpmem_active(struct mem_cgroup *memcg)
|
|
{
|
|
return memcg->tcpmem_active;
|
|
}
|
|
bool memcg1_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
|
|
gfp_t gfp_mask);
|
|
static inline void memcg1_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
|
|
{
|
|
page_counter_uncharge(&memcg->tcpmem, nr_pages);
|
|
}
|
|
|
|
/*
 * cftype tables defined outside this header.
 * NOTE(review): presumably the cgroup v1 control-file tables of the memory
 * controller - confirm where they are registered.
 */
extern struct cftype memsw_files[];
extern struct cftype mem_cgroup_legacy_files[];
|
|
|
|
#else /* CONFIG_MEMCG_V1 */
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: memory+swap accounting is never reported as active. */
static inline bool do_memsw_account(void) { return false; }
/* CONFIG_MEMCG_V1 disabled: does nothing and always returns true. */
static inline bool memcg1_alloc_events(struct mem_cgroup *memcg) { return true; }
/* CONFIG_MEMCG_V1 disabled: no-op. */
static inline void memcg1_free_events(struct mem_cgroup *memcg) {}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: the per-memcg v1 hooks below are all no-ops. */
static inline void memcg1_memcg_init(struct mem_cgroup *memcg) {}
static inline void memcg1_remove_from_trees(struct mem_cgroup *memcg) {}
static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg) {}
static inline void memcg1_css_offline(struct mem_cgroup *memcg) {}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: always returns true and never writes *@locked. */
static inline bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked) { return true; }
/* CONFIG_MEMCG_V1 disabled: no-ops. */
static inline void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) {}
static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: no-op. */
static inline void memcg1_commit_charge(struct folio *folio,
					struct mem_cgroup *memcg) {}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: no-op. */
static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg,
					 unsigned long pgpgout,
					 unsigned long nr_memory, int nid) {}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: no-op, @s is left untouched. */
static inline void memcg1_stat_format(struct mem_cgroup *memcg,
				      struct seq_buf *s)
{
}
|
|
|
|
/* CONFIG_MEMCG_V1 disabled: no-op. */
static inline void memcg1_account_kmem(struct mem_cgroup *memcg, int nr_pages) {}
/* CONFIG_MEMCG_V1 disabled: always returns false. */
static inline bool memcg1_tcpmem_active(struct mem_cgroup *memcg) { return false; }
|
|
static inline bool memcg1_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
|
|
gfp_t gfp_mask) { return true; }
|
|
/* CONFIG_MEMCG_V1 disabled: no-op. */
static inline void memcg1_uncharge_skmem(struct mem_cgroup *memcg,
					 unsigned int nr_pages)
{
}
|
|
|
|
#endif /* CONFIG_MEMCG_V1 */
|
|
|
|
#endif /* __MM_MEMCONTROL_V1_H */
|