mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 22:52:19 +02:00
mm: vmscan: make memcg slab shrink lockless
Like global slab shrink, this commit also uses SRCU to make memcg slab
shrink lockless.
We can reproduce the down_read_trylock() hotspot through the
following script:
```
# Reproduction script: builds many memory cgroups, each with its own tmpfs
# mount, then writes a small file into each mount from inside its cgroup.
# Usage: <script> test | <script> touch <first> <last>

# Directory under which one mount point per cgroup index is created.
DIR="/root/shrinker/memcg/mnt"
# do_create N: create the "test" memory and perf_event cgroups, set the
# memory cgroup limit to 4G, then for every index 0..N create a child
# memory cgroup and a matching mount-point directory $DIR/<index>.
do_create()
{
mkdir -p /sys/fs/cgroup/memory/test
mkdir -p /sys/fs/cgroup/perf_event/test
echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
for i in `seq 0 $1`;
do
mkdir -p /sys/fs/cgroup/memory/test/$i;
# Move this shell ($$) into child memory cgroup $i ...
echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
# ... and into the perf_event "test" cgroup (presumably so that
# `perf stat -G test` can attribute events to it -- confirm).
echo $$ > /sys/fs/cgroup/perf_event/test/cgroup.procs;
mkdir -p $DIR/$i;
done
}
# do_mount FIRST LAST: mount a tmpfs on $DIR/<index> for every index in
# FIRST..LAST; the index is also passed as the mount source name.
do_mount()
{
for i in `seq $1 $2`;
do
mount -t tmpfs $i $DIR/$i;
done
}
# do_touch FIRST LAST: for every index in FIRST..LAST, move this shell into
# memory cgroup <index> (and the perf_event "test" cgroup), then start a
# background dd that writes 1 MiB of zeros to $DIR/<index>/file<index>.
do_touch()
{
for i in `seq $1 $2`;
do
echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
echo $$ > /sys/fs/cgroup/perf_event/test/cgroup.procs;
# Runs in the background (&), so the loop does not wait for each write.
dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 &
done
}
# Dispatch on the first argument.
case "$1" in
# touch FIRST LAST: only write files for the given index range.
touch)
do_touch $2 $3
;;
# test: create cgroups 0..4000, mount a tmpfs for each of them, then write
# files for indices 0..3000.
test)
do_create 4000
do_mount 0 4000
do_touch 0 3000
;;
# Any other (or missing) argument is an error.
*)
exit 1
;;
esac
```
Save the above script, then run it with the `test` argument and afterwards with the `touch` argument (`touch` takes a first and a last index).
Then we can use the following perf command to view hotspots:
perf top -U -F 999
1) Before applying this patchset:
32.31% [kernel] [k] down_read_trylock
19.40% [kernel] [k] pv_native_safe_halt
16.24% [kernel] [k] up_read
15.70% [kernel] [k] shrink_slab
4.69% [kernel] [k] _find_next_bit
2.62% [kernel] [k] shrink_node
1.78% [kernel] [k] shrink_lruvec
0.76% [kernel] [k] do_shrink_slab
2) After applying this patchset:
27.83% [kernel] [k] _find_next_bit
16.97% [kernel] [k] shrink_slab
15.82% [kernel] [k] pv_native_safe_halt
9.58% [kernel] [k] shrink_node
8.31% [kernel] [k] shrink_lruvec
5.64% [kernel] [k] do_shrink_slab
3.88% [kernel] [k] mem_cgroup_iter
At the same time, we use the following perf command to capture
IPC information:
perf stat -e cycles,instructions -G test -a --repeat 5 -- sleep 10
1) Before applying this patchset:
Performance counter stats for 'system wide' (5 runs):
454187219766 cycles test ( +- 1.84% )
78896433101 instructions test # 0.17 insn per cycle ( +- 0.44% )
10.0020430 +- 0.0000366 seconds time elapsed ( +- 0.00% )
2) After applying this patchset:
Performance counter stats for 'system wide' (5 runs):
841954709443 cycles test ( +- 15.80% ) (98.69%)
527258677936 instructions test # 0.63 insn per cycle ( +- 15.11% ) (98.68%)
10.01064 +- 0.00831 seconds time elapsed ( +- 0.08% )
We can see that IPC drops severely (0.17 insn per cycle) when
down_read_trylock() is called at high frequency. After switching to
SRCU, IPC returns to a normal level (0.63 insn per cycle).
Link: https://lkml.kernel.org/r/20230313112819.38938-4-zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Kirill Tkhai <tkhai@ya.ru>
Acked-by: Vlastimil Babka <Vbabka@suse.cz>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Christian König <christian.koenig@amd.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
f95bdb700b
commit
caa05325c9
45
mm/vmscan.c
45
mm/vmscan.c
|
|
@ -222,8 +222,21 @@ static inline int shrinker_defer_size(int nr_items)
|
|||
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
|
||||
int nid)
|
||||
{
|
||||
return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
|
||||
lockdep_is_held(&shrinker_rwsem));
|
||||
return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
|
||||
&shrinker_srcu,
|
||||
lockdep_is_held(&shrinker_rwsem));
|
||||
}
|
||||
|
||||
static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
|
||||
int nid)
|
||||
{
|
||||
return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
|
||||
&shrinker_srcu);
|
||||
}
|
||||
|
||||
static void free_shrinker_info_rcu(struct rcu_head *head)
|
||||
{
|
||||
kvfree(container_of(head, struct shrinker_info, rcu));
|
||||
}
|
||||
|
||||
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
|
||||
|
|
@ -264,7 +277,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
|
|||
defer_size - old_defer_size);
|
||||
|
||||
rcu_assign_pointer(pn->shrinker_info, new);
|
||||
kvfree_rcu(old, rcu);
|
||||
call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -350,15 +363,16 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
|
|||
{
|
||||
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
|
||||
struct shrinker_info *info;
|
||||
int srcu_idx;
|
||||
|
||||
rcu_read_lock();
|
||||
info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
|
||||
srcu_idx = srcu_read_lock(&shrinker_srcu);
|
||||
info = shrinker_info_srcu(memcg, nid);
|
||||
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
|
||||
/* Pairs with smp mb in shrink_slab() */
|
||||
smp_mb__before_atomic();
|
||||
set_bit(shrinker_id, info->map);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&shrinker_srcu, srcu_idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -372,7 +386,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
|
|||
return -ENOSYS;
|
||||
|
||||
down_write(&shrinker_rwsem);
|
||||
/* This may call shrinker, so it must use down_read_trylock() */
|
||||
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
|
||||
if (id < 0)
|
||||
goto unlock;
|
||||
|
|
@ -406,7 +419,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
|
|||
{
|
||||
struct shrinker_info *info;
|
||||
|
||||
info = shrinker_info_protected(memcg, nid);
|
||||
info = shrinker_info_srcu(memcg, nid);
|
||||
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
|
||||
}
|
||||
|
||||
|
|
@ -415,7 +428,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
|
|||
{
|
||||
struct shrinker_info *info;
|
||||
|
||||
info = shrinker_info_protected(memcg, nid);
|
||||
info = shrinker_info_srcu(memcg, nid);
|
||||
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
|
||||
}
|
||||
|
||||
|
|
@ -893,15 +906,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
|
|||
{
|
||||
struct shrinker_info *info;
|
||||
unsigned long ret, freed = 0;
|
||||
int srcu_idx;
|
||||
int i;
|
||||
|
||||
if (!mem_cgroup_online(memcg))
|
||||
return 0;
|
||||
|
||||
if (!down_read_trylock(&shrinker_rwsem))
|
||||
return 0;
|
||||
|
||||
info = shrinker_info_protected(memcg, nid);
|
||||
srcu_idx = srcu_read_lock(&shrinker_srcu);
|
||||
info = shrinker_info_srcu(memcg, nid);
|
||||
if (unlikely(!info))
|
||||
goto unlock;
|
||||
|
||||
|
|
@ -951,14 +963,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
|
|||
set_shrinker_bit(memcg, nid, i);
|
||||
}
|
||||
freed += ret;
|
||||
|
||||
if (rwsem_is_contended(&shrinker_rwsem)) {
|
||||
freed = freed ? : 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unlock:
|
||||
up_read(&shrinker_rwsem);
|
||||
srcu_read_unlock(&shrinker_srcu, srcu_idx);
|
||||
return freed;
|
||||
}
|
||||
#else /* CONFIG_MEMCG */
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user