mirror of
https://github.com/torvalds/linux.git
synced 2026-05-26 08:02:27 +02:00
zsmalloc: sleepable zspage reader-lock
In order to implement preemptible object mapping we need a zspage lock that satisfies several preconditions: - it should be a reader-writer type of lock - it should be possible to hold it from any context, while also remaining preemptible if the context allows it - we never sleep while acquiring it, but can sleep while holding it in read mode. An rwsemaphore doesn't suffice due to the atomicity requirements, and an rwlock doesn't suffice due to the reader-preemptibility requirement. It's also worth mentioning that a per-zspage rwsem is a little too memory heavy (we can easily have double-digit megabytes used only on rwsemaphores). Switch over from rwlock_t to an atomic_t-based implementation of a reader-writer semaphore that satisfies all of the preconditions. The spin-lock based zspage_lock was suggested by Hillf Danton. Link: https://lkml.kernel.org/r/20250303022425.285971-14-senozhatsky@chromium.org Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> Suggested-by: Hillf Danton <hdanton@sina.com> Cc: Kairui Song <ryncsn@gmail.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Yosry Ahmed <yosry.ahmed@linux.dev> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
0d6fa44e4e
commit
e27af3f936
166
mm/zsmalloc.c
166
mm/zsmalloc.c
|
|
@ -257,6 +257,15 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
|
|||
__free_page(page);
|
||||
}
|
||||
|
||||
/*
 * Special values of zspage_lock::cnt. Any positive value is the number of
 * readers currently holding the lock.
 */
#define ZS_PAGE_UNLOCKED	0
/* Parenthesized so the negative value stays intact inside any expression. */
#define ZS_PAGE_WRLOCKED	(-1)
|
||||
|
||||
struct zspage_lock {
|
||||
spinlock_t lock;
|
||||
int cnt;
|
||||
struct lockdep_map dep_map;
|
||||
};
|
||||
|
||||
struct zspage {
|
||||
struct {
|
||||
unsigned int huge:HUGE_BITS;
|
||||
|
|
@ -269,7 +278,7 @@ struct zspage {
|
|||
struct zpdesc *first_zpdesc;
|
||||
struct list_head list; /* fullness list */
|
||||
struct zs_pool *pool;
|
||||
rwlock_t lock;
|
||||
struct zspage_lock zsl;
|
||||
};
|
||||
|
||||
struct mapping_area {
|
||||
|
|
@ -279,6 +288,84 @@ struct mapping_area {
|
|||
enum zs_mapmode vm_mm; /* mapping mode */
|
||||
};
|
||||
|
||||
static void zspage_lock_init(struct zspage *zspage)
|
||||
{
|
||||
static struct lock_class_key __key;
|
||||
struct zspage_lock *zsl = &zspage->zsl;
|
||||
|
||||
lockdep_init_map(&zsl->dep_map, "zspage->lock", &__key, 0);
|
||||
spin_lock_init(&zsl->lock);
|
||||
zsl->cnt = ZS_PAGE_UNLOCKED;
|
||||
}
|
||||
|
||||
/*
|
||||
* The zspage lock can be held from atomic contexts, but it needs to remain
|
||||
* preemptible when held for reading because it remains held outside of those
|
||||
* atomic contexts, otherwise we unnecessarily lose preemptibility.
|
||||
*
|
||||
* To achieve this, the following rules are enforced on readers and writers:
|
||||
*
|
||||
* - Writers are blocked by both writers and readers, while readers are only
|
||||
* blocked by writers (i.e. normal rwlock semantics).
|
||||
*
|
||||
* - Writers are always atomic (to allow readers to spin waiting for them).
|
||||
*
|
||||
* - Writers always use trylock (as the lock may be held by sleeping readers).
|
||||
*
|
||||
* - Readers may spin on the lock (as they can only wait for atomic writers).
|
||||
*
|
||||
* - Readers may sleep while holding the lock (as writers only use trylock).
|
||||
*/
|
||||
static void zspage_read_lock(struct zspage *zspage)
|
||||
{
|
||||
struct zspage_lock *zsl = &zspage->zsl;
|
||||
|
||||
rwsem_acquire_read(&zsl->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
spin_lock(&zsl->lock);
|
||||
zsl->cnt++;
|
||||
spin_unlock(&zsl->lock);
|
||||
|
||||
lock_acquired(&zsl->dep_map, _RET_IP_);
|
||||
}
|
||||
|
||||
static void zspage_read_unlock(struct zspage *zspage)
|
||||
{
|
||||
struct zspage_lock *zsl = &zspage->zsl;
|
||||
|
||||
rwsem_release(&zsl->dep_map, _RET_IP_);
|
||||
|
||||
spin_lock(&zsl->lock);
|
||||
zsl->cnt--;
|
||||
spin_unlock(&zsl->lock);
|
||||
}
|
||||
|
||||
/*
 * Try to take the zspage lock for writing.
 *
 * Returns true when the lock was free; in that case the inner spinlock is
 * left held and is only dropped by zspage_write_unlock(). Returns false,
 * with the spinlock released, when the counter shows any current holder.
 */
static __must_check bool zspage_write_trylock(struct zspage *zspage)
{
	struct zspage_lock *zl = &zspage->zsl;

	spin_lock(&zl->lock);
	if (zl->cnt != ZS_PAGE_UNLOCKED) {
		/* Somebody holds the lock: back off instead of waiting. */
		spin_unlock(&zl->lock);
		return false;
	}

	zl->cnt = ZS_PAGE_WRLOCKED;
	rwsem_acquire(&zl->dep_map, 0, 1, _RET_IP_);
	lock_acquired(&zl->dep_map, _RET_IP_);
	return true;
}
|
||||
|
||||
static void zspage_write_unlock(struct zspage *zspage)
|
||||
{
|
||||
struct zspage_lock *zsl = &zspage->zsl;
|
||||
|
||||
rwsem_release(&zsl->dep_map, _RET_IP_);
|
||||
|
||||
zsl->cnt = ZS_PAGE_UNLOCKED;
|
||||
spin_unlock(&zsl->lock);
|
||||
}
|
||||
|
||||
/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
|
||||
static void SetZsHugePage(struct zspage *zspage)
|
||||
{
|
||||
|
|
@ -290,12 +377,6 @@ static bool ZsHugePage(struct zspage *zspage)
|
|||
return zspage->huge;
|
||||
}
|
||||
|
||||
static void migrate_lock_init(struct zspage *zspage);
|
||||
static void migrate_read_lock(struct zspage *zspage);
|
||||
static void migrate_read_unlock(struct zspage *zspage);
|
||||
static void migrate_write_lock(struct zspage *zspage);
|
||||
static void migrate_write_unlock(struct zspage *zspage);
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
static void kick_deferred_free(struct zs_pool *pool);
|
||||
static void init_deferred_free(struct zs_pool *pool);
|
||||
|
|
@ -992,7 +1073,9 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
|
|||
return NULL;
|
||||
|
||||
zspage->magic = ZSPAGE_MAGIC;
|
||||
migrate_lock_init(zspage);
|
||||
zspage->pool = pool;
|
||||
zspage->class = class->index;
|
||||
zspage_lock_init(zspage);
|
||||
|
||||
for (i = 0; i < class->pages_per_zspage; i++) {
|
||||
struct zpdesc *zpdesc;
|
||||
|
|
@ -1015,8 +1098,6 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
|
|||
|
||||
create_page_chain(class, zspage, zpdescs);
|
||||
init_zspage(class, zspage);
|
||||
zspage->pool = pool;
|
||||
zspage->class = class->index;
|
||||
|
||||
return zspage;
|
||||
}
|
||||
|
|
@ -1217,7 +1298,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
|
|||
* zs_unmap_object API so delegate the locking from class to zspage
|
||||
* which is smaller granularity.
|
||||
*/
|
||||
migrate_read_lock(zspage);
|
||||
zspage_read_lock(zspage);
|
||||
read_unlock(&pool->lock);
|
||||
|
||||
class = zspage_class(pool, zspage);
|
||||
|
|
@ -1277,7 +1358,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
|
|||
}
|
||||
local_unlock(&zs_map_area.lock);
|
||||
|
||||
migrate_read_unlock(zspage);
|
||||
zspage_read_unlock(zspage);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_unmap_object);
|
||||
|
||||
|
|
@ -1671,18 +1752,18 @@ static void lock_zspage(struct zspage *zspage)
|
|||
/*
|
||||
* Pages we haven't locked yet can be migrated off the list while we're
|
||||
* trying to lock them, so we need to be careful and only attempt to
|
||||
* lock each page under migrate_read_lock(). Otherwise, the page we lock
|
||||
* lock each page under zspage_read_lock(). Otherwise, the page we lock
|
||||
* may no longer belong to the zspage. This means that we may wait for
|
||||
* the wrong page to unlock, so we must take a reference to the page
|
||||
* prior to waiting for it to unlock outside migrate_read_lock().
|
||||
* prior to waiting for it to unlock outside zspage_read_lock().
|
||||
*/
|
||||
while (1) {
|
||||
migrate_read_lock(zspage);
|
||||
zspage_read_lock(zspage);
|
||||
zpdesc = get_first_zpdesc(zspage);
|
||||
if (zpdesc_trylock(zpdesc))
|
||||
break;
|
||||
zpdesc_get(zpdesc);
|
||||
migrate_read_unlock(zspage);
|
||||
zspage_read_unlock(zspage);
|
||||
zpdesc_wait_locked(zpdesc);
|
||||
zpdesc_put(zpdesc);
|
||||
}
|
||||
|
|
@ -1693,41 +1774,16 @@ static void lock_zspage(struct zspage *zspage)
|
|||
curr_zpdesc = zpdesc;
|
||||
} else {
|
||||
zpdesc_get(zpdesc);
|
||||
migrate_read_unlock(zspage);
|
||||
zspage_read_unlock(zspage);
|
||||
zpdesc_wait_locked(zpdesc);
|
||||
zpdesc_put(zpdesc);
|
||||
migrate_read_lock(zspage);
|
||||
zspage_read_lock(zspage);
|
||||
}
|
||||
}
|
||||
migrate_read_unlock(zspage);
|
||||
zspage_read_unlock(zspage);
|
||||
}
|
||||
#endif /* CONFIG_COMPACTION */
|
||||
|
||||
static void migrate_lock_init(struct zspage *zspage)
|
||||
{
|
||||
rwlock_init(&zspage->lock);
|
||||
}
|
||||
|
||||
/* Take the rwlock of @zspage in read mode. */
static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
{
	read_lock(&zspage->lock);
}
|
||||
|
||||
/* Drop a read hold on the rwlock of @zspage. */
static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
{
	read_unlock(&zspage->lock);
}
|
||||
|
||||
static void migrate_write_lock(struct zspage *zspage)
|
||||
{
|
||||
write_lock(&zspage->lock);
|
||||
}
|
||||
|
||||
static void migrate_write_unlock(struct zspage *zspage)
|
||||
{
|
||||
write_unlock(&zspage->lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
|
||||
static const struct movable_operations zsmalloc_mops;
|
||||
|
|
@ -1785,9 +1841,6 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
|
|||
|
||||
VM_BUG_ON_PAGE(!zpdesc_is_isolated(zpdesc), zpdesc_page(zpdesc));
|
||||
|
||||
/* We're committed, tell the world that this is a Zsmalloc page. */
|
||||
__zpdesc_set_zsmalloc(newzpdesc);
|
||||
|
||||
/* The page is locked, so this pointer must remain valid */
|
||||
zspage = get_zspage(zpdesc);
|
||||
pool = zspage->pool;
|
||||
|
|
@ -1803,8 +1856,15 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
|
|||
* the class lock protects zpage alloc/free in the zspage.
|
||||
*/
|
||||
spin_lock(&class->lock);
|
||||
/* the migrate_write_lock protects zpage access via zs_map_object */
|
||||
migrate_write_lock(zspage);
|
||||
/* the zspage write_lock protects zpage access via zs_map_object */
|
||||
if (!zspage_write_trylock(zspage)) {
|
||||
spin_unlock(&class->lock);
|
||||
write_unlock(&pool->lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* We're committed, tell the world that this is a Zsmalloc page. */
|
||||
__zpdesc_set_zsmalloc(newzpdesc);
|
||||
|
||||
offset = get_first_obj_offset(zpdesc);
|
||||
s_addr = kmap_local_zpdesc(zpdesc);
|
||||
|
|
@ -1835,7 +1895,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
|
|||
*/
|
||||
write_unlock(&pool->lock);
|
||||
spin_unlock(&class->lock);
|
||||
migrate_write_unlock(zspage);
|
||||
zspage_write_unlock(zspage);
|
||||
|
||||
zpdesc_get(newzpdesc);
|
||||
if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
|
||||
|
|
@ -1971,9 +2031,11 @@ static unsigned long __zs_compact(struct zs_pool *pool,
|
|||
if (!src_zspage)
|
||||
break;
|
||||
|
||||
migrate_write_lock(src_zspage);
|
||||
if (!zspage_write_trylock(src_zspage))
|
||||
break;
|
||||
|
||||
migrate_zspage(pool, src_zspage, dst_zspage);
|
||||
migrate_write_unlock(src_zspage);
|
||||
zspage_write_unlock(src_zspage);
|
||||
|
||||
fg = putback_zspage(class, src_zspage);
|
||||
if (fg == ZS_INUSE_RATIO_0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user