mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
Since we no longer bypass the swap cache, every swap-in will clear the swap shadow by inserting the folio into the swap table. The only place we may seem to need to free the swap shadow is when the swap slots are freed directly without a folio (swap_put_entries_direct). But with the swap table, that is not needed either. Freeing a slot in the swap table will set the table entry to NULL, which erases the shadow just fine. So just delete all explicit shadow clearing, it's no longer needed. Also, rearrange the freeing. Link: https://lkml.kernel.org/r/20260218-swap-table-p3-v3-12-f4e34be021a7@tencent.com Signed-off-by: Kairui Song <kasong@tencent.com> Acked-by: Chris Li <chrisl@kernel.org> Cc: Baoquan He <bhe@redhat.com> Cc: Barry Song <baohua@kernel.org> Cc: David Hildenbrand <david@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kairui Song <ryncsn@gmail.com> Cc: Kemeng Shi <shikemeng@huaweicloud.com> Cc: kernel test robot <lkp@intel.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Nhat Pham <nphamcs@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
503 lines
14 KiB
C
503 lines
14 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _MM_SWAP_H
|
|
#define _MM_SWAP_H
|
|
|
|
#include <linux/atomic.h> /* for atomic_long_t */
|
|
struct mempolicy;
|
|
struct swap_iocb;
|
|
|
|
extern int page_cluster;
|
|
|
|
#ifdef CONFIG_THP_SWAP
/* With THP swap, a cluster covers one PMD-sized huge page. */
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR

/* Allocation order of swap entries is preserved. */
#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256

/* Without THP swap, every swap entry is order-0. */
#define swap_entry_order(order)	0
#endif
|
|
|
|
extern struct swap_info_struct *swap_info[];
|
|
|
|
/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
 * free clusters are organized into a list. We fetch an entry from the list to
 * get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;		/* Usage count of this cluster's slots */
	u8 flags;		/* One of enum swap_cluster_flags */
	u8 order;		/* Allocation order of entries in this cluster */
	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
	unsigned int *extend_table;	/* For large swap count, protected by ci->lock */
	struct list_head list;	/* On-list clusters carry a non-zero flag */
};
|
|
|
|
/* All on-list cluster must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX,
};
|
|
|
|
#ifdef CONFIG_SWAP
|
|
#include <linux/swapops.h> /* for swp_offset */
|
|
#include <linux/blk_types.h> /* for bio_end_io_t */
|
|
|
|
/* Index of @entry's slot within its containing swap cluster. */
static inline unsigned int swp_cluster_offset(swp_entry_t entry)
{
	unsigned long off = swp_offset(entry);

	return off % SWAPFILE_CLUSTER;
}
|
|
|
/*
 * Callers of all helpers below must ensure the entry, type, or offset is
 * valid, and protect the swap device with reference count or locks.
 */
/* Look up the swap device of swap type @type. */
static inline struct swap_info_struct *__swap_type_to_info(int type)
{
	struct swap_info_struct *si;

	si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	return si;
}
|
|
|
|
/* Resolve the swap device that @entry belongs to. */
static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	int type = swp_type(entry);

	return __swap_type_to_info(type);
}
|
|
|
|
/* Map a swap @offset on device @si to its swap_cluster_info. */
static inline struct swap_cluster_info *__swap_offset_to_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	/* The last cluster may be partial, hence the roundup. */
	VM_WARN_ON_ONCE(offset >= roundup(si->max, SWAPFILE_CLUSTER));
	return &si->cluster_info[offset / SWAPFILE_CLUSTER];
}
|
|
|
|
/* Resolve the swap cluster that holds @entry. */
static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);

	return __swap_offset_to_cluster(si, swp_offset(entry));
}
|
|
|
|
/*
 * Common helper: look up and lock the cluster covering @offset on @si,
 * optionally disabling IRQs while the lock is held.
 */
static __always_inline struct swap_cluster_info *__swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset, bool irq)
{
	struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);

	/*
	 * Nothing modifies swap cache in an IRQ context. All access to
	 * swap cache is wrapped by swap_cache_* helpers, and swap cache
	 * writeback is handled outside of IRQs. Swapin or swapout never
	 * occurs in IRQ, and neither does in-place split or replace.
	 *
	 * Besides, modifying swap cache requires synchronization with
	 * swap_map, which was never IRQ safe.
	 */
	VM_WARN_ON_ONCE(!in_task());
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	if (irq)
		spin_lock_irq(&ci->lock);
	else
		spin_lock(&ci->lock);
	return ci;
}
|
|
|
|
/**
 * swap_cluster_lock - Lock and return the swap cluster of given offset.
 * @si: swap device the cluster belongs to.
 * @offset: the swap entry offset, pointing to a valid slot.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with reference count or locks.
 * Return: Pointer to the locked swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return __swap_cluster_lock(si, offset, false);
}
|
|
|
|
/*
 * Common helper: lock the cluster holding @folio's swap entries,
 * optionally disabling IRQs. The folio must be locked and in the
 * swap cache so folio->swap is stable.
 */
static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
		const struct folio *folio, bool irq)
{
	swp_entry_t entry = folio->swap;

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
	return __swap_cluster_lock(__swap_entry_to_info(entry),
				   swp_offset(entry), irq);
}
|
|
|
|
/**
 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * This locks and returns the swap cluster that contains a folio's swap
 * entries. The swap entries of a folio are always in one single cluster.
 * The folio has to be locked so its swap entries won't change and the
 * cluster won't be freed.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, false);
}
|
|
|
|
/**
 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * Same as swap_cluster_get_and_lock but also disable IRQ.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, true);
}
|
|
|
|
/* Release a cluster locked by swap_cluster_lock()/swap_cluster_get_and_lock(). */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}

/* Release a cluster locked by swap_cluster_get_and_lock_irq(), re-enabling IRQs. */
static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
	spin_unlock_irq(&ci->lock);
}
|
|
|
|
extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp);
|
|
|
|
/*
 * Below are the core routines for doing swap for a folio.
 * All helpers require the folio to be locked, and a locked folio
 * in the swap cache pins the swap entries / slots allocated to the
 * folio; swap relies heavily on the swap cache and folio lock for
 * synchronization.
 *
 * folio_alloc_swap(): the entry point for a folio to be swapped
 * out. It allocates swap slots and pins the slots with swap cache.
 * The slots start with a swap count of zero. The slots are pinned
 * by swap cache reference which doesn't contribute to swap count.
 *
 * folio_dup_swap(): increases the swap count of a folio, usually
 * while it gets unmapped and a swap entry is installed to replace
 * it (e.g., swap entry in page table). A swap slot with swap
 * count == 0 can only be increased by this helper.
 *
 * folio_put_swap(): does the opposite thing of folio_dup_swap().
 */
int folio_alloc_swap(struct folio *folio);
int folio_dup_swap(struct folio *folio, struct page *subpage);
void folio_put_swap(struct folio *folio, struct page *subpage);

/* For internal use */
extern void __swap_cluster_free_entries(struct swap_info_struct *si,
					struct swap_cluster_info *ci,
					unsigned int ci_off, unsigned int nr_pages);
|
|
|
|
/* linux/mm/page_io.c */
|
|
int sio_pool_init(void);
|
|
struct swap_iocb;
|
|
void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
|
|
void __swap_read_unplug(struct swap_iocb *plug);
|
|
/* Submit any batched swap-in I/O held by @plug; NULL means nothing pending. */
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (likely(!plug))
		return;
	__swap_read_unplug(plug);
}
|
|
void swap_write_unplug(struct swap_iocb *sio);
|
|
int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
|
|
void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
|
|
|
|
/* linux/mm/swap_state.c */
|
|
extern struct address_space swap_space __read_mostly;
|
|
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	/* A single global address_space backs all swap entries. */
	return &swap_space;
}
|
|
|
|
/*
|
|
* Return the swap device position of the swap entry.
|
|
*/
|
|
static inline loff_t swap_dev_pos(swp_entry_t entry)
|
|
{
|
|
return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
|
|
}
|
|
|
|
/**
 * folio_matches_swap_entry - Check if a folio matches a given swap entry.
 * @folio: The folio.
 * @entry: The swap entry to check against.
 *
 * Context: The caller should have the folio locked to ensure it's stable
 * and nothing will move it in or out of the swap cache.
 * Return: true or false.
 */
static inline bool folio_matches_swap_entry(const struct folio *folio,
		swp_entry_t entry)
{
	swp_entry_t folio_entry = folio->swap;
	long nr_pages = folio_nr_pages(folio);

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	if (!folio_test_swapcache(folio))
		return false;
	/* A folio's swap entries are naturally aligned to its size. */
	VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
	/* Match if @entry falls anywhere inside the folio's entry range. */
	return folio_entry.val == round_down(entry.val, nr_pages);
}
|
|
|
|
/*
|
|
* All swap cache helpers below require the caller to ensure the swap entries
|
|
* used are valid and stabilize the device by any of the following ways:
|
|
* - Hold a reference by get_swap_device(): this ensures a single entry is
|
|
* valid and increases the swap device's refcount.
|
|
* - Locking a folio in the swap cache: this ensures the folio's swap entries
|
|
* are valid and pinned, also implies reference to the device.
|
|
* - Locking anything referencing the swap entry: e.g. PTL that protects
|
|
* swap entries in the page table, similar to locking swap cache folio.
|
|
* - See the comment of get_swap_device() for more complex usage.
|
|
*/
|
|
bool swap_cache_has_folio(swp_entry_t entry);
|
|
struct folio *swap_cache_get_folio(swp_entry_t entry);
|
|
void *swap_cache_get_shadow(swp_entry_t entry);
|
|
void swap_cache_del_folio(struct folio *folio);
|
|
struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
|
|
struct mempolicy *mpol, pgoff_t ilx,
|
|
bool *alloced);
|
|
/* Below helpers require the caller to lock and pass in the swap cluster. */
|
|
void __swap_cache_add_folio(struct swap_cluster_info *ci,
|
|
struct folio *folio, swp_entry_t entry);
|
|
void __swap_cache_del_folio(struct swap_cluster_info *ci,
|
|
struct folio *folio, swp_entry_t entry, void *shadow);
|
|
void __swap_cache_replace_folio(struct swap_cluster_info *ci,
|
|
struct folio *old, struct folio *new);
|
|
|
|
void show_swap_cache_info(void);
|
|
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
|
|
struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
|
|
struct vm_area_struct *vma, unsigned long addr,
|
|
struct swap_iocb **plug);
|
|
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
|
|
struct mempolicy *mpol, pgoff_t ilx);
|
|
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
|
|
struct vm_fault *vmf);
|
|
struct folio *swapin_folio(swp_entry_t entry, struct folio *folio);
|
|
void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
|
|
unsigned long addr);
|
|
|
|
static inline unsigned int folio_swap_flags(struct folio *folio)
|
|
{
|
|
return __swap_entry_to_info(folio->swap)->flags;
|
|
}
|
|
|
|
/*
 * Return the count of contiguous swap entries that share the same
 * zeromap status as the starting entry. If is_zeromap is not NULL,
 * it will return the zeromap status of the starting entry.
 */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	struct swap_info_struct *sis = __swap_entry_to_info(entry);
	unsigned long start = swp_offset(entry);
	unsigned long end = start + max_nr;
	bool first_bit;

	first_bit = test_bit(start, sis->zeromap);
	if (is_zeromap)
		*is_zeromap = first_bit;

	if (max_nr <= 1)
		return max_nr;
	/*
	 * The batch ends where the zeromap bit first flips; the distance
	 * from @start to that bit is the batch length (capped at @end).
	 */
	if (first_bit)
		return find_next_zero_bit(sis->zeromap, end, start) - start;
	else
		return find_next_bit(sis->zeromap, end, start) - start;
}
|
|
|
|
/*
 * Count how many consecutive entries, starting at @entry, are absent
 * from the swap cache (at most @max_nr).
 */
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	int nr = 0;

	/*
	 * While allocating a large folio and doing mTHP swapin, we need to
	 * ensure all entries are not cached, otherwise, the mTHP folio will
	 * be in conflict with the folio in swap cache.
	 */
	while (nr < max_nr) {
		if (swap_cache_has_folio(entry))
			break;
		entry.val++;
		nr++;
	}

	return nr;
}
|
|
|
|
#else /* CONFIG_SWAP */
struct swap_iocb;

/*
 * !CONFIG_SWAP stubs: cluster locking degenerates to no-ops that hand
 * back NULL clusters, and swap device lookup always fails.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, pgoff_t offset, bool irq)
{
	/*
	 * NOTE(review): this stub takes an extra @irq parameter that the
	 * CONFIG_SWAP variant does not — confirm against callers.
	 */
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		struct folio *folio)
{
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		struct folio *folio)
{
	return NULL;
}

static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return NULL;
}
|
|
|
|
/* Without CONFIG_SWAP, swap allocation and duplication always fail. */
static inline int folio_alloc_swap(struct folio *folio)
{
	return -EINVAL;
}

static inline int folio_dup_swap(struct folio *folio, struct page *page)
{
	return -EINVAL;
}

static inline void folio_put_swap(struct folio *folio, struct page *page)
{
}

static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}

static inline void swap_write_unplug(struct swap_iocb *sio)
{
}

static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return NULL;
}
|
|
|
|
/* No folio can match a swap entry when swap is compiled out. */
static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
{
	return false;
}

static inline void show_swap_cache_info(void)
{
}

static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
		gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
	return NULL;
}

static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
		struct vm_fault *vmf)
{
	return NULL;
}

static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
{
	return NULL;
}

static inline void swap_update_readahead(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr)
{
}

static inline int swap_writeout(struct folio *folio,
		struct swap_iocb **swap_plug)
{
	return 0;
}
|
|
|
|
static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp)
{
	return -EINVAL;
}

/* Swap cache stubs: the cache is always empty without CONFIG_SWAP. */
static inline bool swap_cache_has_folio(swp_entry_t entry)
{
	return false;
}

static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
}

static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_cache_del_folio(struct folio *folio)
{
}

static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
		struct folio *folio, swp_entry_t entry, void *shadow)
{
}

static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
		struct folio *old, struct folio *new)
{
}

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return 0;
}

static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *has_zeromap)
{
	return 0;
}

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	return 0;
}
|
|
#endif /* CONFIG_SWAP */
|
|
#endif /* _MM_SWAP_H */
|