diff --git a/mm/swapfile.c b/mm/swapfile.c index 281ee2762358..a795cae82415 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -65,6 +65,13 @@ static void move_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci, struct list_head *list, enum swap_cluster_flags new_flags); +/* + * Protects the swap_info array and the SWP_USED flag. swap_info contains + * lazily allocated & freed swap device info structs, and SWP_USED indicates + * which devices are in use; !SWP_USED devices can be reused. + * + * Also protects swap_active_head, total_swap_pages, and the SWP_WRITEOK flag. + */ static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; @@ -2657,8 +2664,6 @@ static int setup_swap_extents(struct swap_info_struct *sis, } static void setup_swap_info(struct swap_info_struct *si, int prio, - unsigned char *swap_map, - struct swap_cluster_info *cluster_info, unsigned long *zeromap) { si->prio = prio; @@ -2668,8 +2673,6 @@ static void setup_swap_info(struct swap_info_struct *si, int prio, */ si->list.prio = -si->prio; si->avail_list.prio = -si->prio; - si->swap_map = swap_map; - si->cluster_info = cluster_info; si->zeromap = zeromap; } @@ -2687,13 +2690,11 @@ static void _enable_swap_info(struct swap_info_struct *si) } static void enable_swap_info(struct swap_info_struct *si, int prio, - unsigned char *swap_map, - struct swap_cluster_info *cluster_info, - unsigned long *zeromap) + unsigned long *zeromap) { spin_lock(&swap_lock); spin_lock(&si->lock); - setup_swap_info(si, prio, swap_map, cluster_info, zeromap); + setup_swap_info(si, prio, zeromap); spin_unlock(&si->lock); spin_unlock(&swap_lock); /* @@ -2711,7 +2712,7 @@ static void reinsert_swap_info(struct swap_info_struct *si) { spin_lock(&swap_lock); spin_lock(&si->lock); - setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); + setup_swap_info(si, si->prio, si->zeromap); _enable_swap_info(si); spin_unlock(&si->lock); spin_unlock(&swap_lock); @@ 
-2735,8 +2736,8 @@ static void wait_for_allocation(struct swap_info_struct *si) } } -static void free_cluster_info(struct swap_cluster_info *cluster_info, - unsigned long maxpages) +static void free_swap_cluster_info(struct swap_cluster_info *cluster_info, + unsigned long maxpages) { struct swap_cluster_info *ci; int i, nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); @@ -2889,7 +2890,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->global_cluster = NULL; vfree(swap_map); kvfree(zeromap); - free_cluster_info(cluster_info, maxpages); + free_swap_cluster_info(cluster_info, maxpages); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); @@ -3236,10 +3237,15 @@ static unsigned long read_swap_header(struct swap_info_struct *si, static int setup_swap_map(struct swap_info_struct *si, union swap_header *swap_header, - unsigned char *swap_map, unsigned long maxpages) { unsigned long i; + unsigned char *swap_map; + + swap_map = vzalloc(maxpages); + si->swap_map = swap_map; + if (!swap_map) + return -ENOMEM; swap_map[0] = SWAP_MAP_BAD; /* omit header page */ for (i = 0; i < swap_header->info.nr_badpages; i++) { @@ -3260,9 +3266,9 @@ static int setup_swap_map(struct swap_info_struct *si, return 0; } -static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, - union swap_header *swap_header, - unsigned long maxpages) +static int setup_swap_clusters_info(struct swap_info_struct *si, + union swap_header *swap_header, + unsigned long maxpages) { unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); struct swap_cluster_info *cluster_info; @@ -3331,10 +3337,11 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, } } - return cluster_info; + si->cluster_info = cluster_info; + return 0; err: - free_cluster_info(cluster_info, maxpages); - return ERR_PTR(err); + free_swap_cluster_info(cluster_info, maxpages); + return err; } SYSCALL_DEFINE2(swapon, const char __user *, specialfile, 
int, swap_flags) @@ -3349,9 +3356,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) int nr_extents; sector_t span; unsigned long maxpages; - unsigned char *swap_map = NULL; unsigned long *zeromap = NULL; - struct swap_cluster_info *cluster_info = NULL; struct folio *folio = NULL; struct inode *inode = NULL; bool inced_nr_rotate_swap = false; @@ -3362,6 +3367,11 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + /* + * Allocate or reuse an existing !SWP_USED swap_info. The returned + * si will stay in a dying state, so nothing will access its content + * until enable_swap_info resurrects its percpu ref and exposes it. + */ si = alloc_swap_info(); if (IS_ERR(si)) return PTR_ERR(si); @@ -3439,18 +3449,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) maxpages = si->max; - /* OK, set up the swap map and apply the bad block list */ - swap_map = vzalloc(maxpages); - if (!swap_map) { - error = -ENOMEM; - goto bad_swap_unlock_inode; - } - - error = swap_cgroup_swapon(si->type, maxpages); + /* Set up the swap map and apply the bad block list */ + error = setup_swap_map(si, swap_header, maxpages); if (error) goto bad_swap_unlock_inode; - error = setup_swap_map(si, swap_header, swap_map, maxpages); + /* Set up the swap cluster info */ + error = setup_swap_clusters_info(si, swap_header, maxpages); + if (error) + goto bad_swap_unlock_inode; + + error = swap_cgroup_swapon(si->type, maxpages); if (error) goto bad_swap_unlock_inode; @@ -3478,13 +3487,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) inced_nr_rotate_swap = true; } - cluster_info = setup_clusters(si, swap_header, maxpages); - if (IS_ERR(cluster_info)) { - error = PTR_ERR(cluster_info); - cluster_info = NULL; - goto bad_swap_unlock_inode; - } - if ((swap_flags & SWAP_FLAG_DISCARD) && si->bdev && bdev_max_discard_sectors(si->bdev)) { /* @@ -3537,7 +3539,9 @@ 
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) prio = swap_flags & SWAP_FLAG_PRIO_MASK; si->swap_file = swap_file; - enable_swap_info(si, prio, swap_map, cluster_info, zeromap); + + /* Set SWP_WRITEOK, resurrect the percpu ref, expose the swap device */ + enable_swap_info(si, prio, zeromap); pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n", K(si->pages), name->name, si->prio, nr_extents, @@ -3563,13 +3567,18 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) inode = NULL; destroy_swap_extents(si, swap_file); swap_cgroup_swapoff(si->type); + vfree(si->swap_map); + si->swap_map = NULL; + free_swap_cluster_info(si->cluster_info, si->max); + si->cluster_info = NULL; + /* + * Clear the SWP_USED flag after all resources are freed so + * alloc_swap_info can reuse this si safely. + */ spin_lock(&swap_lock); si->flags = 0; spin_unlock(&swap_lock); - vfree(swap_map); kvfree(zeromap); - if (cluster_info) - free_cluster_info(cluster_info, maxpages); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file)