diff --git a/include/linux/slab.h b/include/linux/slab.h index 15a60b501b95..2b5ab488e96b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1234,6 +1234,9 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) +extern void kvfree_atomic(const void *addr); +DEFINE_FREE(kvfree_atomic, void *, if (!IS_ERR_OR_NULL(_T)) kvfree_atomic(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 7a67ef5b67b6..04b3a808fca9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -114,6 +114,14 @@ static void bucket_table_free(const struct bucket_table *tbl) kvfree(tbl); } +static void bucket_table_free_atomic(const struct bucket_table *tbl) +{ + if (tbl->nest) + nested_bucket_table_free(tbl); + + kvfree_atomic(tbl); +} + static void bucket_table_free_rcu(struct rcu_head *head) { bucket_table_free(container_of(head, struct bucket_table, rcu)); @@ -496,7 +504,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { - bucket_table_free(new_tbl); + bucket_table_free_atomic(new_tbl); if (err == -EEXIST) err = 0; } else @@ -1166,6 +1174,11 @@ static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, * This function will eventually sleep to wait for an async resize * to complete. The caller is responsible that no further write operations * occurs in parallel. + * + * After cancel_work_sync() has returned, the deferred rehash worker is + * quiesced and, per the contract above, no other concurrent access to the + * rhashtable is possible. The tables are therefore owned exclusively by + * this function and can be walked without ht->mutex held. */ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), @@ -1177,8 +1190,15 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, irq_work_sync(&ht->run_irq_work); cancel_work_sync(&ht->run_work); - mutex_lock(&ht->mutex); - tbl = rht_dereference(ht->tbl, ht); + /* + * Do NOT take ht->mutex here. The rehash worker establishes + * ht->mutex -> fs_reclaim via GFP_KERNEL bucket allocation under + * the mutex; callers on the reclaim path (e.g. simple_xattr_ht_free() + * from evict() under the dcache shrinker for shmem/kernfs/pidfs + * inodes) would otherwise close a circular dependency + * fs_reclaim -> ht->mutex. + */ + tbl = rcu_dereference_raw(ht->tbl); restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { @@ -1187,22 +1207,21 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, cond_resched(); for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? - rht_dereference(pos->next, ht) : NULL; + rcu_dereference_raw(pos->next) : NULL; !rht_is_a_nulls(pos); pos = next, next = !rht_is_a_nulls(pos) ? - rht_dereference(pos->next, ht) : NULL) + rcu_dereference_raw(pos->next) : NULL) rhashtable_free_one(ht, pos, free_fn, arg); } } - next_tbl = rht_dereference(tbl->future_tbl, ht); + next_tbl = rcu_dereference_raw(tbl->future_tbl); bucket_table_free(tbl); if (next_tbl) { tbl = next_tbl; goto restart; } - mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); diff --git a/mm/slub.c b/mm/slub.c index 0baa906f39ab..8f9004536729 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6882,6 +6882,22 @@ void kvfree(const void *addr) } EXPORT_SYMBOL(kvfree); +/** + * kvfree_atomic() - Free memory. + * @addr: Pointer to allocated memory. + * + * Same as kvfree(), but uses vfree_atomic() for vmalloc + * backed memory. Must not be called from NMI context. + */ +void kvfree_atomic(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree_atomic(addr); + else + kfree(addr); +} +EXPORT_SYMBOL(kvfree_atomic); + /** * kvfree_sensitive - Free a data object containing sensitive information. * @addr: address of the data object to be freed.