mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
rhashtable: Bounce deferred worker kick through irq_work
Inserts past 75% load call schedule_work(&ht->run_work) to kick an
async resize. If a caller holds a raw spinlock (e.g. an
insecure_elasticity user), calling schedule_work() under that lock records
the lockdep dependency chain caller_lock -> pool->lock -> pi_lock -> rq->__lock.
A cycle forms if any of these locks is acquired in the reverse
direction elsewhere. sched_ext, the only current insecure_elasticity
user, hits this: it holds scx_sched_lock across rhashtable inserts of
sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock.
Exercising the resize path produces:
Chain exists of:
&pool->lock --> &rq->__lock --> scx_sched_lock
Bounce the kick from the insert paths through irq_work so
schedule_work() runs from hard IRQ context with the caller's lock no
longer held. rht_deferred_worker()'s self-rearm on error stays on
schedule_work(&ht->run_work) - the worker runs in process context with
no caller lock held, and keeping the self-requeue on @run_work lets
cancel_work_sync() in rhashtable_free_and_destroy() drain it.
v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work).
Routing it through irq_work in v2 broke cancel_work_sync()'s
self-requeue handling - an irq_work queued after irq_work_sync()
returned but while cancel_work_sync() was still waiting could fire
post-teardown.
v2: Bounce unconditionally instead of gating on insecure_elasticity,
as suggested by Herbert.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
5897ca15d2
commit
4fe9852927
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/alloc_tag.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/irq_work_types.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/workqueue_types.h>
|
||||
|
||||
|
|
@ -77,6 +78,7 @@ struct rhashtable_params {
|
|||
* @p: Configuration parameters
|
||||
* @rhlist: True if this is an rhltable
|
||||
* @run_work: Deferred worker to expand/shrink asynchronously
|
||||
* @run_irq_work: Bounces the @run_work kick through hard IRQ context.
|
||||
* @mutex: Mutex to protect current/future table swapping
|
||||
* @lock: Spin lock to protect walker list
|
||||
* @nelems: Number of elements in table
|
||||
|
|
@ -88,6 +90,7 @@ struct rhashtable {
|
|||
struct rhashtable_params p;
|
||||
bool rhlist;
|
||||
struct work_struct run_work;
|
||||
struct irq_work run_irq_work;
|
||||
struct mutex mutex;
|
||||
spinlock_t lock;
|
||||
atomic_t nelems;
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include <linux/err.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/list_nulls.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
|
@ -847,7 +848,7 @@ static __always_inline void *__rhashtable_insert_fast(
|
|||
rht_assign_unlock(tbl, bkt, obj, flags);
|
||||
|
||||
if (rht_grow_above_75(ht, tbl))
|
||||
schedule_work(&ht->run_work);
|
||||
irq_work_queue(&ht->run_irq_work);
|
||||
|
||||
data = NULL;
|
||||
out:
|
||||
|
|
|
|||
|
|
@ -441,10 +441,33 @@ static void rht_deferred_worker(struct work_struct *work)
|
|||
|
||||
mutex_unlock(&ht->mutex);
|
||||
|
||||
/*
|
||||
* Re-arm via @run_work, not @run_irq_work.
|
||||
* rhashtable_free_and_destroy() drains async work as irq_work_sync()
|
||||
* followed by cancel_work_sync(). If this site queued irq_work while
|
||||
* cancel_work_sync() was waiting for us, irq_work_sync() would already
|
||||
* have returned and the stale irq_work could fire post-teardown.
|
||||
* cancel_work_sync() natively handles self-requeue on @run_work.
|
||||
*/
|
||||
if (err)
|
||||
schedule_work(&ht->run_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert-path callers can run under a raw spinlock (e.g. an insecure_elasticity
|
||||
* user). Calling schedule_work() under that lock records caller_lock ->
|
||||
* pool->lock -> pi_lock -> rq->__lock, closing a locking cycle if any of
|
||||
* these is acquired in the reverse direction elsewhere. Bounce through
|
||||
* irq_work so the schedule_work() runs with the caller's lock no longer held.
|
||||
*/
|
||||
static void rht_deferred_irq_work(struct irq_work *irq_work)
|
||||
{
|
||||
struct rhashtable *ht = container_of(irq_work, struct rhashtable,
|
||||
run_irq_work);
|
||||
|
||||
schedule_work(&ht->run_work);
|
||||
}
|
||||
|
||||
static int rhashtable_insert_rehash(struct rhashtable *ht,
|
||||
struct bucket_table *tbl)
|
||||
{
|
||||
|
|
@ -477,7 +500,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
|
|||
if (err == -EEXIST)
|
||||
err = 0;
|
||||
} else
|
||||
schedule_work(&ht->run_work);
|
||||
irq_work_queue(&ht->run_irq_work);
|
||||
|
||||
return err;
|
||||
|
||||
|
|
@ -488,7 +511,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
|
|||
|
||||
/* Schedule async rehash to retry allocation in process context. */
|
||||
if (err == -ENOMEM)
|
||||
schedule_work(&ht->run_work);
|
||||
irq_work_queue(&ht->run_irq_work);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
@ -630,7 +653,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
|
|||
rht_unlock(tbl, bkt, flags);
|
||||
|
||||
if (inserted && rht_grow_above_75(ht, tbl))
|
||||
schedule_work(&ht->run_work);
|
||||
irq_work_queue(&ht->run_irq_work);
|
||||
}
|
||||
} while (!IS_ERR_OR_NULL(new_tbl));
|
||||
|
||||
|
|
@ -1085,6 +1108,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
|
|||
RCU_INIT_POINTER(ht->tbl, tbl);
|
||||
|
||||
INIT_WORK(&ht->run_work, rht_deferred_worker);
|
||||
init_irq_work(&ht->run_irq_work, rht_deferred_irq_work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1150,6 +1174,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
|
|||
struct bucket_table *tbl, *next_tbl;
|
||||
unsigned int i;
|
||||
|
||||
irq_work_sync(&ht->run_irq_work);
|
||||
cancel_work_sync(&ht->run_work);
|
||||
|
||||
mutex_lock(&ht->mutex);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user