mirror of
https://github.com/torvalds/linux.git
synced 2026-06-04 12:35:52 +02:00
RDMA/hns: Fix flush cqe error when racing with destroy qp
QP needs to be modified to IB_QPS_ERROR to trigger HW flush cqe. But
when this process races with destroy qp, the destroy-qp process may
modify the QP to IB_QPS_RESET first. In this case flush cqe will fail
since it is invalid to modify qp from IB_QPS_RESET to IB_QPS_ERROR.
Add a lock and a bit flag to make sure that pending flush cqe work is
completed first and that no new work is queued afterwards.
Fixes: ffd541d457 ("RDMA/hns: Add the workqueue framework for flush cqe handler")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241024124000.2931869-3-huangjunxian6@hisilicon.com
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
parent
571e4ab8a4
commit
377a209770
|
|
@ -593,6 +593,7 @@ struct hns_roce_dev;
|
|||
|
||||
/* Bit indices used with set_bit()/test_bit() on hns_roce_qp::flush_flag. */
enum {
	/* NOTE(review): not referenced in the visible hunks — confirm its users. */
	HNS_ROCE_FLUSH_FLAG = 0,
	/*
	 * Set by hns_roce_v2_destroy_qp() under flush_lock; once set,
	 * init_flush_work() returns without queuing any flush work.
	 */
	HNS_ROCE_STOP_FLUSH_FLAG = 1,
};
|
||||
|
||||
struct hns_roce_work {
|
||||
|
|
@ -656,6 +657,7 @@ struct hns_roce_qp {
|
|||
enum hns_roce_cong_type cong_type;
|
||||
u8 tc_mode;
|
||||
u8 priority;
|
||||
spinlock_t flush_lock;
|
||||
};
|
||||
|
||||
struct hns_roce_ib_iboe {
|
||||
|
|
|
|||
|
|
@ -5598,8 +5598,15 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
|
|||
{
|
||||
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
|
||||
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
/* Make sure flush_cqe() is completed */
|
||||
spin_lock_irqsave(&hr_qp->flush_lock, flags);
|
||||
set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
|
||||
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
|
||||
flush_work(&hr_qp->flush_work.work);
|
||||
|
||||
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
|
||||
if (ret)
|
||||
ibdev_err(&hr_dev->ib_dev,
|
||||
|
|
|
|||
|
|
@ -90,11 +90,18 @@ static void flush_work_handle(struct work_struct *work)
|
|||
void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
|
||||
{
|
||||
struct hns_roce_work *flush_work = &hr_qp->flush_work;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&hr_qp->flush_lock, flags);
|
||||
/* Exit directly after destroy_qp() */
|
||||
if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
|
||||
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
flush_work->hr_dev = hr_dev;
|
||||
INIT_WORK(&flush_work->work, flush_work_handle);
|
||||
refcount_inc(&hr_qp->refcount);
|
||||
queue_work(hr_dev->irq_workq, &flush_work->work);
|
||||
spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
|
||||
}
|
||||
|
||||
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
|
||||
|
|
@ -1140,6 +1147,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
|
|||
struct ib_udata *udata,
|
||||
struct hns_roce_qp *hr_qp)
|
||||
{
|
||||
struct hns_roce_work *flush_work = &hr_qp->flush_work;
|
||||
struct hns_roce_ib_create_qp_resp resp = {};
|
||||
struct ib_device *ibdev = &hr_dev->ib_dev;
|
||||
struct hns_roce_ib_create_qp ucmd = {};
|
||||
|
|
@ -1148,9 +1156,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
|
|||
mutex_init(&hr_qp->mutex);
|
||||
spin_lock_init(&hr_qp->sq.lock);
|
||||
spin_lock_init(&hr_qp->rq.lock);
|
||||
spin_lock_init(&hr_qp->flush_lock);
|
||||
|
||||
hr_qp->state = IB_QPS_RESET;
|
||||
hr_qp->flush_flag = 0;
|
||||
flush_work->hr_dev = hr_dev;
|
||||
INIT_WORK(&flush_work->work, flush_work_handle);
|
||||
|
||||
if (init_attr->create_flags)
|
||||
return -EOPNOTSUPP;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user