From d502297008142645edf5c791af424ed321e5da84 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Jan 2021 15:53:35 +1000 Subject: [PATCH 01/50] drm/nouveau/nvif: fix method count when pushing an array Reported-by: Lyude Paul Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/push.h | 216 ++++++++++---------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/push.h b/drivers/gpu/drm/nouveau/include/nvif/push.h index 168d7694ede5..6d3a8a3d2087 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/push.h +++ b/drivers/gpu/drm/nouveau/include/nvif/push.h @@ -123,131 +123,131 @@ PUSH_KICK(struct nvif_push *push) } while(0) #endif -#define PUSH_1(X,f,ds,n,c,o,p,s,mA,dA) do { \ - PUSH_##o##_HDR((p), s, mA, (c)+(n)); \ - PUSH_##f(X, (p), X##mA, 1, o, (dA), ds, ""); \ +#define PUSH_1(X,f,ds,n,o,p,s,mA,dA) do { \ + PUSH_##o##_HDR((p), s, mA, (ds)+(n)); \ + PUSH_##f(X, (p), X##mA, 1, o, (dA), ds, ""); \ } while(0) -#define PUSH_2(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (1?PUSH_##o##_INC), "mthd1"); \ - PUSH_1(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_2(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (1?PUSH_##o##_INC), "mthd1"); \ + PUSH_1(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_3(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd2"); \ - PUSH_2(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_3(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd2"); \ + PUSH_2(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_4(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd3"); \ - PUSH_3(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_4(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd3"); \ + PUSH_3(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_5(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd4"); \ - PUSH_4(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_5(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd4"); \ + PUSH_4(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_6(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd5"); \ - PUSH_5(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_6(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd5"); \ + PUSH_5(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_7(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd6"); \ - PUSH_6(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_7(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd6"); \ + PUSH_6(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_8(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd7"); \ - PUSH_7(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_8(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd7"); \ + PUSH_7(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_9(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd8"); \ - PUSH_8(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_9(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd8"); \ + PUSH_8(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_10(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd9"); \ - PUSH_9(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_10(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd9"); \ + PUSH_9(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_1D(X,o,p,s,mA,dA) \ - PUSH_1(X, DATA_, 1, 1, 0, o, (p), s, X##mA, (dA)) -#define PUSH_2D(X,o,p,s,mA,dA,mB,dB) \ - PUSH_2(X, DATA_, 1, 1, 0, o, (p), s, X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_3D(X,o,p,s,mA,dA,mB,dB,mC,dC) \ - PUSH_3(X, DATA_, 1, 1, 0, o, (p), s, X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_4D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD) \ - PUSH_4(X, DATA_, 1, 1, 0, o, (p), s, X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_5D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE) \ - PUSH_5(X, DATA_, 1, 1, 0, o, (p), s, X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) +#define PUSH_1D(X,o,p,s,mA,dA) \ + PUSH_1(X, DATA_, 1, 0, o, (p), s, X##mA, (dA)) +#define PUSH_2D(X,o,p,s,mA,dA,mB,dB) \ + PUSH_2(X, DATA_, 1, 0, o, (p), s, X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_3D(X,o,p,s,mA,dA,mB,dB,mC,dC) \ + PUSH_3(X, DATA_, 1, 0, o, (p), s, X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_4D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD) \ + PUSH_4(X, DATA_, 1, 0, o, (p), s, X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_5D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE) \ + PUSH_5(X, DATA_, 1, 0, o, (p), s, X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_6D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF) \ - PUSH_6(X, DATA_, 1, 1, 0, o, (p), s, X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_6(X, DATA_, 1, 0, o, (p), s, X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_7D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG) \ - PUSH_7(X, DATA_, 1, 1, 0, o, (p), s, X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_7(X, DATA_, 1, 0, o, (p), s, X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_8D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH) \ - PUSH_8(X, DATA_, 1, 1, 0, o, (p), s, X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_8(X, DATA_, 1, 0, o, (p), s, X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_9D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH,mI,dI) \ - PUSH_9(X, DATA_, 1, 1, 0, o, (p), s, X##mI, (dI), \ - X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_9(X, DATA_, 1, 0, o, (p), s, X##mI, (dI), \ + X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_10D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH,mI,dI,mJ,dJ) \ - PUSH_10(X, DATA_, 1, 1, 0, o, (p), s, X##mJ, (dJ), \ - X##mI, (dI), \ - X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_10(X, DATA_, 1, 0, o, (p), s, X##mJ, (dJ), \ + X##mI, (dI), \ + X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) -#define PUSH_1P(X,o,p,s,mA,dp,ds) \ - PUSH_1(X, DATAp, ds, ds, 0, o, (p), s, X##mA, (dp)) -#define PUSH_2P(X,o,p,s,mA,dA,mB,dp,ds) \ - PUSH_2(X, DATAp, ds, ds, 0, o, (p), s, X##mB, (dp), \ - X##mA, (dA)) -#define PUSH_3P(X,o,p,s,mA,dA,mB,dB,mC,dp,ds) \ - PUSH_3(X, DATAp, ds, ds, 0, o, (p), s, X##mC, (dp), \ - X##mB, (dB), \ - X##mA, (dA)) +#define PUSH_1P(X,o,p,s,mA,dp,ds) \ + PUSH_1(X, DATAp, ds, 0, o, (p), s, X##mA, (dp)) +#define PUSH_2P(X,o,p,s,mA,dA,mB,dp,ds) \ + PUSH_2(X, DATAp, ds, 0, o, (p), s, X##mB, (dp), \ + X##mA, (dA)) +#define PUSH_3P(X,o,p,s,mA,dA,mB,dB,mC,dp,ds) \ + PUSH_3(X, DATAp, ds, 0, o, (p), s, X##mC, (dp), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,IMPL,...) IMPL #define PUSH(A...) PUSH_(A, PUSH_10P, PUSH_10D, \ From 84965ff8a84f0368b154c9b367b62e59c1193f30 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Jan 2021 15:51:11 -0700 Subject: [PATCH 02/50] io_uring: if we see flush on exit, cancel related tasks Ensure we match tasks that belong to a dead or dying task as well, as we need to reap those in addition to those belonging to the exiting task. Cc: stable@vger.kernel.org # 5.9+ Reported-by: Josef Grieb Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c07913ec0cca..695fe00bafdc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1069,8 +1069,12 @@ static bool io_match_task(struct io_kiocb *head, { struct io_kiocb *req; - if (task && head->task != task) + if (task && head->task != task) { + /* in terms of cancelation, always match if req task is dead */ + if (head->task->flags & PF_EXITING) + return true; return false; + } if (!files) return true; @@ -9136,6 +9140,9 @@ static int io_uring_flush(struct file *file, void *data) struct io_uring_task *tctx = current->io_uring; struct io_ring_ctx *ctx = file->private_data; + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) + io_uring_cancel_task_requests(ctx, NULL); + if (!tctx) return 0; From b18032bb0a883cd7edd22a7fe6c57e1059b81ed0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 24 Jan 2021 16:58:56 -0700 Subject: [PATCH 03/50] io_uring: only call io_cqring_ev_posted() if events were posted This normally doesn't cause any extra harm, but it does mean that we'll increment the eventfd notification count, if one has been registered with the ring. This can confuse applications, when they see more notifications on the eventfd side than are available in the ring. Do the nice thing and only increment this count, if we actually posted (or even overflowed) events. Reported-and-tested-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 695fe00bafdc..2166c469789d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1779,12 +1779,13 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct io_kiocb *req, *tmp; struct io_uring_cqe *cqe; unsigned long flags; - bool all_flushed; + bool all_flushed, posted; LIST_HEAD(list); if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) return false; + posted = false; spin_lock_irqsave(&ctx->completion_lock, flags); list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) @@ -1804,6 +1805,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow); } + posted = true; } all_flushed = list_empty(&ctx->cq_overflow_list); @@ -1813,9 +1815,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } - io_commit_cqring(ctx); + if (posted) + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); + if (posted) + io_cqring_ev_posted(ctx); while (!list_empty(&list)) { req = list_first_entry(&list, struct io_kiocb, compl.list); From 2569063c7140c65a0d0ad075e95ddfbcda9ba3c0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 27 Dec 2020 19:34:58 +0800 Subject: [PATCH 04/50] blk-mq: test QUEUE_FLAG_HCTX_ACTIVE for sbitmap_shared in hctx_may_queue In case of blk_mq_is_sbitmap_shared(), we should test QUEUE_FLAG_HCTX_ACTIVE against q->queue_flags instead of BLK_MQ_S_TAG_ACTIVE. So fix it. Cc: John Garry Cc: Kashyap Desai Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index c1458d9502f1..3616453ca28c 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -304,7 +304,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return true; users = atomic_read(&set->active_queues_shared_sbitmap); } else { From ef49d40b61a3e18a11edd5eb1c30b0183af9e850 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 17 Jan 2021 16:50:17 +0800 Subject: [PATCH 05/50] block: Fix an error handling in add_partition Once we have called device_initialize(), we should use put_device() to give up the reference on error, just like what we have done on failure of device_add(). Signed-off-by: Dinghao Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index e7d776db803b..23460cee9de5 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -384,7 +384,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(bdev, &devt); if (err) - goto out_bdput; + goto out_put; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ From 78e5330329ee206d6aa4593a90320fd837f7966e Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Thu, 21 Jan 2021 11:57:58 +0100 Subject: [PATCH 06/50] drm/vc4: Correct lbm size and calculation LBM base address is measured in units of pixels per cycle. That is 4 for 2711 (hvs5) and 2 for 2708. We are wasting 75% of lbm by indexing without the scaling. But we were also using too high a size for the lbm resulting in partial corruption (right hand side) of vertically scaled images, usually at 4K or lower resolutions with more layers. The physical RAM of LBM on 2711 is 8 * 1920 * 16 * 12-bit (pixels are stored 12-bits per component regardless of format). The LBM address indexes work in units of pixels per clock, so for 4 pixels per clock that means we have 32 * 1920 = 60K Fixes: c54619b0bfb3 ("drm/vc4: Add support for the BCM2711 HVS5") Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-By: Lucas Nussbaum Tested-By: Ryutaroh Matsumoto Link: https://patchwork.freedesktop.org/patch/msgid/20210121105759.1262699-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hvs.c | 8 ++++---- drivers/gpu/drm/vc4/vc4_plane.c | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index cccd341e5d67..3b722252d1fb 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -620,11 +620,11 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) * for now we just allocate globally. */ if (!hvs->hvs5) - /* 96kB */ - drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); + /* 48k words of 2x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); else - /* 70k words */ - drm_mm_init(&hvs->lbm_mm, 0, 70 * 2 * 1024); + /* 60k words of 4x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); /* Upload filter kernels. We only have the one for now, so we * keep it around for the lifetime of the driver. diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 6b39cc2ca18d..1a4369c1fb16 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -437,6 +437,7 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) static u32 vc4_lbm_size(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); u32 pix_per_line; u32 lbm; @@ -472,7 +473,11 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) lbm = pix_per_line * 16; } - lbm = roundup(lbm, 32); + /* Align it to 64 or 128 (hvs5) bytes */ + lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); + + /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ + lbm /= vc4->hvs->hvs5 ? 4 : 2; return lbm; } From f6b57101a6b31277a4bde1d8028c46e898bd2ff2 Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Thu, 21 Jan 2021 11:57:59 +0100 Subject: [PATCH 07/50] drm/vc4: Correct POS1_SCL for hvs5 Fixes failure with 4096x1080 resolutions [ 284.315379] WARNING: CPU: 1 PID: 901 at drivers/gpu/drm/vc4/vc4_plane.c:981 vc4_plane_mode_set+0x1374/0x13c4 [ 284.315385] Modules linked in: ir_rc5_decoder rpivid_hevc(C) bcm2835_codec(C) bcm2835_isp(C) bcm2835_mmal_vchiq(C) bcm2835_gpiomem v4l2_mem2mem videobuf2_dma_contig videobuf2_memops videobuf2_v4l2 videobuf2_common videodev mc cdc_acm xpad ir_rc6_decoder rc_rc6_mce gpio_ir_recv fuse [ 284.315509] CPU: 1 PID: 901 Comm: kodi.bin Tainted: G C 5.10.7 #1 [ 284.315514] Hardware name: BCM2711 [ 284.315518] Backtrace: [ 284.315533] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 284.315540] r7:ffffffff r6:00000000 r5:68000013 r4:c18ecf1c [ 284.315549] [] (show_stack) from [] (dump_stack+0xc4/0xf0) [ 284.315558] [] (dump_stack) from [] (__warn+0xfc/0x158) [ 284.315564] r9:00000000 r8:00000009 r7:000003d5 r6:00000009 r5:c08cc7dc r4:c0fd09b8 [ 284.315572] [] (__warn) from [] (warn_slowpath_fmt+0x74/0xe4) [ 284.315577] r7:c08cc7dc r6:000003d5 r5:c0fd09b8 r4:00000000 [ 284.315584] [] (warn_slowpath_fmt) from [] (vc4_plane_mode_set+0x1374/0x13c4) [ 284.315589] r8:00000000 r7:00000000 r6:00001000 r5:c404c600 r4:c2e34600 [ 284.315596] [] (vc4_plane_mode_set) from [] (vc4_plane_atomic_check+0x40/0x1c0) [ 284.315601] r10:00000001 r9:c2e34600 r8:c0e67068 r7:c0fc44e0 r6:c2ce3640 r5:c3d636c0 [ 284.315605] r4:c2e34600 [ 284.315614] [] (vc4_plane_atomic_check) from [] (drm_atomic_helper_check_planes+0xec/0x1ec) [ 284.315620] r9:c2e34600 r8:c0e67068 r7:c0fc44e0 r6:c2ce3640 r5:c3d636c0 r4:00000006 [ 284.315627] [] (drm_atomic_helper_check_planes) from [] (drm_atomic_helper_check+0x54/0x9c) [ 284.315633] r9:c2e35400 r8:00000006 r7:00000000 r6:c2ba7800 r5:c3d636c0 r4:00000000 [ 284.315641] [] (drm_atomic_helper_check) from [] (vc4_atomic_check+0x25c/0x454) [ 284.315645] r7:00000000 r6:c2ba7800 r5:00000001 r4:c3d636c0 [ 284.315652] [] (vc4_atomic_check) from [] (drm_atomic_check_only+0x5cc/0x7e0) [ 284.315658] r10:c404c6c8 r9:ffffffff r8:c472c480 r7:00000003 r6:c3d636c0 r5:00000000 [ 284.315662] r4:0000003c r3:c08b7a4c [ 284.315670] [] (drm_atomic_check_only) from [] (drm_mode_atomic_ioctl+0x758/0xa7c) [ 284.315675] r10:c3d46000 r9:c3d636c0 r8:c2ce8a70 r7:027e3a54 r6:00000043 r5:c1fbb800 [ 284.315679] r4:0281a858 [ 284.315688] [] (drm_mode_atomic_ioctl) from [] (drm_ioctl_kernel+0xc4/0x108) [ 284.315693] r10:c03864bc r9:c1fbb800 r8:c3d47e64 r7:c089b308 r6:00000002 r5:c2ba7800 [ 284.315697] r4:00000000 [ 284.315705] [] (drm_ioctl_kernel) from [] (drm_ioctl+0x1e8/0x3a0) [ 284.315711] r9:c1fbb800 r8:000000bc r7:c3d47e64 r6:00000038 r5:c0e59570 r4:00000038 [ 284.315719] [] (drm_ioctl) from [] (sys_ioctl+0x35c/0x914) [ 284.315724] r10:c2d08200 r9:00000000 r8:c36fa300 r7:befdd870 r6:c03864bc r5:c36fa301 [ 284.315728] r4:c03864bc [ 284.315735] [] (sys_ioctl) from [] (ret_fast_syscall+0x0/0x28) [ 284.315739] Exception stack(0xc3d47fa8 to 0xc3d47ff0) [ 284.315745] 7fa0: 027eb750 befdd870 00000000 c03864bc befdd870 00000000 [ 284.315750] 7fc0: 027eb750 befdd870 c03864bc 00000036 027e3948 0281a640 0281a850 027e3a50 [ 284.315756] 7fe0: b4b64100 befdd844 b4b5ba2c b49c994c [ 284.315762] r10:00000036 r9:c3d46000 r8:c0200204 r7:00000036 r6:c03864bc r5:befdd870 [ 284.315765] r4:027eb750 Fixes: c54619b0bfb3 ("drm/vc4: Add support for the BCM2711 HVS5") Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-By: Lucas Nussbaum Tested-By: Ryutaroh Matsumoto Link: https://patchwork.freedesktop.org/patch/msgid/20210121105759.1262699-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 1a4369c1fb16..5612cab55227 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -917,9 +917,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (!vc4_state->is_unity) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(vc4_state->crtc_w, - SCALER_POS1_SCL_WIDTH) | + SCALER5_POS1_SCL_WIDTH) | VC4_SET_FIELD(vc4_state->crtc_h, - SCALER_POS1_SCL_HEIGHT)); + SCALER5_POS1_SCL_HEIGHT)); } /* Position Word 2: Source Image Size */ From 36af2d5c4433fb40ee2af912c4ac0a30991aecfc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 22 Jan 2021 20:53:02 +0800 Subject: [PATCH 08/50] ACPI: sysfs: Prefer "compatible" modalias Commit 8765c5ba1949 ("ACPI / scan: Rework modalias creation when "compatible" is present") may create two "MODALIAS=" in one uevent file if specific conditions are met. This breaks systemd-udevd, which assumes each "key" in one uevent file to be unique. The internal implementation of systemd-udevd overwrites the first MODALIAS with the second one, so its kmod rule doesn't load the driver for the first MODALIAS. So if both the ACPI modalias and the OF modalias are present, use the latter to ensure that there will be only one MODALIAS. Link: https://github.com/systemd/systemd/pull/18163 Suggested-by: Mika Westerberg Fixes: 8765c5ba1949 ("ACPI / scan: Rework modalias creation when "compatible" is present") Signed-off-by: Kai-Heng Feng Reviewed-by: Mika Westerberg Reviewed-by: Greg Kroah-Hartman Cc: 4.1+ # 4.1+ [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_sysfs.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 96869f1538b9..bfca116482b8 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -251,20 +251,12 @@ int __acpi_device_uevent_modalias(struct acpi_device *adev, if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; - len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - if (!adev->data.of_compatible) - return 0; - - if (len > 0 && add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_of_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); + if (adev->data.of_compatible) + len = create_of_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + else + len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); if (len < 0) return len; From 81b704d3e4674e09781d331df73d76675d5ad8cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jan 2021 19:34:22 +0100 Subject: [PATCH 09/50] ACPI: thermal: Do not call acpi_thermal_check() directly Calling acpi_thermal_check() from acpi_thermal_notify() directly is problematic if _TMP triggers Notify () on the thermal zone for which it has been evaluated (which happens on some systems), because it causes a new acpi_thermal_notify() invocation to be queued up every time and if that takes place too often, an indefinite number of pending work items may accumulate in kacpi_notify_wq over time. Besides, it is not really useful to queue up a new invocation of acpi_thermal_check() if one of them is pending already. For these reasons, rework acpi_thermal_notify() to queue up a thermal check instead of calling acpi_thermal_check() directly and only allow one thermal check to be pending at a time. Moreover, only allow one acpi_thermal_check_fn() instance at a time to run thermal_zone_device_update() for one thermal zone and make it return early if it sees other instances running for the same thermal zone. While at it, fold acpi_thermal_check() into acpi_thermal_check_fn(), as it is only called from there after the other changes made here. [This issue appears to have been exposed by commit 6d25be5782e4 ("sched/core, workqueues: Distangle worker accounting from rq lock"), but it is unclear why it was not visible earlier.] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208877 Reported-by: Stephen Berman Diagnosed-by: Sebastian Andrzej Siewior Signed-off-by: Rafael J. Wysocki Reviewed-by: Sebastian Andrzej Siewior Tested-by: Stephen Berman Cc: All applicable --- drivers/acpi/thermal.c | 46 ++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 12c0ece746f0..859b1de31ddc 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -174,6 +174,8 @@ struct acpi_thermal { struct thermal_zone_device *thermal_zone; int kelvin_offset; /* in millidegrees */ struct work_struct thermal_check_work; + struct mutex thermal_check_lock; + refcount_t thermal_check_count; }; /* -------------------------------------------------------------------------- @@ -495,14 +497,6 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz) return 0; } -static void acpi_thermal_check(void *data) -{ - struct acpi_thermal *tz = data; - - thermal_zone_device_update(tz->thermal_zone, - THERMAL_EVENT_UNSPECIFIED); -} - /* sys I/F for generic thermal sysfs support */ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) @@ -900,6 +894,12 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) Driver Interface -------------------------------------------------------------------------- */ +static void acpi_queue_thermal_check(struct acpi_thermal *tz) +{ + if (!work_pending(&tz->thermal_check_work)) + queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); +} + static void acpi_thermal_notify(struct acpi_device *device, u32 event) { struct acpi_thermal *tz = acpi_driver_data(device); @@ -910,17 +910,17 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) switch (event) { case ACPI_THERMAL_NOTIFY_TEMPERATURE: - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); break; case ACPI_THERMAL_NOTIFY_THRESHOLDS: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_THRESHOLDS); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; case ACPI_THERMAL_NOTIFY_DEVICES: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_DEVICES); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; @@ -1020,7 +1020,25 @@ static void acpi_thermal_check_fn(struct work_struct *work) { struct acpi_thermal *tz = container_of(work, struct acpi_thermal, thermal_check_work); - acpi_thermal_check(tz); + + /* + * In general, it is not sufficient to check the pending bit, because + * subsequent instances of this function may be queued after one of them + * has started running (e.g. if _TMP sleeps). Avoid bailing out if just + * one of them is running, though, because it may have done the actual + * check some time ago, so allow at least one of them to block on the + * mutex while another one is running the update. + */ + if (!refcount_dec_not_one(&tz->thermal_check_count)) + return; + + mutex_lock(&tz->thermal_check_lock); + + thermal_zone_device_update(tz->thermal_zone, THERMAL_EVENT_UNSPECIFIED); + + refcount_inc(&tz->thermal_check_count); + + mutex_unlock(&tz->thermal_check_lock); } static int acpi_thermal_add(struct acpi_device *device) @@ -1052,6 +1070,8 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; + refcount_set(&tz->thermal_check_count, 3); + mutex_init(&tz->thermal_check_lock); INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn); pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device), @@ -1117,7 +1137,7 @@ static int acpi_thermal_resume(struct device *dev) tz->state.active |= tz->trips.active[i].flags.enabled; } - queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); + acpi_queue_thermal_check(tz); return AE_OK; } From ac55ad2b5fadb6af8826963d7d3331c9950a2608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Mon, 18 Jan 2021 17:55:18 +0100 Subject: [PATCH 10/50] s390/dasd: Fix inconsistent kobject removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our intention was to only remove path kobjects whenever a device is being set offline. However, one corner case was missing. If a device is disabled and enabled (using the IOCTLs BIODASDDISABLE and BIODASDENABLE respectively), the enabling process will call dasd_eckd_reload_device() which itself calls dasd_eckd_read_conf() in order to update path information. During that update, dasd_eckd_clear_conf_data() clears all old data and also removes all kobjects. This will leave us with an inconsistent state of path kobjects and a subsequent path verification leads to a failing kobject creation. Fix this by removing kobjects only in the context of offlining a device as initially intended. Fixes: 19508b204740 ("s390/dasd: Display FC Endpoint Security information via sysfs") Reported-by: Stefan Haberland Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Reviewed-by: Cornelia Huck Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 20 ++++++++++++++------ drivers/s390/block/dasd_eckd.c | 3 ++- drivers/s390/block/dasd_int.h | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 16bb135c20aa..03d27ee9cac6 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1874,18 +1874,26 @@ void dasd_path_create_kobjects(struct dasd_device *device) } EXPORT_SYMBOL(dasd_path_create_kobjects); -/* - * As we keep kobjects for the lifetime of a device, this function must not be - * called anywhere but in the context of offlining a device. - */ -void dasd_path_remove_kobj(struct dasd_device *device, int chp) +static void dasd_path_remove_kobj(struct dasd_device *device, int chp) { if (device->path[chp].in_sysfs) { kobject_put(&device->path[chp].kobj); device->path[chp].in_sysfs = false; } } -EXPORT_SYMBOL(dasd_path_remove_kobj); + +/* + * As we keep kobjects for the lifetime of a device, this function must not be + * called anywhere but in the context of offlining a device. + */ +void dasd_path_remove_kobjects(struct dasd_device *device) +{ + int i; + + for (i = 0; i < 8; i++) + dasd_path_remove_kobj(device, i); +} +EXPORT_SYMBOL(dasd_path_remove_kobjects); int dasd_add_sysfs_files(struct ccw_device *cdev) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3caa1ee5f4b0..65eb87cbbb9b 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1036,7 +1036,6 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device) device->path[i].ssid = 0; device->path[i].chpid = 0; dasd_path_notoper(device, i); - dasd_path_remove_kobj(device, i); } } @@ -2173,6 +2172,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) device->block = NULL; out_err1: dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); kfree(device->private); device->private = NULL; return rc; @@ -2191,6 +2191,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) private->vdsneq = NULL; private->gneq = NULL; dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); } static struct dasd_ccw_req * diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3bc008f9136c..b8a04c42d1d2 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -858,7 +858,7 @@ int dasd_add_sysfs_files(struct ccw_device *); void dasd_remove_sysfs_files(struct ccw_device *); void dasd_path_create_kobj(struct dasd_device *, int); void dasd_path_create_kobjects(struct dasd_device *); -void dasd_path_remove_kobj(struct dasd_device *, int); +void dasd_path_remove_kobjects(struct dasd_device *); struct dasd_device *dasd_device_from_cdev(struct ccw_device *); struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *); From 56c91a18432b631ca18438841fd1831ef756cabf Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 22 Jan 2021 15:42:14 +0800 Subject: [PATCH 11/50] kernel: kexec: remove the lock operation of system_transition_mutex Function kernel_kexec() is called with lock system_transition_mutex held in reboot system call. While inside kernel_kexec(), it will acquire system_transition_mutex agin. This will lead to dead lock. The dead lock should be easily triggered, it hasn't caused any failure report just because the feature 'kexec jump' is almost not used by anyone as far as I know. An inquiry can be made about who is using 'kexec jump' and where it's used. Before that, let's simply remove the lock operation inside CONFIG_KEXEC_JUMP ifdeffery scope. Fixes: 55f2503c3b69 ("PM / reboot: Eliminate race between reboot and suspend") Signed-off-by: Baoquan He Reported-by: Dan Carpenter Reviewed-by: Pingfan Liu Cc: 4.19+ # 4.19+ Signed-off-by: Rafael J. Wysocki --- kernel/kexec_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 4f8efc278aa7..aa919585c24b 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1134,7 +1134,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - lock_system_sleep(); pm_prepare_console(); error = freeze_processes(); if (error) { @@ -1197,7 +1196,6 @@ int kernel_kexec(void) thaw_processes(); Restore_console: pm_restore_console(); - unlock_system_sleep(); } #endif From 2f96e40212d435b328459ba6b3956395eed8fa9f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jan 2021 16:26:17 -0500 Subject: [PATCH 12/50] btrfs: fix possible free space tree corruption with online conversion While running btrfs/011 in a loop I would often ASSERT() while trying to add a new free space entry that already existed, or get an EEXIST while adding a new block to the extent tree, which is another indication of double allocation. This occurs because when we do the free space tree population, we create the new root and then populate the tree and commit the transaction. The problem is when you create a new root, the root node and commit root node are the same. During this initial transaction commit we will run all of the delayed refs that were paused during the free space tree generation, and thus begin to cache block groups. While caching block groups the caching thread will be reading from the main root for the free space tree, so as we make allocations we'll be changing the free space tree, which can cause us to add the same range twice which results in either the ASSERT(ret != -EEXIST); in __btrfs_add_free_space, or in a variety of different errors when running delayed refs because of a double allocation. Fix this by marking the fs_info as unsafe to load the free space tree, and fall back on the old slow method. We could be smarter than this, for example caching the block group while we're populating the free space tree, but since this is a serious problem I've opted for the simplest solution. CC: stable@vger.kernel.org # 4.9+ Fixes: a5ed91828518 ("Btrfs: implement the free space B-tree") Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 10 +++++++++- fs/btrfs/ctree.h | 3 +++ fs/btrfs/free-space-tree.c | 10 +++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0886e81e5540..48ebc106a606 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -673,7 +673,15 @@ static noinline void caching_thread(struct btrfs_work *work) wake_up(&caching_ctl->wait); } - if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + /* + * If we are in the transaction that populated the free space tree we + * can't actually cache from the free space tree as our commit root and + * real root are the same, so we could change the contents of the blocks + * while caching. Instead do the slow caching in this case, and after + * the transaction has committed we will be safe. + */ + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags))) ret = load_free_space_tree(caching_ctl); else ret = load_extent_tree_free(caching_ctl); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0225c5208f44..47ca8edafb5e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -564,6 +564,9 @@ enum { /* Indicate that we need to cleanup space cache v1 */ BTRFS_FS_CLEANUP_SPACE_CACHE_V1, + + /* Indicate that we can't trust the free space tree for caching yet */ + BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, }; /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index e33a65bd9a0c..a33bca94d133 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1150,6 +1150,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); free_space_root = btrfs_create_tree(trans, BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { @@ -1171,11 +1172,18 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + ret = btrfs_commit_transaction(trans); - return btrfs_commit_transaction(trans); + /* + * Now that we've committed the transaction any reading of our commit + * root will be safe, so we can cache from the free space tree now. + */ + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); + return ret; abort: clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; From c41ec4529d3448df8998950d7bada757a1b321cf Mon Sep 17 00:00:00 2001 From: Su Yue Date: Thu, 21 Jan 2021 19:39:10 +0800 Subject: [PATCH 13/50] btrfs: fix lockdep warning due to seqcount_mutex on 32bit arch This effectively reverts commit d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t"). While running fstests on 32 bits test box, many tests failed because of warnings in dmesg. One of those warnings (btrfs/003): [66.441317] WARNING: CPU: 6 PID: 9251 at include/linux/seqlock.h:279 btrfs_remove_chunk+0x58b/0x7b0 [btrfs] [66.441446] CPU: 6 PID: 9251 Comm: btrfs Tainted: G O 5.11.0-rc4-custom+ #5 [66.441449] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.441451] EIP: btrfs_remove_chunk+0x58b/0x7b0 [btrfs] [66.441472] EAX: 00000000 EBX: 00000001 ECX: c576070c EDX: c6b15803 [66.441475] ESI: 10000000 EDI: 00000000 EBP: c56fbcfc ESP: c56fbc70 [66.441477] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010246 [66.441481] CR0: 80050033 CR2: 05c8da20 CR3: 04b20000 CR4: 00350ed0 [66.441485] Call Trace: [66.441510] btrfs_relocate_chunk+0xb1/0x100 [btrfs] [66.441529] ? btrfs_lookup_block_group+0x17/0x20 [btrfs] [66.441562] btrfs_balance+0x8ed/0x13b0 [btrfs] [66.441586] ? btrfs_ioctl_balance+0x333/0x3c0 [btrfs] [66.441619] ? __this_cpu_preempt_check+0xf/0x11 [66.441643] btrfs_ioctl_balance+0x333/0x3c0 [btrfs] [66.441664] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441683] btrfs_ioctl+0x414/0x2ae0 [btrfs] [66.441700] ? __lock_acquire+0x35f/0x2650 [66.441717] ? lockdep_hardirqs_on+0x87/0x120 [66.441720] ? lockdep_hardirqs_on_prepare+0xd0/0x1e0 [66.441724] ? call_rcu+0x2d3/0x530 [66.441731] ? __might_fault+0x41/0x90 [66.441736] ? kvm_sched_clock_read+0x15/0x50 [66.441740] ? sched_clock+0x8/0x10 [66.441745] ? sched_clock_cpu+0x13/0x180 [66.441750] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441750] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441768] __ia32_sys_ioctl+0x165/0x8a0 [66.441773] ? __this_cpu_preempt_check+0xf/0x11 [66.441785] ? __might_fault+0x89/0x90 [66.441791] __do_fast_syscall_32+0x54/0x80 [66.441796] do_fast_syscall_32+0x32/0x70 [66.441801] do_SYSENTER_32+0x15/0x20 [66.441805] entry_SYSENTER_32+0x9f/0xf2 [66.441808] EIP: 0xab7b5549 [66.441814] EAX: ffffffda EBX: 00000003 ECX: c4009420 EDX: bfa91f5c [66.441816] ESI: 00000003 EDI: 00000001 EBP: 00000000 ESP: bfa91e98 [66.441818] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000292 [66.441833] irq event stamp: 42579 [66.441835] hardirqs last enabled at (42585): [] console_unlock+0x495/0x590 [66.441838] hardirqs last disabled at (42590): [] console_unlock+0x405/0x590 [66.441840] softirqs last enabled at (41698): [] call_on_stack+0x1c/0x60 [66.441843] softirqs last disabled at (41681): [] call_on_stack+0x1c/0x60 ======================================================================== btrfs_remove_chunk+0x58b/0x7b0: __seqprop_mutex_assert at linux/./include/linux/seqlock.h:279 (inlined by) btrfs_device_set_bytes_used at linux/fs/btrfs/volumes.h:212 (inlined by) btrfs_remove_chunk at linux/fs/btrfs/volumes.c:2994 ======================================================================== The warning is produced by lockdep_assert_held() in __seqprop_mutex_assert() if CONFIG_LOCKDEP is enabled. And "olumes.c:2994 is btrfs_device_set_bytes_used() with mutex lock fs_info->chunk_mutex held already. After adding some debug prints, the cause was found that many __alloc_device() are called with NULL @fs_info (during scanning ioctl). Inside the function, btrfs_device_data_ordered_init() is expanded to seqcount_mutex_init(). In this scenario, its second parameter info->chunk_mutex is &NULL->chunk_mutex which equals to offsetof(struct btrfs_fs_info, chunk_mutex) unexpectedly. Thus, seqcount_mutex_init() is called in wrong way. And later btrfs_device_get/set helpers trigger lockdep warnings. The device and filesystem object lifetimes are different and we'd have to synchronize initialization of the btrfs_device::data_seqcount with the fs_info, possibly using some additional synchronization. It would still not prevent concurrent access to the seqcount lock when it's used for read and initialization. Commit d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t") does not mention a particular problem being fixed so revert should not cause any harm and we'll get the lockdep warning fixed. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210139 Reported-by: Erhard F Fixes: d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t") CC: stable@vger.kernel.org # 5.10 CC: Davidlohr Bueso Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0c7f4f6237e8..b900cc7849b6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -433,7 +433,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info) atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - btrfs_device_data_ordered_init(dev, fs_info); + btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); extent_io_tree_init(fs_info, &dev->alloc_state, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1997a4649a66..c43663d9c22e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -39,10 +39,10 @@ struct btrfs_io_geometry { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include #define __BTRFS_NEED_DEVICE_DATA_ORDERED -#define btrfs_device_data_ordered_init(device, info) \ - seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex) +#define btrfs_device_data_ordered_init(device) \ + seqcount_init(&device->data_seqcount) #else -#define btrfs_device_data_ordered_init(device, info) do { } while (0) +#define btrfs_device_data_ordered_init(device) do { } while (0) #endif #define BTRFS_DEV_STATE_WRITEABLE (0) @@ -76,8 +76,7 @@ struct btrfs_device { blk_status_t last_flush_error; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED - /* A seqcount_t with associated chunk_mutex (for lockdep) */ - seqcount_mutex_t data_seqcount; + seqcount_t data_seqcount; #endif /* the internal btrfs device id */ @@ -168,9 +167,11 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ + preempt_disable(); \ write_seqcount_begin(&dev->data_seqcount); \ dev->name = size; \ write_seqcount_end(&dev->data_seqcount); \ + preempt_enable(); \ } #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ From 9ad6d91f056b99dbe59a262810cb342519ea8d39 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 22 Jan 2021 19:07:45 +0000 Subject: [PATCH 14/50] btrfs: fix log replay failure due to race with space cache rebuild After a sudden power failure we may end up with a space cache on disk that is not valid and needs to be rebuilt from scratch. If that happens, during log replay when we attempt to pin an extent buffer from a log tree, at btrfs_pin_extent_for_log_replay(), we do not wait for the space cache to be rebuilt through the call to: btrfs_cache_block_group(cache, 1); That is because that only waits for the task (work queue job) that loads the space cache to change the cache state from BTRFS_CACHE_FAST to any other value. That is ok when the space cache on disk exists and is valid, but when the cache is not valid and needs to be rebuilt, it ends up returning as soon as the cache state changes to BTRFS_CACHE_STARTED (done at caching_thread()). So this means that we can end up trying to unpin a range which is not yet marked as free in the block group. This results in the call to btrfs_remove_free_space() to return -EINVAL to btrfs_pin_extent_for_log_replay(), which in turn makes the log replay fail as well as mounting the filesystem. More specifically the -EINVAL comes from free_space_cache.c:remove_from_bitmap(), because the requested range is not marked as free space (ones in the bitmap), we have the following condition triggered: static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, (...) if (ret < 0 || search_start != *offset) return -EINVAL; (...) It's the "search_start != *offset" that results in the condition being evaluated to true. When this happens we got the following in dmesg/syslog: [72383.415114] BTRFS: device fsid 32b95b69-0ea9-496a-9f02-3f5a56dc9322 devid 1 transid 1432 /dev/sdb scanned by mount (3816007) [72383.417837] BTRFS info (device sdb): disk space caching is enabled [72383.418536] BTRFS info (device sdb): has skinny extents [72383.423846] BTRFS info (device sdb): start tree-log replay [72383.426416] BTRFS warning (device sdb): block group 30408704 has wrong amount of free space [72383.427686] BTRFS warning (device sdb): failed to load free space cache for block group 30408704, rebuilding it now [72383.454291] BTRFS: error (device sdb) in btrfs_recover_log_trees:6203: errno=-22 unknown (Failed to pin buffers while recovering log root tree.) [72383.456725] BTRFS: error (device sdb) in btrfs_replay_log:2253: errno=-22 unknown (Failed to recover log tree) [72383.460241] BTRFS error (device sdb): open_ctree failed We also mark the range for the extent buffer in the excluded extents io tree. That is fine when the space cache is valid on disk and we can load it, in which case it causes no problems. However, for the case where we need to rebuild the space cache, because it is either invalid or it is missing, having the extent buffer range marked in the excluded extents io tree leads to a -EINVAL failure from the call to btrfs_remove_free_space(), resulting in the log replay and mount to fail. This is because by having the range marked in the excluded extents io tree, the caching thread ends up never adding the range of the extent buffer as free space in the block group since the calls to add_new_free_space(), called from load_extent_tree_free(), filter out any ranges that are marked as excluded extents. So fix this by making sure that during log replay we wait for the caching task to finish completely when we need to rebuild a space cache, and also drop the need to mark the extent buffer range in the excluded extents io tree, as well as clearing ranges from that tree at btrfs_finish_extent_commit(). This started to happen with some frequency on large filesystems having block groups with a lot of fragmentation since the recent commit e747853cae3ae3 ("btrfs: load free space cache asynchronously"), but in fact the issue has been there for years, it was just much less likely to happen. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 61 +++++++++++++----------------------------- 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 30b1a630dc2f..0c335dae5af7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2602,8 +2602,6 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; int ret; - btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes); - cache = btrfs_lookup_block_group(trans->fs_info, bytenr); if (!cache) return -EINVAL; @@ -2615,11 +2613,19 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, * the pinned extents. */ btrfs_cache_block_group(cache, 1); + /* + * Make sure we wait until the cache is completely built in case it is + * missing or is invalid and therefore needs to be rebuilt. + */ + ret = btrfs_wait_block_group_cache_done(cache); + if (ret) + goto out; pin_down_extent(trans, cache, bytenr, num_bytes, 0); /* remove us from the free space cache (if we're there at all) */ ret = btrfs_remove_free_space(cache, bytenr, num_bytes); +out: btrfs_put_block_group(cache); return ret; } @@ -2629,50 +2635,22 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, { int ret; struct btrfs_block_group *block_group; - struct btrfs_caching_control *caching_ctl; block_group = btrfs_lookup_block_group(fs_info, start); if (!block_group) return -EINVAL; - btrfs_cache_block_group(block_group, 0); - caching_ctl = btrfs_get_caching_control(block_group); + btrfs_cache_block_group(block_group, 1); + /* + * Make sure we wait until the cache is completely built in case it is + * missing or is invalid and therefore needs to be rebuilt. + */ + ret = btrfs_wait_block_group_cache_done(block_group); + if (ret) + goto out; - if (!caching_ctl) { - /* Logic error */ - BUG_ON(!btrfs_block_group_done(block_group)); - ret = btrfs_remove_free_space(block_group, start, num_bytes); - } else { - /* - * We must wait for v1 caching to finish, otherwise we may not - * remove our space. - */ - btrfs_wait_space_cache_v1_finished(block_group, caching_ctl); - mutex_lock(&caching_ctl->mutex); - - if (start >= caching_ctl->progress) { - ret = btrfs_add_excluded_extent(fs_info, start, - num_bytes); - } else if (start + num_bytes <= caching_ctl->progress) { - ret = btrfs_remove_free_space(block_group, - start, num_bytes); - } else { - num_bytes = caching_ctl->progress - start; - ret = btrfs_remove_free_space(block_group, - start, num_bytes); - if (ret) - goto out_lock; - - num_bytes = (start + num_bytes) - - caching_ctl->progress; - start = caching_ctl->progress; - ret = btrfs_add_excluded_extent(fs_info, start, - num_bytes); - } -out_lock: - mutex_unlock(&caching_ctl->mutex); - btrfs_put_caching_control(caching_ctl); - } + ret = btrfs_remove_free_space(block_group, start, num_bytes); +out: btrfs_put_block_group(block_group); return ret; } @@ -2863,9 +2841,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) - clear_extent_bits(&fs_info->excluded_extents, start, - end, EXTENT_UPTODATE); if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, From fef9c8d28e28a808274a18fbd8cc2685817fd62a Mon Sep 17 00:00:00 2001 From: Laurent Badel Date: Fri, 22 Jan 2021 17:19:41 +0100 Subject: [PATCH 15/50] PM: hibernate: flush swap writer after marking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush the swap writer after, not before, marking the files, to ensure the signature is properly written. Fixes: 6f612af57821 ("PM / Hibernate: Group swap ops") Signed-off-by: Laurent Badel Cc: All applicable Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index c73f2e295167..72e33054a2e1 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -497,10 +497,10 @@ static int swap_writer_finish(struct swap_map_handle *handle, unsigned int flags, int error) { if (!error) { - flush_swap_writer(handle); pr_info("S"); error = mark_swapfiles(handle, flags); pr_cont("|\n"); + flush_swap_writer(handle); } if (error) From b98e762e3d71e893b221f871825dc64694cfb258 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 12:21:02 -0500 Subject: [PATCH 16/50] nbd: freeze the queue while we're adding connections When setting up a device, we can krealloc the config->socks array to add new sockets to the configuration. However if we happen to get a IO request in at this point even though we aren't setup we could hit a UAF, as we deref config->socks without any locking, assuming that the configuration was setup already and that ->socks is safe to access it as we have a reference on the configuration. But there's nothing really preventing IO from occurring at this point of the device setup, we don't want to incur the overhead of a lock to access ->socks when it will never change while the device is running. To fix this UAF scenario simply freeze the queue if we are adding sockets. This will protect us from this particular case without adding any additional overhead for the normal running case. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6727358e147d..e6ea5d344f87 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1022,6 +1022,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, if (!sock) return err; + /* + * We need to make sure we don't get any errant requests while we're + * reallocating the ->socks array. + */ + blk_mq_freeze_queue(nbd->disk->queue); + if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) nbd->task_setup = current; @@ -1060,10 +1066,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->cookie = 0; socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); + blk_mq_unfreeze_queue(nbd->disk->queue); return 0; put_socket: + blk_mq_unfreeze_queue(nbd->disk->queue); sockfd_put(sock); return err; } From ef99a60ffd9b918354e038bc5e61f007ff7e901d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 17 Jan 2021 09:30:15 +0000 Subject: [PATCH 17/50] drm/i915/gt: Clear CACHE_MODE prior to clearing residuals Since we do a bare context switch with no restore, the clear residual kernel runs on dirty state, and we must be careful to avoid executing with bad state from context registers inherited from a malicious client. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2955 Fixes: 09aa9e45863e ("drm/i915/gt: Restore clear-residual mitigations for Ivybridge, Baytrail") Testcase: igt/gem_ctx_isolation # ivb,vlv Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Akeem G Abodunrin Reviewed-by: Akeem G Abodunrin Link: https://patchwork.freedesktop.org/patch/msgid/20210117093015.29143-1-chris@chris-wilson.co.uk (cherry picked from commit ace44e13e577c2ae59980e9a6ff5ca253b1cf831) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index 94465374ca2f..e961ad6a3129 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -390,6 +390,16 @@ static void emit_batch(struct i915_vma * const vma, &cb_kernel_ivb, desc_count); + /* Reset inherited context registers */ + gen7_emit_pipeline_invalidate(&cmds); + batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); + batch_add(&cmds, 0xffff0000); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); + batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + gen7_emit_pipeline_flush(&cmds); + + /* Switch to the media pipeline and our base address */ gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); @@ -399,9 +409,11 @@ static void emit_batch(struct i915_vma * const vma, gen7_emit_state_base_address(&cmds, descriptors); gen7_emit_pipeline_invalidate(&cmds); + /* Set the clear-residual kernel state */ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); + /* Execute the kernel on all HW threads */ for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); From a2a5f5628e5494ca9353f761f7fe783dfa82fb9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Dec 2020 22:35:11 +0200 Subject: [PATCH 18/50] drm/i915: Fix ICL MG PHY vswing handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MH PHY vswing table does have all the entries these days. Get rid of the old hacks in the code which claim otherwise. This hack was totally bogus anyway. The correct way to handle the lack of those two entries would have been to declare our max vswing and pre-emph to both be level 2. Cc: José Roberto de Souza Cc: Clinton Taylor Fixes: 9f7ffa297978 ("drm/i915/tc/icl: Update TC vswing tables") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201207203512.1718-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: José Roberto de Souza (cherry picked from commit 5ec346476e795089b7dac8ab9dcee30c8d80ad84) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index d5ace48b1ace..bf17365857ca 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2755,12 +2755,11 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 val; ddi_translations = icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - /* The table does not have values for level 3 and level 9. */ - if (level >= n_entries || level == 3 || level == 9) { + if (level >= n_entries) { drm_dbg_kms(&dev_priv->drm, "DDI translation not found for level %d. Using %d instead.", - level, n_entries - 2); - level = n_entries - 2; + level, n_entries - 1); + level = n_entries - 1; } /* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */ From 8f6d08c9af284d74276da6681348e4673f13caea Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 21 Jan 2021 16:19:35 +0000 Subject: [PATCH 19/50] drm/i915: Check for all subplatform bits Current code is checking only 2 bits in the subplatform, but actually 3 bits are allocated for the field. Check all 3 bits. Fixes: 805446c8347c ("drm/i915: Introduce concept of a sub-platform") Cc: Tvrtko Ursulin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210121161936.746591-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 27b695ee1af9bb36605e67055874ec081306ac28) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 632c713227dc..c6964f82a1bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1346,7 +1346,7 @@ intel_subplatform(const struct intel_runtime_info *info, enum intel_platform p) { const unsigned int pi = __platform_mask_index(info, p); - return info->platform_mask[pi] & INTEL_SUBPLATFORM_BITS; + return info->platform_mask[pi] & ((1 << INTEL_SUBPLATFORM_BITS) - 1); } static __always_inline bool From 3d480fe1befa0ef434f5c25199e7d45c26870555 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 17:56:40 -0800 Subject: [PATCH 20/50] drm/i915/selftest: Fix potential memory leak Object out is not released on path that no VMA instance found. The root cause is jumping to an unexpected label on the error path. Fixes: a47e788c2310 ("drm/i915/selftests: Exercise CS TLB invalidation") Signed-off-by: Pan Bian Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210122015640.16002-1-bianpan2016@163.com (cherry picked from commit 2b015017d5cb01477a79ca184ac25c247d664568) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c53a222e3dec..713770fb2b92 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1880,7 +1880,7 @@ static int igt_cs_tlb(void *arg) vma = i915_vma_instance(out, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto out_put_batch; + goto out_put_out; } err = i915_vma_pin(vma, 0, 0, From f6e98a1809faa02f40e0d089d6cfc1aa372a34c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jan 2021 23:28:07 +0000 Subject: [PATCH 21/50] drm/i915: Always flush the active worker before returning from the wait The first thing the active retirement worker does is decrement the i915_active count. The first thing we do during i915_active_wait is try to increment the i915_active count, but only if already active [non-zero]. The wait may see that the retirement is already started and so marked the i915_active as idle, and skip waiting for the retirement handler. However, the caller of i915_active_wait may immediately free the i915_active upon returning (e.g. i915_vma_destroy) so we must not return before the concurrent access from the worker is completed. We must always flush the worker. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2473 Fixes: 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210121232807.16618-1-chris@chris-wilson.co.uk (cherry picked from commit 977a372e972cb42799746c284035a33c64ebace9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 10a865f3dc09..9ed19b8bca60 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -631,24 +631,26 @@ static int flush_lazy_signals(struct i915_active *ref) int __i915_active_wait(struct i915_active *ref, int state) { - int err; - might_sleep(); - if (!i915_active_acquire_if_busy(ref)) - return 0; - /* Any fence added after the wait begins will not be auto-signaled */ - err = flush_lazy_signals(ref); - i915_active_release(ref); - if (err) - return err; + if (i915_active_acquire_if_busy(ref)) { + int err; - if (!i915_active_is_idle(ref) && - ___wait_var_event(ref, i915_active_is_idle(ref), - state, 0, 0, schedule())) - return -EINTR; + err = flush_lazy_signals(ref); + i915_active_release(ref); + if (err) + return err; + if (___wait_var_event(ref, i915_active_is_idle(ref), + state, 0, 0, schedule())) + return -EINTR; + } + + /* + * After the wait is complete, the caller may free the active. + * We have to flush any concurrent retirement before returning. + */ flush_work(&ref->work); return 0; } From 489140b5ba2e7cc4b853c29e0591895ddb462a82 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Jan 2021 12:50:33 +0000 Subject: [PATCH 22/50] drm/i915/gt: Always try to reserve GGTT address 0x0 Since writing to address 0 is a very common mistake, let's try to avoid putting anything sensitive there. References: https://gitlab.freedesktop.org/drm/intel/-/issues/2989 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210125125033.23656-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 56b429cc584c6ed8b895d8d8540959655db1ff73) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 47 +++++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index cf94525be2c1..db8c66dde655 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -526,16 +526,39 @@ static int init_ggtt(struct i915_ggtt *ggtt) mutex_init(&ggtt->error_mutex); if (ggtt->mappable_end) { - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, - &ggtt->error_capture, - PAGE_SIZE, 0, - I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; + /* + * Reserve a mappable slot for our lockless error capture. + * + * We strongly prefer taking address 0x0 in order to protect + * other critical buffers against accidental overwrites, + * as writing to address 0 is a very common mistake. + * + * Since 0 may already be in use by the system (e.g. the BIOS + * framebuffer), we let the reservation fail quietly and hope + * 0 remains reserved always. + * + * If we fail to reserve 0, and then fail to find any space + * for an error-capture, remain silent. We can afford not + * to reserve an error_capture node as we have fallback + * paths, and we trust that 0 will remain reserved. However, + * the only likely reason for failure to insert is a driver + * bug, which we expect to cause other failures... + */ + ggtt->error_capture.size = I915_GTT_PAGE_SIZE; + ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; + if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) + drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + ggtt->error_capture.size, 0, + ggtt->error_capture.color, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); } + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_dbg(&ggtt->vm.i915->drm, + "Reserved GGTT:[%llx, %llx] for use by error capture\n", + ggtt->error_capture.start, + ggtt->error_capture.start + ggtt->error_capture.size); /* * The upper portion of the GuC address space has a sizeable hole @@ -548,9 +571,9 @@ static int init_ggtt(struct i915_ggtt *ggtt) /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - drm_dbg_kms(&ggtt->vm.i915->drm, - "clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); + drm_dbg(&ggtt->vm.i915->drm, + "clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); ggtt->vm.clear_range(&ggtt->vm, hole_start, hole_end - hole_start); } From a1bb3cd58913338e1b627ea6b8c03c2ae82d293f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 15:28:26 +0000 Subject: [PATCH 23/50] io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE If the tctx inflight number haven't changed because of cancellation, __io_uring_task_cancel() will continue leaving the task in TASK_UNINTERRUPTIBLE state, that's not expected by __io_uring_files_cancel(). Ensure we always call finish_wait() before retrying. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2166c469789d..09aada153a71 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9124,16 +9124,15 @@ void __io_uring_task_cancel(void) prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); /* - * If we've seen completions, retry. This avoids a race where - * a completion comes in before we did prepare_to_wait(). + * If we've seen completions, retry without waiting. This + * avoids a race where a completion comes in before we did + * prepare_to_wait(). */ - if (inflight != tctx_inflight(tctx)) - continue; - schedule(); + if (inflight == tctx_inflight(tctx)) + schedule(); finish_wait(&tctx->wait, &wait); } while (1); - finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); io_uring_remove_task_files(tctx); From ca70f00bed6cb255b7a9b91aa18a2717c9217f70 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 15:28:27 +0000 Subject: [PATCH 24/50] io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE do not call blocking ops when !TASK_RUNNING; state=2 set at [<00000000ced9dbfc>] prepare_to_wait+0x1f4/0x3b0 kernel/sched/wait.c:262 WARNING: CPU: 1 PID: 19888 at kernel/sched/core.c:7853 __might_sleep+0xed/0x100 kernel/sched/core.c:7848 RIP: 0010:__might_sleep+0xed/0x100 kernel/sched/core.c:7848 Call Trace: __mutex_lock_common+0xc4/0x2ef0 kernel/locking/mutex.c:935 __mutex_lock kernel/locking/mutex.c:1103 [inline] mutex_lock_nested+0x1a/0x20 kernel/locking/mutex.c:1118 io_wq_submit_work+0x39a/0x720 fs/io_uring.c:6411 io_run_cancel fs/io-wq.c:856 [inline] io_wqe_cancel_pending_work fs/io-wq.c:990 [inline] io_wq_cancel_cb+0x614/0xcb0 fs/io-wq.c:1027 io_uring_cancel_files fs/io_uring.c:8874 [inline] io_uring_cancel_task_requests fs/io_uring.c:8952 [inline] __io_uring_files_cancel+0x115d/0x19e0 fs/io_uring.c:9038 io_uring_files_cancel include/linux/io_uring.h:51 [inline] do_exit+0x2e6/0x2490 kernel/exit.c:780 do_group_exit+0x168/0x2d0 kernel/exit.c:922 get_signal+0x16b5/0x2030 kernel/signal.c:2770 arch_do_signal_or_restart+0x8e/0x6a0 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0xac/0x1e0 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x48/0x190 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Rewrite io_uring_cancel_files() to mimic __io_uring_task_cancel()'s counting scheme, so it does all the heavy work before setting TASK_UNINTERRUPTIBLE. Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+f655445043a26a7cfab8@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov [axboe: fix inverted task check] Signed-off-by: Jens Axboe --- fs/io_uring.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 09aada153a71..bb0270eeb8cb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8873,30 +8873,31 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } +static int io_uring_count_inflight(struct io_ring_ctx *ctx, + struct task_struct *task, + struct files_struct *files) +{ + struct io_kiocb *req; + int cnt = 0; + + spin_lock_irq(&ctx->inflight_lock); + list_for_each_entry(req, &ctx->inflight_list, inflight_entry) + cnt += io_match_task(req, task, files); + spin_unlock_irq(&ctx->inflight_lock); + return cnt; +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) { while (!list_empty_careful(&ctx->inflight_list)) { struct io_task_cancel cancel = { .task = task, .files = files }; - struct io_kiocb *req; DEFINE_WAIT(wait); - bool found = false; + int inflight; - spin_lock_irq(&ctx->inflight_lock); - list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (!io_match_task(req, task, files)) - continue; - found = true; - break; - } - if (found) - prepare_to_wait(&task->io_uring->wait, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&ctx->inflight_lock); - - /* We need to keep going until we don't find a matching req */ - if (!found) + inflight = io_uring_count_inflight(ctx, task, files); + if (!inflight) break; io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); @@ -8905,7 +8906,11 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_cqring_overflow_flush(ctx, true, task, files); /* cancellations _may_ trigger task work */ io_run_task_work(); - schedule(); + + prepare_to_wait(&task->io_uring->wait, &wait, + TASK_UNINTERRUPTIBLE); + if (inflight == io_uring_count_inflight(ctx, task, files)) + schedule(); finish_wait(&task->io_uring->wait, &wait); } } From 519ea6f1c82fcdc9842908155ae379de47818778 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 26 Jan 2021 13:40:56 +0000 Subject: [PATCH 25/50] arm64: Fix kernel address detection of __is_lm_address() Currently, the __is_lm_address() check just masks out the top 12 bits of the address, but if they are 0, it still yields a true result. This has as a side effect that virt_addr_valid() returns true even for invalid virtual addresses (e.g. 0x0). Fix the detection checking that it's actually a kernel address starting at PAGE_OFFSET. Fixes: 68dd8ef32162 ("arm64: memory: Fix virt_addr_valid() using __is_lm_address()") Cc: # 5.4.x Cc: Will Deacon Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210126134056.45747-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 18fce223b67b..99d7e1494aaa 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -247,9 +247,11 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* - * The linear kernel range starts at the bottom of the virtual address space. + * Check whether an arbitrary address is within the linear map, which + * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the + * kernel's TTBR1 address range. */ -#define __is_lm_address(addr) (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) +#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) From 907d1df30a51cc1a1d25414a00cde0494b83df7b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 23:35:10 +0000 Subject: [PATCH 26/50] io_uring: fix wqe->lock/completion_lock deadlock Joseph reports following deadlock: CPU0: ... io_kill_linked_timeout // &ctx->completion_lock io_commit_cqring __io_queue_deferred __io_queue_async_work io_wq_enqueue io_wqe_enqueue // &wqe->lock CPU1: ... __io_uring_files_cancel io_wq_cancel_cb io_wqe_cancel_pending_work // &wqe->lock io_cancel_task_cb // &ctx->completion_lock Only __io_queue_deferred() calls queue_async_work() while holding ctx->completion_lock, enqueue drained requests via io_req_task_queue() instead. Cc: stable@vger.kernel.org # 5.9+ Reported-by: Joseph Qi Tested-by: Joseph Qi Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bb0270eeb8cb..c218deaf73a9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1026,6 +1026,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, struct iov_iter *iter, bool force); static void io_req_drop_files(struct io_kiocb *req); +static void io_req_task_queue(struct io_kiocb *req); static struct kmem_cache *req_cachep; @@ -1634,18 +1635,11 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) do { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); - struct io_kiocb *link; if (req_need_defer(de->req, de->seq)) break; list_del_init(&de->list); - /* punt-init is done before queueing for defer */ - link = __io_queue_async_work(de->req); - if (link) { - __io_queue_linked_timeout(link); - /* drop submission reference */ - io_put_req_deferred(link, 1); - } + io_req_task_queue(de->req); kfree(de); } while (!list_empty(&ctx->defer_list)); } From a1df829ead5877d4a1061e976a50e2e665a16f24 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 21 Jan 2021 17:24:19 -0800 Subject: [PATCH 27/50] ACPI/IORT: Do not blindly trust DMA masks from firmware Address issue observed on real world system with suboptimal IORT table where DMA masks of PCI devices would get set to 0 as result. iort_dma_setup() would query the root complex'/named component IORT entry for a DMA mask, and use that over the one the device has been configured with earlier. Ideally we want to use the minimum mask of what the IORT contains for the root complex and what the device was configured with. Fixes: 5ac65e8c8941 ("ACPI/IORT: Support address size limit for root complexes") Signed-off-by: Moritz Fischer Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20210122012419.95010-1-mdf@kernel.org Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/iort.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index d4eac6d7e9fb..2494138a6905 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1107,6 +1107,11 @@ static int nc_dma_get_range(struct device *dev, u64 *size) ncomp = (struct acpi_iort_named_component *)node->node_data; + if (!ncomp->memory_address_limit) { + pr_warn(FW_BUG "Named component missing memory address limit\n"); + return -EINVAL; + } + *size = ncomp->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1126,6 +1131,11 @@ static int rc_dma_get_range(struct device *dev, u64 *size) rc = (struct acpi_iort_root_complex *)node->node_data; + if (!rc->memory_address_limit) { + pr_warn(FW_BUG "Root complex missing memory address limit\n"); + return -EINVAL; + } + *size = rc->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1173,8 +1183,8 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) end = dmaaddr + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; - dev->coherent_dma_mask = mask; - *dev->dma_mask = mask; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); } *dma_addr = dmaaddr; From 8dc932d3e8afb65e12eba7495f046c83884c49bf Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 26 Jan 2021 21:59:07 +0200 Subject: [PATCH 28/50] Revert "block: simplify set_init_blocksize" to regain lost performance The cited commit introduced a serious regression with SATA write speed, as found by bisecting. This patch reverts this commit, which restores write speed back to the values observed before this commit. The performance tests were done on a Helios4 NAS (2nd batch) with 4 HDDs (WD8003FFBX) using dd (bs=1M count=2000). "Direct" is a test with a single HDD, the rest are different RAID levels built over the first partitions of 4 HDDs. Test results are in MB/s, R is read, W is write. | Direct | RAID0 | RAID10 f2 | RAID10 n2 | RAID6 ----------------+--------+-------+-----------+-----------+-------- 9011495c9466 | R:256 | R:313 | R:276 | R:313 | R:323 (before faulty) | W:254 | W:253 | W:195 | W:204 | W:117 ----------------+--------+-------+-----------+-----------+-------- 5ff9f19231a0 | R:257 | R:398 | R:312 | R:344 | R:391 (faulty commit) | W:154 | W:122 | W:67.7 | W:66.6 | W:67.2 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:256 | R:401 | R:312 | R:356 | R:375 unpatched | W:149 | W:123 | W:64 | W:64.1 | W:61.5 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:255 | R:396 | R:312 | R:340 | R:393 patched | W:247 | W:274 | W:220 | W:225 | W:121 Applying this patch doesn't hurt read performance, while improves the write speed by 1.5x - 3.5x (more impact on RAID tests). The write speed is restored back to the state before the faulty commit, and even a bit higher in RAID tests (which aren't HDD-bound on this device) - that is likely related to other optimizations done between the faulty commit and 5.10.10 which also improved the read speed. Signed-off-by: Maxim Mikityanskiy Fixes: 5ff9f19231a0 ("block: simplify set_init_blocksize") Cc: Christoph Hellwig Cc: Jens Axboe Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3b8963e228a1..235b5042672e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -130,7 +130,15 @@ EXPORT_SYMBOL(truncate_bdev_range); static void set_init_blocksize(struct block_device *bdev) { - bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + unsigned int bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_inode->i_blkbits = blksize_bits(bsize); } int set_blocksize(struct block_device *bdev, int size) From 6195ba09822c87cad09189bbf550d0fbe714687a Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 27 Jan 2021 15:14:09 +0800 Subject: [PATCH 29/50] io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE Abaci reported the follow warning: [ 27.073425] do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait_exclusive+0x3a/0xc0 [ 27.075805] WARNING: CPU: 0 PID: 951 at kernel/sched/core.c:7853 __might_sleep+0x80/0xa0 [ 27.077604] Modules linked in: [ 27.078379] CPU: 0 PID: 951 Comm: a.out Not tainted 5.11.0-rc3+ #1 [ 27.079637] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 27.080852] RIP: 0010:__might_sleep+0x80/0xa0 [ 27.081835] Code: 65 48 8b 04 25 80 71 01 00 48 8b 90 c0 15 00 00 48 8b 70 18 48 c7 c7 08 39 95 82 c6 05 f9 5f de 08 01 48 89 d1 e8 00 c6 fa ff 0b eb bf 41 0f b6 f5 48 c7 c7 40 23 c9 82 e8 f3 48 ec 00 eb a7 [ 27.084521] RSP: 0018:ffffc90000fe3ce8 EFLAGS: 00010286 [ 27.085350] RAX: 0000000000000000 RBX: ffffffff82956083 RCX: 0000000000000000 [ 27.086348] RDX: ffff8881057a0000 RSI: ffffffff8118cc9e RDI: ffff88813bc28570 [ 27.087598] RBP: 00000000000003a7 R08: 0000000000000001 R09: 0000000000000001 [ 27.088819] R10: ffffc90000fe3e00 R11: 00000000fffef9f0 R12: 0000000000000000 [ 27.089819] R13: 0000000000000000 R14: ffff88810576eb80 R15: ffff88810576e800 [ 27.091058] FS: 00007f7b144cf740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 [ 27.092775] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.093796] CR2: 00000000022da7b8 CR3: 000000010b928002 CR4: 00000000003706f0 [ 27.094778] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 27.095780] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 27.097011] Call Trace: [ 27.097685] __mutex_lock+0x5d/0xa30 [ 27.098565] ? prepare_to_wait_exclusive+0x71/0xc0 [ 27.099412] ? io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.100441] ? lockdep_hardirqs_on_prepare+0xe9/0x1c0 [ 27.101537] ? _raw_spin_unlock_irqrestore+0x2d/0x40 [ 27.102656] ? trace_hardirqs_on+0x46/0x110 [ 27.103459] ? io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.104317] io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.105113] io_cqring_wait+0x36e/0x4d0 [ 27.105770] ? find_held_lock+0x28/0xb0 [ 27.106370] ? io_uring_remove_task_files+0xa0/0xa0 [ 27.107076] __x64_sys_io_uring_enter+0x4fb/0x640 [ 27.107801] ? rcu_read_lock_sched_held+0x59/0xa0 [ 27.108562] ? lockdep_hardirqs_on_prepare+0xe9/0x1c0 [ 27.109684] ? syscall_enter_from_user_mode+0x26/0x70 [ 27.110731] do_syscall_64+0x2d/0x40 [ 27.111296] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 27.112056] RIP: 0033:0x7f7b13dc8239 [ 27.112663] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 3d 01 f0 ff ff 73 01 c3 48 8b 0d 27 ec 2c 00 f7 d8 64 89 01 48 [ 27.115113] RSP: 002b:00007ffd6d7f5c88 EFLAGS: 00000286 ORIG_RAX: 00000000000001aa [ 27.116562] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b13dc8239 [ 27.117961] RDX: 000000000000478e RSI: 0000000000000000 RDI: 0000000000000003 [ 27.118925] RBP: 00007ffd6d7f5cb0 R08: 0000000020000040 R09: 0000000000000008 [ 27.119773] R10: 0000000000000001 R11: 0000000000000286 R12: 0000000000400480 [ 27.120614] R13: 00007ffd6d7f5d90 R14: 0000000000000000 R15: 0000000000000000 [ 27.121490] irq event stamp: 5635 [ 27.121946] hardirqs last enabled at (5643): [] console_unlock+0x5c4/0x740 [ 27.123476] hardirqs last disabled at (5652): [] console_unlock+0x4e7/0x740 [ 27.125192] softirqs last enabled at (5272): [] __do_softirq+0x3c5/0x5aa [ 27.126430] softirqs last disabled at (5267): [] asm_call_irq_on_stack+0xf/0x20 [ 27.127634] ---[ end trace 289d7e28fa60f928 ]--- This is caused by calling io_cqring_overflow_flush() which may sleep after calling prepare_to_wait_exclusive() which set task state to TASK_INTERRUPTIBLE Reported-by: Abaci Fixes: 6c503150ae33 ("io_uring: patch up IOPOLL overflow_flush sync") Reviewed-by: Pavel Begunkov Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c218deaf73a9..ae388cc52843 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7268,14 +7268,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ ret = io_run_task_work_sig(); - if (ret > 0) + if (ret > 0) { + finish_wait(&ctx->wait, &iowq.wq); continue; + } else if (ret < 0) break; if (io_should_wake(&iowq)) break; - if (test_bit(0, &ctx->cq_check_overflow)) + if (test_bit(0, &ctx->cq_check_overflow)) { + finish_wait(&ctx->wait, &iowq.wq); continue; + } if (uts) { timeout = schedule_timeout(timeout); if (timeout == 0) { From a44092e326d403c7878018ba532369f84d31dbfa Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 20 Jan 2021 07:50:02 -0600 Subject: [PATCH 30/50] iommu/amd: Use IVHD EFR for early initialization of IOMMU features IOMMU Extended Feature Register (EFR) is used to communicate the supported features for each IOMMU to the IOMMU driver. This is normally read from the PCI MMIO register offset 0x30, and used by the iommu_feature() helper function. However, there are certain scenarios where the information is needed prior to PCI initialization, and the iommu_feature() function is used prematurely w/o warning. This has caused incorrect initialization of IOMMU. This is the case for the commit 6d39bdee238f ("iommu/amd: Enforce 4k mapping for certain IOMMU data structures") Since, the EFR is also available in the IVHD header, and is available to the driver prior to PCI initialization. Therefore, default to using the IVHD EFR instead. Fixes: 6d39bdee238f ("iommu/amd: Enforce 4k mapping for certain IOMMU data structures") Signed-off-by: Suravee Suthikulpanit Tested-by: Brijesh Singh Reviewed-by: Robert Richter Link: https://lore.kernel.org/r/20210120135002.2682-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 7 ++-- drivers/iommu/amd/amd_iommu_types.h | 4 +++ drivers/iommu/amd/init.c | 56 +++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf71714..b4adab698563 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -84,12 +84,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) { - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); + return !!(iommu->features & mask); } static inline u64 iommu_virt_to_phys(void *vaddr) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 553587827771..1a0495dd5fcb 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -387,6 +387,10 @@ #define IOMMU_CAP_NPCACHE 26 #define IOMMU_CAP_EFR 27 +/* IOMMU IVINFO */ +#define IOMMU_IVINFO_OFFSET 36 +#define IOMMU_IVINFO_EFRSUP BIT(0) + /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6a1f7048dacc..83d8ab2aed9f 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -257,6 +257,8 @@ static void init_device_table_dma(void); static bool amd_iommu_pre_enabled = true; +static u32 amd_iommu_ivinfo __initdata; + bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); @@ -296,6 +298,18 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +/* + * For IVHD type 0x11/0x40, EFR is also available via IVHD. + * Default to IVHD EFR since it is available sooner + * (i.e. before PCI init). + */ +static void __init early_iommu_features_init(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + iommu->features = h->efr_reg; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1577,6 +1591,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + + early_iommu_features_init(iommu, h); + break; default: return -EINVAL; @@ -1770,6 +1787,35 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; +/* + * Note: IVHD 0x11 and 0x40 also contains exact copy + * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. + * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). + */ +static void __init late_iommu_features_init(struct amd_iommu *iommu) +{ + u64 features; + + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return; + + /* read extended feature bits */ + features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + + if (!iommu->features) { + iommu->features = features; + return; + } + + /* + * Sanity check and warn if EFR values from + * IVHD and MMIO conflict. + */ + if (features != iommu->features) + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + features, iommu->features); +} + static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; @@ -1789,8 +1835,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; - /* read extended feature bits */ - iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + late_iommu_features_init(iommu); if (iommu_feature(iommu, FEATURE_GT)) { int glxval; @@ -2607,6 +2652,11 @@ static void __init free_dma_resources(void) free_unity_maps(); } +static void __init ivinfo_init(void *ivrs) +{ + amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -2661,6 +2711,8 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + ivinfo_init(ivrs_base); + amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); From 494b3688bb11a21af12e92a344a1313486693d47 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 19 Jan 2021 12:35:00 +0800 Subject: [PATCH 31/50] iommu/vt-d: Correctly check addr alignment in qi_flush_dev_iotlb_pasid() An incorrect address mask is being used in the qi_flush_dev_iotlb_pasid() to check the address alignment. This leads to a lot of spurious kernel warnings: [ 485.837093] DMAR: Invalidate non-aligned address 7f76f47f9000, order 0 [ 485.837098] DMAR: Invalidate non-aligned address 7f76f47f9000, order 0 [ 492.494145] qi_flush_dev_iotlb_pasid: 5734 callbacks suppressed [ 492.494147] DMAR: Invalidate non-aligned address 7f7728800000, order 11 [ 492.508965] DMAR: Invalidate non-aligned address 7f7728800000, order 11 Fix it by checking the alignment in right way. Fixes: 288d08e780088 ("iommu/vt-d: Handle non-page aligned address") Reported-and-tested-by: Guo Kaijie Signed-off-by: Lu Baolu Cc: Liu Yi L Link: https://lore.kernel.org/r/20210119043500.1539596-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 004feaed3c72..02e7c10a4224 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1496,7 +1496,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, * Max Invs Pending (MIP) is set to 0 for now until we have DIT in * ECAP. */ - if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0)) + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", addr, size_order); From 29b32839725f8c89a41cb6ee054c85f3116ea8b5 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 27 Jan 2021 09:53:17 -0800 Subject: [PATCH 32/50] iommu/vt-d: Do not use flush-queue when caching-mode is on When an Intel IOMMU is virtualized, and a physical device is passed-through to the VM, changes of the virtual IOMMU need to be propagated to the physical IOMMU. The hypervisor therefore needs to monitor PTE mappings in the IOMMU page-tables. Intel specifications provide "caching-mode" capability that a virtual IOMMU uses to report that the IOMMU is virtualized and a TLB flush is needed after mapping to allow the hypervisor to propagate virtual IOMMU mappings to the physical IOMMU. To the best of my knowledge no real physical IOMMU reports "caching-mode" as turned on. Synchronizing the virtual and the physical IOMMU tables is expensive if the hypervisor is unaware which PTEs have changed, as the hypervisor is required to walk all the virtualized tables and look for changes. Consequently, domain flushes are much more expensive than page-specific flushes on virtualized IOMMUs with passthrough devices. The kernel therefore exploited the "caching-mode" indication to avoid domain flushing and use page-specific flushing in virtualized environments. See commit 78d5f0f500e6 ("intel-iommu: Avoid global flushes with caching mode.") This behavior changed after commit 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing"). Now, when batched TLB flushing is used (the default), full TLB domain flushes are performed frequently, requiring the hypervisor to perform expensive synchronization between the virtual TLB and the physical one. Getting batched TLB flushes to use page-specific invalidations again in such circumstances is not easy, since the TLB invalidation scheme assumes that "full" domain TLB flushes are performed for scalability. Disable batched TLB flushes when caching-mode is on, as the performance benefit from using batched TLB invalidations is likely to be much smaller than the overhead of the virtual-to-physical IOMMU page-tables synchronization. Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Nadav Amit Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210127175317.1600473-1-namit@vmware.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f665322a0991..06b00b5363d8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5440,6 +5440,36 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain, return ret; } +static bool domain_use_flush_queue(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool r = true; + + if (intel_iommu_strict) + return false; + + /* + * The flush queue implementation does not perform page-selective + * invalidations that are required for efficient TLB flushes in virtual + * environments. The benefit of batching is likely to be much lower than + * the overhead of synchronizing the virtual and physical IOMMU + * page-tables. + */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!cap_caching_mode(iommu->cap)) + continue; + + pr_warn_once("IOMMU batching is disabled due to virtualization"); + r = false; + break; + } + rcu_read_unlock(); + + return r; +} + static int intel_iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) @@ -5450,7 +5480,7 @@ intel_iommu_domain_get_attr(struct iommu_domain *domain, case IOMMU_DOMAIN_DMA: switch (attr) { case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = !intel_iommu_strict; + *(int *)data = domain_use_flush_queue(); return 0; default: return -ENODEV; From 6c635caef410aa757befbd8857c1eadde5cc22ed Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 28 Jan 2021 13:58:15 +0800 Subject: [PATCH 33/50] blk-cgroup: Use cond_resched() when destroy blkgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On !PREEMPT kernel, we can get below softlockup when doing stress testing with creating and destroying block cgroup repeatly. The reason is it may take a long time to acquire the queue's lock in the loop of blkcg_destroy_blkgs(), or the system can accumulate a huge number of blkgs in pathological cases. We can add a need_resched() check on each loop and release locks and do cond_resched() if true to avoid this issue, since the blkcg_destroy_blkgs() is not called from atomic contexts. [ 4757.010308] watchdog: BUG: soft lockup - CPU#11 stuck for 94s! [ 4757.010698] Call trace: [ 4757.010700]  blkcg_destroy_blkgs+0x68/0x150 [ 4757.010701]  cgwb_release_workfn+0x104/0x158 [ 4757.010702]  process_one_work+0x1bc/0x3f0 [ 4757.010704]  worker_thread+0x164/0x468 [ 4757.010705]  kthread+0x108/0x138 Suggested-by: Tejun Heo Signed-off-by: Baolin Wang Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 031114d454a6..4221a1539391 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1016,6 +1016,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) */ void blkcg_destroy_blkgs(struct blkcg *blkcg) { + might_sleep(); + spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { @@ -1023,14 +1025,20 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; - if (spin_trylock(&q->queue_lock)) { - blkg_destroy(blkg); - spin_unlock(&q->queue_lock); - } else { + if (need_resched() || !spin_trylock(&q->queue_lock)) { + /* + * Given that the system can accumulate a huge number + * of blkgs in pathological cases, check to see if we + * need to rescheduling to avoid softlockup. + */ spin_unlock_irq(&blkcg->lock); - cpu_relax(); + cond_resched(); spin_lock_irq(&blkcg->lock); + continue; } + + blkg_destroy(blkg); + spin_unlock(&q->queue_lock); } spin_unlock_irq(&blkcg->lock); From 0fe37724f8e70fa4cb72948f60fca553702df768 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Jan 2021 15:36:19 +0900 Subject: [PATCH 34/50] block: fix bd_size_lock use Some block device drivers, e.g. the skd driver, call set_capacity() with IRQ disabled. This results in lockdep ito complain about inconsistent lock states ("inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage") because set_capacity takes a block device bd_size_lock using the functions spin_lock() and spin_unlock(). Ensure a consistent locking state by replacing these calls with spin_lock_irqsave() and spin_lock_irqrestore(). The same applies to bdev_set_nr_sectors(). With this fix, all lockdep complaints are resolved. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/genhd.c | 5 +++-- block/partitions/core.c | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 419548e92d82..9e741a4f351b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -45,10 +45,11 @@ static void disk_release_events(struct gendisk *disk); void set_capacity(struct gendisk *disk, sector_t sectors) { struct block_device *bdev = disk->part0; + unsigned long flags; - spin_lock(&bdev->bd_size_lock); + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } EXPORT_SYMBOL(set_capacity); diff --git a/block/partitions/core.c b/block/partitions/core.c index 23460cee9de5..4601a845cd79 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -88,9 +88,11 @@ static int (*check_part[])(struct parsed_partitions *) = { static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - spin_lock(&bdev->bd_size_lock); + unsigned long flags; + + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } static struct parsed_partitions *allocate_partitions(struct gendisk *hd) From 0df28cad06eb41cc36bfea69d9c882fb567fd0d6 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 28 Jan 2021 18:48:47 +0800 Subject: [PATCH 35/50] bcache: only check feature sets when sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES For super block version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES, it doesn't make sense to check the feature sets. This patch checks super block version in bch_has_feature_* routines, if the version doesn't have feature sets yet, returns 0 (false) to the caller. Fixes: 5342fd425502 ("bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Cc: stable@vger.kernel.org # 5.9+ Reported-and-tested-by: Bockholdt Arne Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 84fc2c0f0101..d1c8fd3977fc 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -33,6 +33,8 @@ #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_compat & \ BCH##_FEATURE_COMPAT_##flagname) != 0); \ } \ @@ -50,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_ro_compat & \ BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \ } \ @@ -67,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_incompat & \ BCH##_FEATURE_INCOMPAT_##flagname) != 0); \ } \ From 899199292b14b7c735808a37517de4dd2160c300 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 25 Jan 2021 21:19:16 -0800 Subject: [PATCH 36/50] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a SPCC device This adds a quirk for SPCC 256GB NVMe 1.3 drive which fixes timeouts and I/O errors due to the fact that the controller does not properly handle the Write Zeroes command: [ 2745.659527] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 5.10.6-BET #1 [ 2745.659528] Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 3001 12/04/2020 [ 2776.138874] nvme nvme1: I/O 414 QID 3 timeout, aborting [ 2776.138886] nvme nvme1: I/O 415 QID 3 timeout, aborting [ 2776.138891] nvme nvme1: I/O 416 QID 3 timeout, aborting [ 2776.138895] nvme nvme1: I/O 417 QID 3 timeout, aborting [ 2776.138912] nvme nvme1: Abort status: 0x0 [ 2776.138921] nvme nvme1: I/O 428 QID 3 timeout, aborting [ 2776.138922] nvme nvme1: Abort status: 0x0 [ 2776.138925] nvme nvme1: Abort status: 0x0 [ 2776.138974] nvme nvme1: Abort status: 0x0 [ 2776.138977] nvme nvme1: Abort status: 0x0 [ 2806.346792] nvme nvme1: I/O 414 QID 3 timeout, reset controller [ 2806.363566] nvme nvme1: 15/0/0 default/read/poll queues [ 2836.554298] nvme nvme1: I/O 415 QID 3 timeout, disable controller [ 2836.672064] blk_update_request: I/O error, dev nvme1n1, sector 16350 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672072] blk_update_request: I/O error, dev nvme1n1, sector 16093 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672074] blk_update_request: I/O error, dev nvme1n1, sector 15836 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672076] blk_update_request: I/O error, dev nvme1n1, sector 15579 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672078] blk_update_request: I/O error, dev nvme1n1, sector 15322 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672080] blk_update_request: I/O error, dev nvme1n1, sector 15065 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672082] blk_update_request: I/O error, dev nvme1n1, sector 14808 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672083] blk_update_request: I/O error, dev nvme1n1, sector 14551 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672085] blk_update_request: I/O error, dev nvme1n1, sector 14294 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672087] blk_update_request: I/O error, dev nvme1n1, sector 14037 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672121] nvme nvme1: failed to mark controller live state [ 2836.672123] nvme nvme1: Removing after probe failure status: -19 [ 2836.689016] Aborting journal on device dm-0-8. [ 2836.689024] Buffer I/O error on dev dm-0, logical block 25198592, lost sync page write [ 2836.689027] JBD2: Error -5 detected when updating journal superblock for dm-0-8. Reported-by: Bradley Chapman Signed-off-by: Chaitanya Kulkarni Tested-by: Bradley Chapman Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 856aa31931c1..81e6389b2042 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3257,6 +3257,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, From d1bcf006a9d3d63c1bcb65a993cb13756954cd9c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 27 Jan 2021 11:30:33 +0100 Subject: [PATCH 37/50] nvme-multipath: Early exit if no path is available nvme_round_robin_path() should test if the return ns pointer is valid. nvme_next_ns() will return a NULL pointer if there is no path left. Fixes: 75c10e732724 ("nvme-multipath: round-robin I/O policy") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9ac762b28811..282b7a4ea9a9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -221,7 +221,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, } for (ns = nvme_next_ns(head, old); - ns != old; + ns && ns != old; ns = nvme_next_ns(head, ns)) { if (nvme_path_is_disabled(ns)) continue; From 772ea326a4a00b6b4b2c8f3606ad10c31f46c511 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 28 Jan 2021 11:33:51 +0800 Subject: [PATCH 38/50] nvme-core: use list_add_tail_rcu instead of list_add_tail for nvme_init_ns_head The "list" of nvme_ns_head is used as rcu list, now in nvme_init_ns_head list_add_tail is used to add ns->siblings to the rcu list. It is not safe. Should use list_add_tail_rcu instead of list_add_tail. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8caf9b34734d..f13eb4ded95f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3829,7 +3829,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } } - list_add_tail(&ns->siblings, &head->list); + list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); return 0; From a119f87b86bcdf14a18ce39a899e97a1e9160f7f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jan 2021 13:28:59 -0500 Subject: [PATCH 39/50] Revert "drm/amdgpu/swsmu: drop set_fan_speed_percent (v2)" On some boards the rpm interface apparently does not work at all leading to the fan not spinning or spinning at strange speeds. Revert this for now to fix 5.10, 5.11. The follow on patch fixes this properly for 5.12. This reverts commit 8d6e65adc25e23fabbc5293b6cd320195c708dca. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1408 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 3 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 ++---- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 1 + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 1 + .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 1 + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 31 ++++++++++++++++++- 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 4bdbcce7092d..0d797fa9f5cc 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -553,6 +553,7 @@ struct pptable_funcs { *clock_req); uint32_t (*get_fan_control_mode)(struct smu_context *smu); int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); + int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed); int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate); int (*gfx_off_control)(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 13de692a4213..5d0b29653ffa 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -203,6 +203,9 @@ int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode); +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed); + int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8b867a6d52b5..e84c737e3967 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2151,19 +2151,14 @@ int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed) int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) { int ret = 0; - uint32_t rpm; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_fan_speed_rpm) { - if (speed > 100) - speed = 100; - rpm = speed * smu->fan_max_rpm / 100; - ret = smu->ppt_funcs->set_fan_speed_rpm(smu, rpm); - } + if (smu->ppt_funcs->set_fan_speed_percent) + ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed); mutex_unlock(&smu->mutex); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index cd7b411457ff..16db0b506b0d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2326,6 +2326,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 51e83123f72a..cd7efa923195 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2456,6 +2456,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 12b36eb0ff6a..d68d3dfee51d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2802,6 +2802,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index b279dbbbce6b..5aeb5f5a0447 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1173,6 +1173,35 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) return 0; } +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + if (speed > 100) + speed = 100; + + if (smu_v11_0_auto_fan_control(smu, 0)) + return -EINVAL; + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 100); + duty = (uint32_t)tmp64; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), + CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); + + return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); +} + int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode) @@ -1181,7 +1210,7 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu, switch (mode) { case AMD_FAN_CTRL_NONE: - ret = smu_v11_0_set_fan_speed_rpm(smu, smu->fan_max_rpm); + ret = smu_v11_0_set_fan_speed_percent(smu, 100); break; case AMD_FAN_CTRL_MANUAL: ret = smu_v11_0_auto_fan_control(smu, 0); From 00190bc087e795290502dc51c5d32de85cb2c2b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2021 13:23:20 +0100 Subject: [PATCH 40/50] amdgpu: fix clang build warning clang warns about the -mhard-float command line arguments on architectures that do not support this: clang: error: argument unused during compilation: '-mhard-float' [-Werror,-Wunused-command-line-argument] Move this into the gcc-specific arguments. Fixes: e77165bf7b02 ("drm/amd/display: Add DCN3 blocks to Makefile") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 6 ++++-- drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 3 ++- drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index c20331eb62e0..dfd77b3cc84d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -32,8 +32,8 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -msse endif ifdef CONFIG_PPC64 @@ -45,6 +45,8 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mhard-float +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mhard-float endif ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 3ca7d911d25c..09264716d1dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -14,7 +14,7 @@ DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \ dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -msse endif ifdef CONFIG_PPC64 @@ -25,6 +25,7 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o += -mhard-float endif ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 8d4924b7dc22..101620a8867a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -13,7 +13,7 @@ DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -msse endif ifdef CONFIG_PPC64 @@ -24,6 +24,7 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mhard-float endif ifdef CONFIG_X86 From f609cbb8911e40e15f9055e8f945f926ac906924 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 18:39:24 +0000 Subject: [PATCH 41/50] io_uring: fix list corruption for splice file_get kernel BUG at lib/list_debug.c:29! Call Trace: __list_add include/linux/list.h:67 [inline] list_add include/linux/list.h:86 [inline] io_file_get+0x8cc/0xdb0 fs/io_uring.c:6466 __io_splice_prep+0x1bc/0x530 fs/io_uring.c:3866 io_splice_prep fs/io_uring.c:3920 [inline] io_req_prep+0x3546/0x4e80 fs/io_uring.c:6081 io_queue_sqe+0x609/0x10d0 fs/io_uring.c:6628 io_submit_sqe fs/io_uring.c:6705 [inline] io_submit_sqes+0x1495/0x2720 fs/io_uring.c:6953 __do_sys_io_uring_enter+0x107d/0x1f30 fs/io_uring.c:9353 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_file_get() may be called from splice, and so REQ_F_INFLIGHT may already be set. Fixes: 02a13674fa0e8 ("io_uring: account io_uring internal files as REQ_F_INFLIGHT") Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+6879187cf57845801267@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ae388cc52843..39ae1f821cef 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6460,7 +6460,8 @@ static struct file *io_file_get(struct io_submit_state *state, file = __io_file_get(state, fd); } - if (file && file->f_op == &io_uring_fops) { + if (file && file->f_op == &io_uring_fops && + !(req->flags & REQ_F_INFLIGHT)) { io_req_init_async(req); req->flags |= REQ_F_INFLIGHT; From 70b2c60d3797bffe182dddb9bb55975b9be5889a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 18:39:25 +0000 Subject: [PATCH 42/50] io_uring: fix sqo ownership false positive warning WARNING: CPU: 0 PID: 21359 at fs/io_uring.c:9042 io_uring_cancel_task_requests+0xe55/0x10c0 fs/io_uring.c:9042 Call Trace: io_uring_flush+0x47b/0x6e0 fs/io_uring.c:9227 filp_close+0xb4/0x170 fs/open.c:1295 close_files fs/file.c:403 [inline] put_files_struct fs/file.c:418 [inline] put_files_struct+0x1cc/0x350 fs/file.c:415 exit_files+0x7e/0xa0 fs/file.c:435 do_exit+0xc22/0x2ae0 kernel/exit.c:820 do_group_exit+0x125/0x310 kernel/exit.c:922 get_signal+0x427/0x20f0 kernel/signal.c:2773 arch_do_signal_or_restart+0x2a8/0x1eb0 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x148/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Now io_uring_cancel_task_requests() can be called not through file notes but directly, remove a WARN_ONCE() there that give us false positives. That check is not very important and we catch it in other places. Fixes: 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks") Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+3e3d9bd0c6ce9efbc3ef@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 39ae1f821cef..12bf7180c0f1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8967,8 +8967,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - /* for SQPOLL only sqo_task has task notes */ - WARN_ON_ONCE(ctx->sqo_task != current); io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); From 3a7efd1ad269ccaf9c1423364d97c9661ba6dafa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 23:23:42 +0000 Subject: [PATCH 43/50] io_uring: reinforce cancel on flush during exit What 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks") really wants is to cancel all relevant REQ_F_INFLIGHT requests reliably. That can be achieved by io_uring_cancel_files(), but we'll miss it calling io_uring_cancel_task_requests(files=NULL) from io_uring_flush(), because it will go through __io_uring_cancel_task_requests(). Just always call io_uring_cancel_files() during cancel, it's good enough for now. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 12bf7180c0f1..38c6cbe1ab38 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8976,10 +8976,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_cancel_defer_files(ctx, task, files); io_cqring_overflow_flush(ctx, true, task, files); + io_uring_cancel_files(ctx, task, files); if (!files) __io_uring_cancel_task_requests(ctx, task); - else - io_uring_cancel_files(ctx, task, files); if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); From fd55b61ebd31449549e14c33574825d64de2b29b Mon Sep 17 00:00:00 2001 From: Bastian Beranek Date: Thu, 21 Jan 2021 15:27:36 +0100 Subject: [PATCH 44/50] drm/nouveau/dispnv50: Restore pushing of all data. Commit f844eb485eb056ad3b67e49f95cbc6c685a73db4 introduced a regression for NV50, which lead to visual artifacts, tearing and eventual crashes. In the changes of f844eb485eb056ad3b67e49f95cbc6c685a73db4 only the first line was correctly translated to the new NVIDIA header macros: - PUSH_NVSQ(push, NV827C, 0x0110, 0, - 0x0114, 0); + PUSH_MTHD(push, NV827C, SET_PROCESSING, + NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); The lower part ("0x0114, 0") was probably omitted by accident. This patch restores the push of the missing data and fixes the regression. Signed-off-by: Bastian Beranek Fixes: f844eb485eb05 ("drm/nouveau/kms/nv50-: use NVIDIA's headers for wndw image_set()") Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/14 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/base507c.c | 6 +++++- drivers/gpu/drm/nouveau/dispnv50/base827c.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c index 302d4e6fc52f..788db043a342 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base507c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c @@ -88,7 +88,11 @@ base507c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV507C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV507C, SET_PROCESSING, - NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV507C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV507C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV507C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8); diff --git a/drivers/gpu/drm/nouveau/dispnv50/base827c.c b/drivers/gpu/drm/nouveau/dispnv50/base827c.c index 18d34096f125..093d4ba6910e 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base827c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base827c.c @@ -49,7 +49,11 @@ base827c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV827C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV827C, SET_PROCESSING, - NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV827C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV827C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV827C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8, From dcd602cc5fe2803bf532d407cde24ba0b7808ff3 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 18 Jan 2021 18:16:06 +0100 Subject: [PATCH 45/50] drm/nouveau/svm: fail NOUVEAU_SVM_INIT ioctl on unsupported devices Fixes a crash when trying to create a channel on e.g. Turing GPUs when NOUVEAU_SVM_INIT was called before. Fixes: eeaf06ac1a558 ("drm/nouveau/svm: initial support for shared virtual memory") Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 4f69e4c3dafd..1c3f890377d2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -315,6 +315,10 @@ nouveau_svmm_init(struct drm_device *dev, void *data, struct drm_nouveau_svm_init *args = data; int ret; + /* We need to fail if svm is disabled */ + if (!cli->drm->svm) + return -ENOSYS; + /* Allocate tracking for SVM-enabled VMM. */ if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL))) return -ENOMEM; From 7c6d659868c77da9b518f32348160340dcdfa008 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:12 -0500 Subject: [PATCH 46/50] drivers/nouveau/kms/nv50-: Reject format modifiers for cursor planes Nvidia hardware doesn't actually support using tiling formats with the cursor plane, only linear is allowed. In the future, we should write a testcase for this. Fixes: c586f30bf74c ("drm/nouveau/kms: Add format mod prop to base/ovly/nvdisp") Cc: James Jones Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Cc: # v5.8+ Signed-off-by: Lyude Paul Reviewed-by: Simon Ser Reviewed-by: James Jones Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index ce451242f79e..271de3a63f21 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -702,6 +702,11 @@ nv50_wndw_init(struct nv50_wndw *wndw) nvif_notify_get(&wndw->notify); } +static const u64 nv50_cursor_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +}; + int nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, enum drm_plane_type type, const char *name, int index, @@ -713,6 +718,7 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, struct nvif_mmu *mmu = &drm->client.mmu; struct nv50_disp *disp = nv50_disp(dev); struct nv50_wndw *wndw; + const u64 *format_modifiers; int nformat; int ret; @@ -728,10 +734,13 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, for (nformat = 0; format[nformat]; nformat++); - ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, - format, nformat, - nouveau_display(dev)->format_modifiers, - type, "%s-%d", name, index); + if (type == DRM_PLANE_TYPE_CURSOR) + format_modifiers = nv50_cursor_format_modifiers; + else + format_modifiers = nouveau_display(dev)->format_modifiers; + + ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, format, nformat, + format_modifiers, type, "%s-%d", name, index); if (ret) { kfree(*pwndw); *pwndw = NULL; From d3b2f0f7921c75b5f0de50e618e4bd165fded3e1 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:13 -0500 Subject: [PATCH 47/50] drm/nouveau/kms/nv50-: Report max cursor size to userspace Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Signed-off-by: Lyude Paul Tested-by: Simon Ser Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index c6367035970e..5f4f09a601d4 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2663,6 +2663,14 @@ nv50_display_create(struct drm_device *dev) else nouveau_display(dev)->format_modifiers = disp50xx_modifiers; + if (disp->disp->object.oclass >= GK104_DISP) { + dev->mode_config.cursor_width = 256; + dev->mode_config.cursor_height = 256; + } else { + dev->mode_config.cursor_width = 64; + dev->mode_config.cursor_height = 64; + } + /* create crtc objects to represent the hw heads */ if (disp->disp->object.oclass >= GV100_DISP) crtcs = nvif_rd32(&device->object, 0x610060) & 0xff; From ba839b7598440a5d78550a115bac21b08d57cc32 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:14 -0500 Subject: [PATCH 48/50] drm/nouveau/kms/gk104-gp1xx: Fix > 64x64 cursors While we do handle the additional cursor sizes introduced in NVE4, it looks like we accidentally broke this when converting over to use Nvidia's display headers. Since we now use NVVAL in dispnv50/head907d.c in order to format the value for the cursor layout and NVD9 only had one byte reserved vs. the 2 bytes reserved in later generations, we end up accidentally stripping the second bit in the cursor layout format parameter - causing us to set the wrong cursor size. This fixes that by adding our own curs_set hook for 917d which uses the NV917D headers. Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Cc: # v5.9+ Signed-off-by: Lyude Paul Fixes: ed0b86a90bf9 ("drm/nouveau/kms/nv50-: use NVIDIA's headers for core head_curs_set()") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/head917d.c | 28 ++++++++++++++++++- .../drm/nouveau/include/nvhw/class/cl917d.h | 4 +++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/head917d.c b/drivers/gpu/drm/nouveau/dispnv50/head917d.c index a5d827403660..ea9f8667305e 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head917d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head917d.c @@ -22,6 +22,7 @@ #include "head.h" #include "core.h" +#include "nvif/push.h" #include #include @@ -73,6 +74,31 @@ head917d_base(struct nv50_head *head, struct nv50_head_atom *asyh) return 0; } +static int +head917d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nvif_push *push = nv50_disp(head->base.base.dev)->core->chan.push; + const int i = head->base.index; + int ret; + + ret = PUSH_WAIT(push, 5); + if (ret) + return ret; + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTROL_CURSOR(i), + NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, ENABLE, ENABLE) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, FORMAT, asyh->curs.format) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, SIZE, asyh->curs.layout) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_X, 0) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_Y, 0) | + NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, COMPOSITION, ALPHA_BLEND), + + HEAD_SET_OFFSET_CURSOR(i), asyh->curs.offset >> 8); + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTEXT_DMA_CURSOR(i), asyh->curs.handle); + return 0; +} + int head917d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw, struct nv50_head_atom *asyh) @@ -101,7 +127,7 @@ head917d = { .core_clr = head907d_core_clr, .curs_layout = head917d_curs_layout, .curs_format = head507d_curs_format, - .curs_set = head907d_curs_set, + .curs_set = head917d_curs_set, .curs_clr = head907d_curs_clr, .base = head917d_base, .ovly = head907d_ovly, diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h index 2a2612d6e1e0..fb223723a38a 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h @@ -66,6 +66,10 @@ #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_ALPHA_BLEND (0x00000000) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_PREMULT_ALPHA_BLEND (0x00000001) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_XOR (0x00000002) +#define NV917D_HEAD_SET_OFFSET_CURSOR(a) (0x00000484 + (a)*0x00000300) +#define NV917D_HEAD_SET_OFFSET_CURSOR_ORIGIN 31:0 +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR(a) (0x0000048C + (a)*0x00000300) +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR_HANDLE 31:0 #define NV917D_HEAD_SET_DITHER_CONTROL(a) (0x000004A0 + (a)*0x00000300) #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE 0:0 #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE_DISABLE (0x00000000) From cd92cdb9c8bcfc27a8f28bcbf7c414a0ea79e5ec Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 29 Jan 2021 23:47:25 +0900 Subject: [PATCH 49/50] null_blk: cleanup zoned mode initialization To avoid potential compilation problems, replaced the badly written MB_TO_SECTS() macro (missing parenthesis around the argument use) with the inline function mb_to_sects(). And while at it, simplify the calculation of the total number of zones of the device using the round_up() macro. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 148b871f263b..fce0a54df0e5 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -6,7 +6,10 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) +static inline sector_t mb_to_sects(unsigned long mb) +{ + return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT; +} static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { @@ -77,12 +80,11 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); - dev_capacity_sects = MB_TO_SECTS(dev->size); - dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); - dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); - if (dev_capacity_sects & (dev->zone_size_sects - 1)) - dev->nr_zones++; + zone_capacity_sects = mb_to_sects(dev->zone_capacity); + dev_capacity_sects = mb_to_sects(dev->size); + dev->zone_size_sects = mb_to_sects(dev->zone_size); + dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects) + >> ilog2(dev->zone_size_sects); dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone), GFP_KERNEL | __GFP_ZERO); From a9cbbb80e3e7dd38ceac166e0698f161862a18ae Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Jan 2021 12:28:20 -0800 Subject: [PATCH 50/50] tty: avoid using vfs_iocb_iter_write() for redirected console writes It turns out that the vfs_iocb_iter_{read,write}() functions are entirely broken, and don't actually use the passed-in file pointer for IO - only for the preparatory work (permission checking and for the write_iter function lookup). That worked fine for overlayfs, which always builds the new iocb with the same file pointer that it passes in, but in the general case it ends up doing nonsensical things (and could cause an iterator call that doesn't even match the passed-in file pointer). This subtly broke the tty conversion to write_iter in commit 9bb48c82aced ("tty: implement write_iter"), because the console redirection didn't actually end up redirecting anything, since the passed-in file pointer was basically ignored, and the actual write was done with the original non-redirected console tty after all. The main visible effect of this is that the console messages were no longer logged to /var/log/boot.log during graphical boot. Fix the issue by simply not using the vfs write "helper" function at all, and just redirecting the write entirely internally to the tty layer. Do the target writability permission checks when actually registering the target tty with TIOCCONS instead of at write time. Fixes: 9bb48c82aced ("tty: implement write_iter") Reported-and-tested-by: Hans de Goede Cc: Greg Kroah-Hartman Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/tty/tty_io.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 48de20916ca7..816e709afa56 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1026,9 +1026,8 @@ void tty_write_message(struct tty_struct *tty, char *msg) * write method will not be invoked in parallel for each device. */ -static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) +static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { - struct file *file = iocb->ki_filp; struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; @@ -1051,6 +1050,11 @@ static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) return ret; } +static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) +{ + return file_tty_write(iocb->ki_filp, iocb, from); +} + ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; @@ -1060,9 +1064,13 @@ ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) p = get_file(redirect); spin_unlock(&redirect_lock); + /* + * We know the redirected tty is just another tty, we can can + * call file_tty_write() directly with that file pointer. + */ if (p) { ssize_t res; - res = vfs_iocb_iter_write(p, iocb, iter); + res = file_tty_write(p, iocb, iter); fput(p); return res; } @@ -2308,6 +2316,12 @@ static int tioccons(struct file *file) fput(f); return 0; } + if (file->f_op->write_iter != tty_write) + return -ENOTTY; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; spin_lock(&redirect_lock); if (redirect) { spin_unlock(&redirect_lock);