diff --git a/block/Kconfig b/block/Kconfig index af5f70b07037..e657e2bd8059 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -161,8 +161,6 @@ config BLK_WBT_MQ depends on BLK_WBT help Enable writeback throttling by default on multiqueue devices. - Multiqueue currently doesn't have support for IO scheduling, - enabling this option is recommended. config BLK_DEBUG_FS bool "Block layer debugging information in debugfs" diff --git a/block/blk-core.c b/block/blk-core.c index 10c08ac50697..062efdedc994 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -116,8 +116,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); - rq->tag = -1; - rq->internal_tag = -1; + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = BLK_MQ_NO_TAG; rq->start_time_ns = ktime_get_ns(); rq->part = NULL; refcount_set(&rq->ref, 1); @@ -643,162 +643,6 @@ void blk_put_request(struct request *req) } EXPORT_SYMBOL(blk_put_request); -static void blk_account_io_merge_bio(struct request *req) -{ - if (!blk_do_io_stat(req)) - return; - - part_stat_lock(); - part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); - part_stat_unlock(); -} - -bool bio_attempt_back_merge(struct request *req, struct bio *bio, - unsigned int nr_segs) -{ - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - - if (!ll_back_merge_fn(req, bio, nr_segs)) - return false; - - trace_block_bio_backmerge(req->q, req, bio); - rq_qos_merge(req->q, req, bio); - - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) - blk_rq_set_mixed_merge(req); - - req->biotail->bi_next = bio; - req->biotail = bio; - req->__data_len += bio->bi_iter.bi_size; - - bio_crypt_free_ctx(bio); - - blk_account_io_merge_bio(req); - return true; -} - -bool bio_attempt_front_merge(struct request *req, struct bio *bio, - unsigned int nr_segs) -{ - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - - if (!ll_front_merge_fn(req, bio, nr_segs)) - return false; - - 
trace_block_bio_frontmerge(req->q, req, bio); - rq_qos_merge(req->q, req, bio); - - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) - blk_rq_set_mixed_merge(req); - - bio->bi_next = req->bio; - req->bio = bio; - - req->__sector = bio->bi_iter.bi_sector; - req->__data_len += bio->bi_iter.bi_size; - - bio_crypt_do_front_merge(req, bio); - - blk_account_io_merge_bio(req); - return true; -} - -bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, - struct bio *bio) -{ - unsigned short segments = blk_rq_nr_discard_segments(req); - - if (segments >= queue_max_discard_segments(q)) - goto no_merge; - if (blk_rq_sectors(req) + bio_sectors(bio) > - blk_rq_get_max_sectors(req, blk_rq_pos(req))) - goto no_merge; - - rq_qos_merge(q, req, bio); - - req->biotail->bi_next = bio; - req->biotail = bio; - req->__data_len += bio->bi_iter.bi_size; - req->nr_phys_segments = segments + 1; - - blk_account_io_merge_bio(req); - return true; -no_merge: - req_set_nomerge(q, req); - return false; -} - -/** - * blk_attempt_plug_merge - try to merge with %current's plugged list - * @q: request_queue new bio is being queued at - * @bio: new bio being queued - * @nr_segs: number of segments in @bio - * @same_queue_rq: pointer to &struct request that gets filled in when - * another request associated with @q is found on the plug list - * (optional, may be %NULL) - * - * Determine whether @bio being queued on @q can be merged with a request - * on %current's plugged list. Returns %true if merge was successful, - * otherwise %false. - * - * Plugging coalesces IOs from the same issuer for the same purpose without - * going through @q->queue_lock. As such it's more of an issuing mechanism - * than scheduling, and the request, while may have elvpriv data, is not - * added on the elevator at this point. In addition, we don't have - * reliable access to the elevator outside queue lock. Only check basic - * merging parameters without querying the elevator. 
- * - * Caller must ensure !blk_queue_nomerges(q) beforehand. - */ -bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - unsigned int nr_segs, struct request **same_queue_rq) -{ - struct blk_plug *plug; - struct request *rq; - struct list_head *plug_list; - - plug = blk_mq_plug(q, bio); - if (!plug) - return false; - - plug_list = &plug->mq_list; - - list_for_each_entry_reverse(rq, plug_list, queuelist) { - bool merged = false; - - if (rq->q == q && same_queue_rq) { - /* - * Only blk-mq multiple hardware queues case checks the - * rq in the same queue, there should be only one such - * rq in a queue - **/ - *same_queue_rq = rq; - } - - if (rq->q != q || !blk_rq_merge_ok(rq, bio)) - continue; - - switch (blk_try_merge(rq, bio)) { - case ELEVATOR_BACK_MERGE: - merged = bio_attempt_back_merge(rq, bio, nr_segs); - break; - case ELEVATOR_FRONT_MERGE: - merged = bio_attempt_front_merge(rq, bio, nr_segs); - break; - case ELEVATOR_DISCARD_MERGE: - merged = bio_attempt_discard_merge(q, rq, bio); - break; - default: - break; - } - - if (merged) - return true; - } - - return false; -} - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; diff --git a/block/blk-map.c b/block/blk-map.c index 6e804892d5ec..be118926ccf4 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -12,7 +12,8 @@ #include "blk.h" struct bio_map_data { - int is_our_pages; + bool is_our_pages : 1; + bool is_null_mapped : 1; struct iov_iter iter; struct iovec iov[]; }; @@ -108,7 +109,7 @@ static int bio_uncopy_user(struct bio *bio) struct bio_map_data *bmd = bio->bi_private; int ret = 0; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + if (!bmd->is_null_mapped) { /* * if we're in a workqueue, the request is orphaned, so * don't copy into a random user address space, just free @@ -126,24 +127,12 @@ static int bio_uncopy_user(struct bio *bio) return ret; } -/** - * bio_copy_user_iov - copy user data to bio - * @q: destination block queue - * @map_data: 
pointer to the rq_map_data holding pages (if necessary) - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. - */ -static struct bio *bio_copy_user_iov(struct request_queue *q, - struct rq_map_data *map_data, struct iov_iter *iter, - gfp_t gfp_mask) +static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, + struct iov_iter *iter, gfp_t gfp_mask) { struct bio_map_data *bmd; struct page *page; - struct bio *bio; + struct bio *bio, *bounce_bio; int i = 0, ret; int nr_pages; unsigned int len = iter->count; @@ -151,14 +140,14 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, bmd = bio_alloc_map_data(iter, gfp_mask); if (!bmd) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* * We need to do a deep copy of the iov_iter including the iovecs. * The caller provided iov might point to an on-stack or otherwise * shortlived one. */ - bmd->is_our_pages = map_data ? 
0 : 1; + bmd->is_our_pages = !map_data; nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); if (nr_pages > BIO_MAX_PAGES) @@ -168,8 +157,7 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, bio = bio_kmalloc(gfp_mask, nr_pages); if (!bio) goto out_bmd; - - ret = 0; + bio->bi_opf |= req_op(rq); if (map_data) { nr_pages = 1 << map_data->page_order; @@ -186,7 +174,7 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, if (map_data) { if (i == map_data->nr_entries * nr_pages) { ret = -ENOMEM; - break; + goto cleanup; } page = map_data->pages[i / nr_pages]; @@ -194,14 +182,14 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, i++; } else { - page = alloc_page(q->bounce_gfp | gfp_mask); + page = alloc_page(rq->q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; - break; + goto cleanup; } } - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) { if (!map_data) __free_page(page); break; @@ -211,9 +199,6 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, offset = 0; } - if (ret) - goto cleanup; - if (map_data) map_data->offset += bio->bi_iter.bi_size; @@ -234,40 +219,43 @@ static struct bio *bio_copy_user_iov(struct request_queue *q, bio->bi_private = bmd; if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - return bio; + bmd->is_null_mapped = true; + + bounce_bio = bio; + ret = blk_rq_append_bio(rq, &bounce_bio); + if (ret) + goto cleanup; + + /* + * We link the bounce buffer in and could have to traverse it later, so + * we have to get a ref to prevent it from being freed + */ + bio_get(bounce_bio); + return 0; cleanup: if (!map_data) bio_free_pages(bio); bio_put(bio); out_bmd: kfree(bmd); - return ERR_PTR(ret); + return ret; } -/** - * bio_map_user_iov - map user iovec into bio - * @q: the struct request_queue for the bio - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Map the user space address 
into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -static struct bio *bio_map_user_iov(struct request_queue *q, - struct iov_iter *iter, gfp_t gfp_mask) +static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, + gfp_t gfp_mask) { - unsigned int max_sectors = queue_max_hw_sectors(q); - int j; - struct bio *bio; + unsigned int max_sectors = queue_max_hw_sectors(rq->q); + struct bio *bio, *bounce_bio; int ret; + int j; if (!iov_iter_count(iter)) - return ERR_PTR(-EINVAL); + return -EINVAL; bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); if (!bio) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + bio->bi_opf |= req_op(rq); while (iov_iter_count(iter)) { struct page **pages; @@ -283,7 +271,7 @@ static struct bio *bio_map_user_iov(struct request_queue *q, npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - if (unlikely(offs & queue_dma_alignment(q))) { + if (unlikely(offs & queue_dma_alignment(rq->q))) { ret = -EINVAL; j = 0; } else { @@ -295,7 +283,7 @@ static struct bio *bio_map_user_iov(struct request_queue *q, if (n > bytes) n = bytes; - if (!bio_add_hw_page(q, bio, page, n, offs, + if (!bio_add_hw_page(rq->q, bio, page, n, offs, max_sectors, &same_page)) { if (same_page) put_page(page); @@ -319,21 +307,31 @@ static struct bio *bio_map_user_iov(struct request_queue *q, break; } - bio_set_flag(bio, BIO_USER_MAPPED); - /* - * subtle -- if bio_map_user_iov() ended up bouncing a bio, - * it would normally disappear when its bi_end_io is run. - * however, we need it for the unmap, so grab an extra - * reference to it + * Subtle: if we end up needing to bounce a bio, it would normally + * disappear when its bi_end_io is run. 
However, we need the original + * bio for the unmap, so grab an extra reference to it */ bio_get(bio); - return bio; + bounce_bio = bio; + ret = blk_rq_append_bio(rq, &bounce_bio); + if (ret) + goto out_put_orig; + + /* + * We link the bounce buffer in and could have to traverse it + * later, so we have to get a ref to prevent it from being freed + */ + bio_get(bounce_bio); + return 0; + + out_put_orig: + bio_put(bio); out_unmap: bio_release_pages(bio, false); bio_put(bio); - return ERR_PTR(ret); + return ret; } /** @@ -557,55 +555,6 @@ int blk_rq_append_bio(struct request *rq, struct bio **bio) } EXPORT_SYMBOL(blk_rq_append_bio); -static int __blk_rq_unmap_user(struct bio *bio) -{ - int ret = 0; - - if (bio) { - if (bio_flagged(bio, BIO_USER_MAPPED)) - bio_unmap_user(bio); - else - ret = bio_uncopy_user(bio); - } - - return ret; -} - -static int __blk_rq_map_user_iov(struct request *rq, - struct rq_map_data *map_data, struct iov_iter *iter, - gfp_t gfp_mask, bool copy) -{ - struct request_queue *q = rq->q; - struct bio *bio, *orig_bio; - int ret; - - if (copy) - bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); - else - bio = bio_map_user_iov(q, iter, gfp_mask); - - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_opf &= ~REQ_OP_MASK; - bio->bi_opf |= req_op(rq); - - orig_bio = bio; - - /* - * We link the bounce buffer in and could have to traverse it - * later so we have to get a ref to prevent it from being freed - */ - ret = blk_rq_append_bio(rq, &bio); - if (ret) { - __blk_rq_unmap_user(orig_bio); - return ret; - } - bio_get(bio); - - return 0; -} - /** * blk_rq_map_user_iov - map user data to a request, for passthrough requests * @q: request queue where request should be inserted @@ -649,7 +598,10 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, i = *iter; do { - ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); + if (copy) + ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask); + else + ret = bio_map_user_iov(rq, &i, 
gfp_mask); if (ret) goto unmap_rq; if (!bio) @@ -700,9 +652,13 @@ int blk_rq_unmap_user(struct bio *bio) if (unlikely(bio_flagged(bio, BIO_BOUNCED))) mapped_bio = bio->bi_private; - ret2 = __blk_rq_unmap_user(mapped_bio); - if (ret2 && !ret) - ret = ret2; + if (bio->bi_private) { + ret2 = bio_uncopy_user(mapped_bio); + if (ret2 && !ret) + ret = ret2; + } else { + bio_unmap_user(mapped_bio); + } mapped_bio = bio; bio = bio->bi_next; diff --git a/block/blk-merge.c b/block/blk-merge.c index f685d633bcc9..80c974484a3f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -11,6 +11,7 @@ #include #include "blk.h" +#include "blk-rq-qos.h" static inline bool bio_will_gap(struct request_queue *q, struct request *prev_rq, struct bio *prev, struct bio *next) @@ -895,3 +896,205 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) return ELEVATOR_FRONT_MERGE; return ELEVATOR_NO_MERGE; } + +static void blk_account_io_merge_bio(struct request *req) +{ + if (!blk_do_io_stat(req)) + return; + + part_stat_lock(); + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); + part_stat_unlock(); +} + +enum bio_merge_status bio_attempt_back_merge(struct request *req, + struct bio *bio, + unsigned int nr_segs) +{ + const int ff = bio->bi_opf & REQ_FAILFAST_MASK; + + if (!ll_back_merge_fn(req, bio, nr_segs)) + return BIO_MERGE_FAILED; + + trace_block_bio_backmerge(req->q, req, bio); + rq_qos_merge(req->q, req, bio); + + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + + req->biotail->bi_next = bio; + req->biotail = bio; + req->__data_len += bio->bi_iter.bi_size; + + bio_crypt_free_ctx(bio); + + blk_account_io_merge_bio(req); + return BIO_MERGE_OK; +} + +enum bio_merge_status bio_attempt_front_merge(struct request *req, + struct bio *bio, + unsigned int nr_segs) +{ + const int ff = bio->bi_opf & REQ_FAILFAST_MASK; + + if (!ll_front_merge_fn(req, bio, nr_segs)) + return BIO_MERGE_FAILED; + + trace_block_bio_frontmerge(req->q, req, 
bio); + rq_qos_merge(req->q, req, bio); + + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + + bio->bi_next = req->bio; + req->bio = bio; + + req->__sector = bio->bi_iter.bi_sector; + req->__data_len += bio->bi_iter.bi_size; + + bio_crypt_do_front_merge(req, bio); + + blk_account_io_merge_bio(req); + return BIO_MERGE_OK; +} + +enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q, + struct request *req, + struct bio *bio) +{ + unsigned short segments = blk_rq_nr_discard_segments(req); + + if (segments >= queue_max_discard_segments(q)) + goto no_merge; + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req, blk_rq_pos(req))) + goto no_merge; + + rq_qos_merge(q, req, bio); + + req->biotail->bi_next = bio; + req->biotail = bio; + req->__data_len += bio->bi_iter.bi_size; + req->nr_phys_segments = segments + 1; + + blk_account_io_merge_bio(req); + return BIO_MERGE_OK; +no_merge: + req_set_nomerge(q, req); + return BIO_MERGE_FAILED; +} + +static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, + struct request *rq, + struct bio *bio, + unsigned int nr_segs, + bool sched_allow_merge) +{ + if (!blk_rq_merge_ok(rq, bio)) + return BIO_MERGE_NONE; + + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (!sched_allow_merge || + (sched_allow_merge && blk_mq_sched_allow_merge(q, rq, bio))) + return bio_attempt_back_merge(rq, bio, nr_segs); + break; + case ELEVATOR_FRONT_MERGE: + if (!sched_allow_merge || + (sched_allow_merge && blk_mq_sched_allow_merge(q, rq, bio))) + return bio_attempt_front_merge(rq, bio, nr_segs); + break; + case ELEVATOR_DISCARD_MERGE: + return bio_attempt_discard_merge(q, rq, bio); + default: + return BIO_MERGE_NONE; + } + + return BIO_MERGE_FAILED; +} + +/** + * blk_attempt_plug_merge - try to merge with %current's plugged list + * @q: request_queue new bio is being queued at + * @bio: new bio being queued + * @nr_segs: number of segments in @bio + * 
@same_queue_rq: pointer to &struct request that gets filled in when + * another request associated with @q is found on the plug list + * (optional, may be %NULL) + * + * Determine whether @bio being queued on @q can be merged with a request + * on %current's plugged list. Returns %true if merge was successful, + * otherwise %false. + * + * Plugging coalesces IOs from the same issuer for the same purpose without + * going through @q->queue_lock. As such it's more of an issuing mechanism + * than scheduling, and the request, while may have elvpriv data, is not + * added on the elevator at this point. In addition, we don't have + * reliable access to the elevator outside queue lock. Only check basic + * merging parameters without querying the elevator. + * + * Caller must ensure !blk_queue_nomerges(q) beforehand. + */ +bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs, struct request **same_queue_rq) +{ + struct blk_plug *plug; + struct request *rq; + struct list_head *plug_list; + + plug = blk_mq_plug(q, bio); + if (!plug) + return false; + + plug_list = &plug->mq_list; + + list_for_each_entry_reverse(rq, plug_list, queuelist) { + if (rq->q == q && same_queue_rq) { + /* + * Only blk-mq multiple hardware queues case checks the + * rq in the same queue, there should be only one such + * rq in a queue + **/ + *same_queue_rq = rq; + } + + if (rq->q != q) + continue; + + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + } + + return false; +} + +/* + * Iterate list of requests and see if we can merge this bio with any + * of them. 
+ */ +bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, + struct bio *bio, unsigned int nr_segs) +{ + struct request *rq; + int checked = 8; + + list_for_each_entry_reverse(rq, list, queuelist) { + if (!checked--) + break; + + switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) { + case BIO_MERGE_NONE: + continue; + case BIO_MERGE_OK: + return true; + case BIO_MERGE_FAILED: + return false; + } + + } + + return false; +} +EXPORT_SYMBOL_GPL(blk_bio_list_merge); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d2790e5b06d1..501a85ceaccb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -368,7 +368,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_BACK_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_back_merge(rq, bio, nr_segs)) + if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK) return false; *merged_request = attempt_back_merge(q, rq); if (!*merged_request) @@ -377,86 +377,20 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_FRONT_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_front_merge(rq, bio, nr_segs)) + if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK) return false; *merged_request = attempt_front_merge(q, rq); if (!*merged_request) elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); return true; case ELEVATOR_DISCARD_MERGE: - return bio_attempt_discard_merge(q, rq, bio); + return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK; default: return false; } } EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); -/* - * Iterate list of requests and see if we can merge this bio with any - * of them. 
- */ -bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio, unsigned int nr_segs) -{ - struct request *rq; - int checked = 8; - - list_for_each_entry_reverse(rq, list, queuelist) { - bool merged = false; - - if (!checked--) - break; - - if (!blk_rq_merge_ok(rq, bio)) - continue; - - switch (blk_try_merge(rq, bio)) { - case ELEVATOR_BACK_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_back_merge(rq, bio, - nr_segs); - break; - case ELEVATOR_FRONT_MERGE: - if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_front_merge(rq, bio, - nr_segs); - break; - case ELEVATOR_DISCARD_MERGE: - merged = bio_attempt_discard_merge(q, rq, bio); - break; - default: - continue; - } - - return merged; - } - - return false; -} -EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge); - -/* - * Reverse check our software queue for entries that we could potentially - * merge with. Currently includes a hand-wavy stop count of 8, to not spend - * too much time checking for merges. 
- */ -static bool blk_mq_attempt_merge(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct bio *bio, - unsigned int nr_segs) -{ - enum hctx_type type = hctx->type; - - lockdep_assert_held(&ctx->lock); - - if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) { - ctx->rq_merged++; - return true; - } - - return false; -} - bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { @@ -470,14 +404,24 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, return e->type->ops.bio_merge(hctx, bio, nr_segs); type = hctx->type; - if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && - !list_empty_careful(&ctx->rq_lists[type])) { - /* default per sw-queue merge */ - spin_lock(&ctx->lock); - ret = blk_mq_attempt_merge(q, hctx, ctx, bio, nr_segs); - spin_unlock(&ctx->lock); + if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || + list_empty_careful(&ctx->rq_lists[type])) + return false; + + /* default per sw-queue merge */ + spin_lock(&ctx->lock); + /* + * Reverse check our software queue for entries that we could + * potentially merge with. Currently includes a hand-wavy stop + * count of 8, to not spend too much time checking for merges. 
+ */ + if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) { + ctx->rq_merged++; + ret = true; } + spin_unlock(&ctx->lock); + return ret; } @@ -531,7 +475,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, goto run; } - WARN_ON(e && (rq->tag != -1)); + WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG)); if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { /* diff --git a/block/blk.h b/block/blk.h index 49e2928a1632..c08762e10b04 100644 --- a/block/blk.h +++ b/block/blk.h @@ -29,6 +29,12 @@ struct blk_flush_queue { spinlock_t mq_flush_lock; }; +enum bio_merge_status { + BIO_MERGE_OK, + BIO_MERGE_NONE, + BIO_MERGE_FAILED, +}; + extern struct kmem_cache *blk_requestq_cachep; extern struct kobj_type blk_queue_ktype; extern struct ida blk_queue_ida; @@ -169,14 +175,19 @@ static inline void blk_integrity_del(struct gendisk *disk) unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); -bool bio_attempt_front_merge(struct request *req, struct bio *bio, - unsigned int nr_segs); -bool bio_attempt_back_merge(struct request *req, struct bio *bio, - unsigned int nr_segs); -bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, - struct bio *bio); +enum bio_merge_status bio_attempt_front_merge(struct request *req, + struct bio *bio, + unsigned int nr_segs); +enum bio_merge_status bio_attempt_back_merge(struct request *req, + struct bio *bio, + unsigned int nr_segs); +enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q, + struct request *req, + struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs, struct request **same_queue_rq); +bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, + struct bio *bio, unsigned int nr_segs); void blk_account_io_start(struct request *req); void blk_account_io_done(struct request *req, u64 now); @@ -350,7 +361,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf); 
#define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -void delete_partition(struct gendisk *disk, struct hd_struct *part); +void delete_partition(struct hd_struct *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); diff --git a/block/genhd.c b/block/genhd.c index 99c64641c314..5fc6d82e6c68 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -110,8 +110,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) } } -static unsigned int part_in_flight(struct request_queue *q, - struct hd_struct *part) +static unsigned int part_in_flight(struct hd_struct *part) { unsigned int inflight = 0; int cpu; @@ -126,8 +125,7 @@ static unsigned int part_in_flight(struct request_queue *q, return inflight; } -static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]) +static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) { int cpu; @@ -913,7 +911,7 @@ void del_gendisk(struct gendisk *disk) DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { invalidate_partition(disk, part->partno); - delete_partition(disk, part); + delete_partition(part); } disk_part_iter_exit(&piter); @@ -1301,7 +1299,7 @@ ssize_t part_stat_show(struct device *dev, if (queue_is_mq(q)) inflight = blk_mq_in_flight(q, p); else - inflight = part_in_flight(q, p); + inflight = part_in_flight(p); return sprintf(buf, "%8lu %8lu %8llu %8u " @@ -1343,7 +1341,7 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, if (queue_is_mq(q)) blk_mq_in_flight_rw(q, p, inflight); else - part_in_flight_rw(q, p, inflight); + part_in_flight_rw(p, inflight); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } @@ -1623,7 +1621,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) if (queue_is_mq(gp->queue)) 
inflight = blk_mq_in_flight(gp->queue, hd); else - inflight = part_in_flight(gp->queue, hd); + inflight = part_in_flight(hd); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " @@ -1729,45 +1727,48 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) } disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); - if (disk) { - disk->part0.dkstats = alloc_percpu(struct disk_stats); - if (!disk->part0.dkstats) { - kfree(disk); - return NULL; - } - init_rwsem(&disk->lookup_sem); - disk->node_id = node_id; - if (disk_expand_part_tbl(disk, 0)) { - free_percpu(disk->part0.dkstats); - kfree(disk); - return NULL; - } - ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[0], &disk->part0); + if (!disk) + return NULL; - /* - * set_capacity() and get_capacity() currently don't use - * seqcounter to read/update the part0->nr_sects. Still init - * the counter as we can read the sectors in IO submission - * patch using seqence counters. - * - * TODO: Ideally set_capacity() and get_capacity() should be - * converted to make use of bd_mutex and sequence counters. - */ - hd_sects_seq_init(&disk->part0); - if (hd_ref_init(&disk->part0)) { - hd_free_part(&disk->part0); - kfree(disk); - return NULL; - } + disk->part0.dkstats = alloc_percpu(struct disk_stats); + if (!disk->part0.dkstats) + goto out_free_disk; - disk->minors = minors; - rand_initialize_disk(disk); - disk_to_dev(disk)->class = &block_class; - disk_to_dev(disk)->type = &disk_type; - device_initialize(disk_to_dev(disk)); + init_rwsem(&disk->lookup_sem); + disk->node_id = node_id; + if (disk_expand_part_tbl(disk, 0)) { + free_percpu(disk->part0.dkstats); + goto out_free_disk; } + + ptbl = rcu_dereference_protected(disk->part_tbl, 1); + rcu_assign_pointer(ptbl->part[0], &disk->part0); + + /* + * set_capacity() and get_capacity() currently don't use + * seqcounter to read/update the part0->nr_sects. 
Still init + * the counter as we can read the sectors in IO submission + * path using sequence counters. + * + * TODO: Ideally set_capacity() and get_capacity() should be + * converted to make use of bd_mutex and sequence counters. + */ + hd_sects_seq_init(&disk->part0); + if (hd_ref_init(&disk->part0)) + goto out_free_part0; + + disk->minors = minors; + rand_initialize_disk(disk); + disk_to_dev(disk)->class = &block_class; + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); return disk; + +out_free_part0: + hd_free_part(&disk->part0); +out_free_disk: + kfree(disk); + return NULL; } EXPORT_SYMBOL(__alloc_disk_node); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index a38c5ab103d1..6d4ba0e9688e 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -573,7 +573,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, bool merged; spin_lock(&kcq->lock); - merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); + merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); spin_unlock(&kcq->lock); return merged; diff --git a/block/partitions/core.c b/block/partitions/core.c index 722406b841df..5cacbac30107 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -199,14 +199,20 @@ static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); + + return sprintf(buf, "%u\n", + queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, + p->start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%u\n", p->discard_alignment); + + return sprintf(buf, "%u\n", + queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, + p->start_sect)); } static DEVICE_ATTR(partition, 0444,
part_partition_show, NULL); @@ -318,8 +324,9 @@ int hd_ref_init(struct hd_struct *part) * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -void delete_partition(struct gendisk *disk, struct hd_struct *part) +void delete_partition(struct hd_struct *part) { + struct gendisk *disk = part_to_disk(part); struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); @@ -327,7 +334,7 @@ void delete_partition(struct gendisk *disk, struct hd_struct *part) * ->part_tbl is referenced in this part's release handler, so * we have to hold the disk device */ - get_device(disk_to_dev(part_to_disk(part))); + get_device(disk_to_dev(disk)); rcu_assign_pointer(ptbl->part[part->partno], NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); @@ -405,10 +412,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); p->start_sect = start; - p->alignment_offset = - queue_limit_alignment_offset(&disk->queue->limits, start); - p->discard_alignment = - queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); @@ -554,7 +557,7 @@ int bdev_del_partition(struct block_device *bdev, int partno) sync_blockdev(bdevp); invalidate_bdev(bdevp); - delete_partition(bdev->bd_disk, part); + delete_partition(part); ret = 0; out_unlock: mutex_unlock(&bdev->bd_mutex); @@ -592,8 +595,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); + part_nr_sects_write(part, length); + bd_set_nr_sectors(bdevp, length); ret = 0; out_unlock: @@ -634,7 +637,7 @@ int blk_drop_partitions(struct block_device *bdev) disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - 
delete_partition(bdev->bd_disk, part); + delete_partition(part); disk_part_iter_exit(&piter); return 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6dba41395155..313f0b946fe2 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -900,9 +900,7 @@ aoecmd_sleepwork(struct work_struct *work) ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { - inode_lock(bd->bd_inode); - i_size_write(bd->bd_inode, (loff_t)ssize<<9); - inode_unlock(bd->bd_inode); + bd_set_nr_sectors(bd, ssize); bdput(bd); } spin_lock_irq(&d->lock); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d3394191e168..cb1191d6e945 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -253,7 +253,7 @@ static void loop_set_size(struct loop_device *lo, loff_t size) { struct block_device *bdev = lo->lo_device; - bd_set_size(bdev, size << SECTOR_SHIFT); + bd_set_nr_sectors(bdev, size); set_capacity_revalidate_and_notify(lo->lo_disk, size, false); } @@ -1251,7 +1251,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_size(bdev, 0); + bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index edf8b632e3d2..a54f2d155a31 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -300,6 +300,7 @@ static void nbd_size_update(struct nbd_device *nbd) { struct nbd_config *config = nbd->config; struct block_device *bdev = bdget_disk(nbd->disk, 0); + sector_t nr_sectors = config->bytesize >> 9; if (config->flags & NBD_FLAG_SEND_TRIM) { nbd->disk->queue->limits.discard_granularity = config->blksize; @@ -308,10 +309,10 @@ static void nbd_size_update(struct nbd_device *nbd) } blk_queue_logical_block_size(nbd->disk->queue, config->blksize); blk_queue_physical_block_size(nbd->disk->queue, config->blksize); - 
set_capacity(nbd->disk, config->bytesize >> 9); + set_capacity(nbd->disk, nr_sectors); if (bdev) { if (bdev->bd_disk) { - bd_set_size(bdev, config->bytesize); + bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else bdev->bd_invalidated = 1; @@ -1138,7 +1139,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_size(bdev, 0); + bd_set_nr_sectors(bdev, 0); } static void nbd_parse_flags(struct nbd_device *nbd) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 1034e445680c..17f2e6ff1223 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2192,7 +2192,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) set_capacity(pd->disk, lba << 2); set_capacity(pd->bdev->bd_disk, lba << 2); - bd_set_size(pd->bdev, (loff_t)lba << 11); + bd_set_nr_sectors(pd->bdev, lba << 2); q = bdev_get_queue(pd->bdev); if (write) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f20f02b7bc15..bf3d752e443f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -648,7 +648,7 @@ static struct attribute *virtblk_attrs[] = { static umode_t virtblk_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; struct virtio_device *vdev = vblk->vdev; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 380bf518338e..ccf5bd528642 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -63,6 +63,11 @@ static int raw_open(struct inode *inode, struct file *filp) return 0; } + pr_warn_ratelimited( + "process %s (pid %d) is using the deprecated raw device\n" + "support will be removed in Linux 5.14.\n", + current->comm, current->pid); + mutex_lock(&raw_mutex); /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 
9d2f9dd70a4c..efd6c4255356 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2086,18 +2086,6 @@ static void event_callback(void *context) dm_issue_global_event(); } -/* - * Protected by md->suspend_lock obtained by dm_swap_table(). - */ -static void __set_size(struct mapped_device *md, sector_t size) -{ - lockdep_assert_held(&md->suspend_lock); - - set_capacity(md->disk, size); - - i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); -} - /* * Returns old map, which caller must destroy. */ @@ -2120,7 +2108,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - __set_size(md, size); + set_capacity(md->disk, size); + bd_set_nr_sectors(md->bdev, size); dm_table_event_callback(t, event_callback, md); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 893e29624c16..4a053aca3eea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -94,21 +94,34 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); +static void nvme_update_bdev_size(struct gendisk *disk) +{ + struct block_device *bdev = bdget_disk(disk, 0); + + if (bdev) { + bd_set_nr_sectors(bdev, get_capacity(disk)); + bdput(bdev); + } +} + +/* + * Prepare a queue for teardown. + * + * This must forcibly unquiesce queues to avoid blocking dispatch, and only set + * the capacity to 0 after that to avoid blocking dispatchers that may be + * holding bd_mutex. This will end buffered writers dirtying pages that can't + * be synced. + */ static void nvme_set_queue_dying(struct nvme_ns *ns) { - /* - * Revalidating a dead namespace sets capacity to 0. This will end - * buffered writers dirtying pages that can't be synced.
- */ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; + blk_set_queue_dying(ns->queue); - /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); - /* - * Revalidate after unblocking dispatchers that may be holding bd_butex - */ - revalidate_disk(ns->disk); + + set_capacity(ns->disk, 0); + nvme_update_bdev_size(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -2134,7 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); - nvme_mpath_update_disk_size(ns->head->disk); + nvme_update_bdev_size(ns->head->disk); } #endif return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2aaedfa43ed8..8ac37430347c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -682,16 +682,6 @@ static inline void nvme_trace_bio_complete(struct request *req, trace_block_bio_complete(ns->head->disk->queue, req->bio); } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT); - bdput(bdev); - } -} - extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; @@ -766,9 +756,6 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) { } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ -} #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_BLK_DEV_ZONED diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 777734d1b4e5..faaf5596e31c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -55,10 +55,7 @@ dasd_ioctl_enable(struct 
block_device *bdev) dasd_enable_device(base); /* Formatting the dasd device can change the capacity. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, - (loff_t)get_capacity(base->block->gdp) << 9); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -91,9 +88,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, 0); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, 0); dasd_put_device(base); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 8ae833e00443..990e97bcbeaf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -876,6 +876,7 @@ struct block_device *bdget(dev_t dev) bdev = &BDEV_I(inode)->bdev; if (inode->i_state & I_NEW) { + spin_lock_init(&bdev->bd_size_lock); bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; @@ -1290,6 +1291,7 @@ static void check_disk_size_change(struct gendisk *disk, { loff_t disk_size, bdev_size; + spin_lock(&bdev->bd_size_lock); disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { @@ -1299,11 +1301,15 @@ static void check_disk_size_change(struct gendisk *disk, disk->disk_name, bdev_size, disk_size); } i_size_write(bdev->bd_inode, disk_size); - if (bdev_size > disk_size && __invalidate_device(bdev, false)) + } + bdev->bd_invalidated = 0; + spin_unlock(&bdev->bd_size_lock); + + if (bdev_size > disk_size) { + if (__invalidate_device(bdev, false)) pr_warn("VFS: busy inodes on resized disk %s\n", disk->disk_name); } - bdev->bd_invalidated = 0; } /** @@ -1328,13 +1334,10 @@ int revalidate_disk(struct gendisk *disk) if (!(disk->flags & GENHD_FL_HIDDEN)) { struct block_device *bdev = bdget_disk(disk, 0); - if (!bdev) - return ret; - - mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev, ret == 0); - 
mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + if (bdev) { + check_disk_size_change(disk, bdev, ret == 0); + bdput(bdev); + } } return ret; } @@ -1371,13 +1374,13 @@ int check_disk_change(struct block_device *bdev) EXPORT_SYMBOL(check_disk_change); -void bd_set_size(struct block_device *bdev, loff_t size) +void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - inode_lock(bdev->bd_inode); - i_size_write(bdev->bd_inode, size); - inode_unlock(bdev->bd_inode); + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); } -EXPORT_SYMBOL(bd_set_size); +EXPORT_SYMBOL(bd_set_nr_sectors); static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); @@ -1446,22 +1449,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, struct gendisk *disk; int ret; int partno; - int perm = 0; bool first_open = false, unblock_events = true, need_restart; - if (mode & FMODE_READ) - perm |= MAY_READ; - if (mode & FMODE_WRITE) - perm |= MAY_WRITE; - /* - * hooks: /n/, see "layering violations". 
- */ - if (!for_part) { - ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) - return ret; - } - restart: need_restart = false; ret = -ENXIO; @@ -1514,7 +1503,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, } if (!ret) { - bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + bd_set_nr_sectors(bdev, get_capacity(disk)); set_init_blocksize(bdev); } @@ -1542,7 +1531,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); set_init_blocksize(bdev); } @@ -1634,12 +1623,24 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, */ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { - int res; + int ret, perm = 0; - res =__blkdev_get(bdev, mode, holder, 0); - if (res) - bdput(bdev); - return res; + if (mode & FMODE_READ) + perm |= MAY_READ; + if (mode & FMODE_WRITE) + perm |= MAY_WRITE; + ret = devcgroup_inode_permission(bdev->bd_inode, perm); + if (ret) + goto bdput; + + ret =__blkdev_get(bdev, mode, holder, 0); + if (ret) + goto bdput; + return 0; + +bdput: + bdput(bdev); + return ret; } EXPORT_SYMBOL(blkdev_get); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9d2d5ad367a4..21a02e0577dd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -489,8 +489,6 @@ void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_complete_request(struct request *rq); bool blk_mq_complete_request_remote(struct request *rq); -bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx 
*hctx); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 46dca1b984c6..8d67ae1080fe 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,7 +20,7 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ + dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; @@ -38,6 +38,7 @@ struct block_device { /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; @@ -258,8 +259,6 @@ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ - BIO_USER_MAPPED, /* contains user pages */ - BIO_NULL_MAPPED, /* contains invalid user pages */ BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 868e11face00..0769112ca80b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -460,7 +461,7 @@ struct request_queue { #ifdef CONFIG_PM struct device *dev; - int rpm_status; + enum rpm_status rpm_status; unsigned int nr_pending; #endif @@ -1457,10 +1458,9 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) - return bdev->bd_part->alignment_offset; - + return queue_limit_alignment_offset(&q->limits, + bdev->bd_part->start_sect); return q->limits.alignment_offset; } @@ -1500,8 +1500,8 @@ static inline int bdev_discard_alignment(struct block_device *bdev) struct request_queue *q = bdev_get_queue(bdev); if (bdev != 
bdev->bd_contains) - return bdev->bd_part->discard_alignment; - + return queue_limit_discard_alignment(&q->limits, + bdev->bd_part->start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4ab853461dff..9ea2ca31c278 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,8 +65,6 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; - sector_t alignment_offset; - unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; @@ -375,7 +373,7 @@ void unregister_blkdev(unsigned int major, const char *name); int revalidate_disk(struct gendisk *disk); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_size(struct block_device *bdev, loff_t size); +void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);