diff --git a/block/bio.c b/block/bio.c index e6e26d7a1ffb..fa01bef35bb1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1044,6 +1044,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) ssize_t size, left; unsigned len, i; size_t offset; + int ret = 0; if (WARN_ON_ONCE(!max_append_sectors)) return 0; @@ -1066,15 +1067,17 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) len = min_t(size_t, PAGE_SIZE - offset, left); if (bio_add_hw_page(q, bio, page, len, offset, - max_append_sectors, &same_page) != len) - return -EINVAL; + max_append_sectors, &same_page) != len) { + ret = -EINVAL; + break; + } if (same_page) put_page(page); offset = 0; } - iov_iter_advance(iter, size); - return 0; + iov_iter_advance(iter, size - left); + return ret; } /** diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f9b55614d67d..c68bdf58c9a6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -657,13 +657,20 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto fail; } + if (radix_tree_preload(GFP_KERNEL)) { + blkg_free(new_blkg); + ret = -ENOMEM; + goto fail; + } + rcu_read_lock(); spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_check(pos, pol, q); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); - goto fail_unlock; + blkg_free(new_blkg); + goto fail_preloaded; } if (blkg) { @@ -672,10 +679,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_create(pos, q, new_blkg); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); - goto fail_unlock; + goto fail_preloaded; } } + radix_tree_preload_end(); + if (pos == blkcg) goto success; } @@ -685,6 +694,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, ctx->body = input; return 0; +fail_preloaded: + radix_tree_preload_end(); fail_unlock: spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); diff --git a/block/blk-flush.c b/block/blk-flush.c index 53abb5c73d99..e32958f0b687 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -225,6 +225,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) /* release the tag's ownership to the req cloned from */ spin_lock_irqsave(&fq->mq_flush_lock, flags); + WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); if (!refcount_dec_and_test(&flush_rq->ref)) { fq->rq_status = error; spin_unlock_irqrestore(&fq->mq_flush_lock, flags); diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index eb9dc14e5147..20190f66ced9 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -2100,7 +2100,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) pp->dhfis_bits &= ~done_mask; pp->dmafis_bits &= ~done_mask; pp->sdbfis_bits |= done_mask; - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); if (!ap->qc_active) { DPRINTK("over\n"); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0bed21c0c81b..c4f9ccf5cc2a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -296,7 +296,7 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd) +static void nbd_size_update(struct nbd_device *nbd, bool start) { struct nbd_config *config = nbd->config; struct block_device *bdev = bdget_disk(nbd->disk, 0); @@ -313,7 +313,8 @@ static void nbd_size_update(struct nbd_device *nbd) if (bdev) { if (bdev->bd_disk) { bd_set_nr_sectors(bdev, nr_sectors); - set_blocksize(bdev, config->blksize); + if (start) + set_blocksize(bdev, config->blksize); } else set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); bdput(bdev); @@ -328,7 +329,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; if (nbd->task_recv != NULL) - nbd_size_update(nbd); + nbd_size_update(nbd, false); } static void nbd_complete_rq(struct request *req) @@ -1308,7 +1309,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd); + nbd_size_update(nbd, true); return error; } diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index d2e7db43a52a..cfd00ad40355 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -47,6 +47,8 @@ struct nullb_device { unsigned int nr_zones_closed; struct blk_zone *zones; sector_t zone_size_sects; + spinlock_t zone_dev_lock; + unsigned long *zone_locks; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 7d94f2d47a6a..8775acbb4f8f 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "null_blk.h" #define CREATE_TRACE_POINTS @@ -45,6 +46,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) if (!dev->zones) return -ENOMEM; + spin_lock_init(&dev->zone_dev_lock); + dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); + if (!dev->zone_locks) { + kvfree(dev->zones); + return -ENOMEM; + } + if (dev->zone_nr_conv >= dev->nr_zones) { dev->zone_nr_conv = dev->nr_zones - 1; pr_info("changed the number of conventional zones to %u", @@ -123,15 +131,26 @@ int null_register_zoned_dev(struct nullb *nullb) void null_free_zoned_dev(struct nullb_device *dev) { + bitmap_free(dev->zone_locks); kvfree(dev->zones); } +static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) +{ + wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); +} + +static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) +{ + clear_and_wake_up_bit(zno, dev->zone_locks); +} + int null_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct nullb *nullb = disk->private_data; struct nullb_device *dev = nullb->dev; - unsigned int first_zone, i; + unsigned int first_zone, i, zno; struct blk_zone zone; int error; @@ -142,15 +161,18 @@ int null_report_zones(struct gendisk *disk, sector_t sector, nr_zones = min(nr_zones, dev->nr_zones - first_zone); trace_nullb_report_zones(nullb, nr_zones); - for (i = 0; i < nr_zones; i++) { + zno = first_zone; + for (i = 0; i < nr_zones; i++, zno++) { /* * Stacked DM target drivers will remap the zone information by * modifying the zone information passed to the report callback. * So use a local copy to avoid corruption of the device zone * array. */ - memcpy(&zone, &dev->zones[first_zone + i], - sizeof(struct blk_zone)); + null_lock_zone(dev, zno); + memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); + null_unlock_zone(dev, zno); + error = cb(&zone, i, data); if (error) return error; @@ -159,6 +181,10 @@ int null_report_zones(struct gendisk *disk, sector_t sector, return nr_zones; } +/* + * This is called in the case of memory backing from null_process_cmd() + * with the target zone already locked. + */ size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len) { @@ -295,22 +321,27 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + null_lock_zone(dev, zno); + spin_lock(&dev->zone_dev_lock); + switch (zone->cond) { case BLK_ZONE_COND_FULL: /* Cannot write to a full zone */ - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_CLOSED: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: break; default: /* Invalid zone condition */ - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } /* @@ -326,11 +357,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, else cmd->rq->__sector = sector; } else if (sector != zone->wp) { - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } - if (zone->wp + nr_sectors > zone->start + zone->capacity) - return BLK_STS_IOERR; + if (zone->wp + nr_sectors > zone->start + zone->capacity) { + ret = BLK_STS_IOERR; + goto unlock; + } if (zone->cond == BLK_ZONE_COND_CLOSED) { dev->nr_zones_closed--; @@ -341,9 +375,11 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, if (zone->cond != BLK_ZONE_COND_EXP_OPEN) zone->cond = BLK_ZONE_COND_IMP_OPEN; + spin_unlock(&dev->zone_dev_lock); ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + spin_lock(&dev->zone_dev_lock); if (ret != BLK_STS_OK) - return ret; + goto unlock; zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { @@ -353,7 +389,13 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, dev->nr_zones_imp_open--; zone->cond = BLK_ZONE_COND_FULL; } - return BLK_STS_OK; + ret = BLK_STS_OK; + +unlock: + spin_unlock(&dev->zone_dev_lock); + null_unlock_zone(dev, zno); + + return ret; } static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) @@ -464,16 +506,33 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, sector_t sector) { struct nullb_device *dev = cmd->nq->dev; - unsigned int zone_no = null_zone_no(dev, sector); - struct blk_zone *zone = &dev->zones[zone_no]; - blk_status_t ret = BLK_STS_OK; + unsigned int zone_no; + struct blk_zone *zone; + blk_status_t ret; size_t i; + if (op == REQ_OP_ZONE_RESET_ALL) { + for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { + null_lock_zone(dev, i); + zone = &dev->zones[i]; + if (zone->cond != BLK_ZONE_COND_EMPTY) { + spin_lock(&dev->zone_dev_lock); + null_reset_zone(dev, zone); + spin_unlock(&dev->zone_dev_lock); + trace_nullb_zone_op(cmd, i, zone->cond); + } + null_unlock_zone(dev, i); + } + return BLK_STS_OK; + } + + zone_no = null_zone_no(dev, sector); + zone = &dev->zones[zone_no]; + + null_lock_zone(dev, zone_no); + spin_lock(&dev->zone_dev_lock); + switch (op) { - case REQ_OP_ZONE_RESET_ALL: - for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) - null_reset_zone(dev, &dev->zones[i]); - break; case REQ_OP_ZONE_RESET: ret = null_reset_zone(dev, zone); break; @@ -487,30 +546,46 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, ret = null_finish_zone(dev, zone); break; default: - return BLK_STS_NOTSUPP; + ret = BLK_STS_NOTSUPP; + break; } + spin_unlock(&dev->zone_dev_lock); + if (ret == BLK_STS_OK) trace_nullb_zone_op(cmd, zone_no, zone->cond); + null_unlock_zone(dev, zone_no); + return ret; } blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, sector_t nr_sectors) { + struct nullb_device *dev = cmd->nq->dev; + unsigned int zno = null_zone_no(dev, sector); + blk_status_t sts; + switch (op) { case REQ_OP_WRITE: - return null_zone_write(cmd, sector, nr_sectors, false); + sts = null_zone_write(cmd, sector, nr_sectors, false); + break; case REQ_OP_ZONE_APPEND: - return null_zone_write(cmd, sector, nr_sectors, true); + sts = null_zone_write(cmd, sector, nr_sectors, true); + break; case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: - return null_zone_mgmt(cmd, op, sector); + sts = null_zone_mgmt(cmd, op, sector); + break; default: - return null_process_cmd(cmd, op, sector, nr_sectors); + null_lock_zone(dev, zno); + sts = null_process_cmd(cmd, op, sector, nr_sectors); + null_unlock_zone(dev, zno); } + + return sts; } diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 8d581c7536fb..eb8ef65778c3 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -443,22 +443,27 @@ static void ace_fix_driveid(u16 *id) #define ACE_FSM_NUM_STATES 11 /* Set flag to exit FSM loop and reschedule tasklet */ -static inline void ace_fsm_yield(struct ace_device *ace) +static inline void ace_fsm_yieldpoll(struct ace_device *ace) { - dev_dbg(ace->dev, "ace_fsm_yield()\n"); tasklet_schedule(&ace->fsm_tasklet); ace->fsm_continue_flag = 0; } +static inline void ace_fsm_yield(struct ace_device *ace) +{ + dev_dbg(ace->dev, "%s()\n", __func__); + ace_fsm_yieldpoll(ace); +} + /* Set flag to exit FSM loop and wait for IRQ to reschedule tasklet */ static inline void ace_fsm_yieldirq(struct ace_device *ace) { dev_dbg(ace->dev, "ace_fsm_yieldirq()\n"); - if (!ace->irq) - /* No IRQ assigned, so need to poll */ - tasklet_schedule(&ace->fsm_tasklet); - ace->fsm_continue_flag = 0; + if (ace->irq > 0) + ace->fsm_continue_flag = 0; + else + ace_fsm_yieldpoll(ace); } static bool ace_has_next_request(struct request_queue *q) @@ -1053,12 +1058,12 @@ static int ace_setup(struct ace_device *ace) ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ); /* Now we can hook up the irq handler */ - if (ace->irq) { + if (ace->irq > 0) { rc = request_irq(ace->irq, ace_interrupt, 0, "systemace", ace); if (rc) { /* Failure - fall back to polled mode */ dev_err(ace->dev, "request_irq failed\n"); - ace->irq = 0; + ace->irq = rc; } } @@ -1110,7 +1115,7 @@ static void ace_teardown(struct ace_device *ace) tasklet_kill(&ace->fsm_tasklet); - if (ace->irq) + if (ace->irq > 0) free_irq(ace->irq, ace); iounmap(ace->baseaddr); @@ -1123,11 +1128,6 @@ static int ace_alloc(struct device *dev, int id, resource_size_t physaddr, int rc; dev_dbg(dev, "ace_alloc(%p)\n", dev); - if (!physaddr) { - rc = -ENODEV; - goto err_noreg; - } - /* Allocate and initialize the ace device structure */ ace = kzalloc(sizeof(struct ace_device), GFP_KERNEL); if (!ace) { @@ -1153,7 +1153,6 @@ static int ace_alloc(struct device *dev, int id, resource_size_t physaddr, dev_set_drvdata(dev, NULL); kfree(ace); err_alloc: -err_noreg: dev_err(dev, "could not initialize device, err=%i\n", rc); return rc; } @@ -1176,10 +1175,11 @@ static void ace_free(struct device *dev) static int ace_probe(struct platform_device *dev) { - resource_size_t physaddr = 0; int bus_width = ACE_BUS_WIDTH_16; /* FIXME: should not be hard coded */ + resource_size_t physaddr; + struct resource *res; u32 id = dev->id; - int irq = 0; + int irq; int i; dev_dbg(&dev->dev, "ace_probe(%p)\n", dev); @@ -1190,12 +1190,15 @@ static int ace_probe(struct platform_device *dev) if (of_find_property(dev->dev.of_node, "8-bit", NULL)) bus_width = ACE_BUS_WIDTH_8; - for (i = 0; i < dev->num_resources; i++) { - if (dev->resource[i].flags & IORESOURCE_MEM) - physaddr = dev->resource[i].start; - if (dev->resource[i].flags & IORESOURCE_IRQ) - irq = dev->resource[i].start; - } + res = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + physaddr = res->start; + if (!physaddr) + return -ENODEV; + + irq = platform_get_irq_optional(dev, 0); /* Call the bus-independent setup code */ return ace_alloc(&dev->dev, id, physaddr, irq, bus_width); diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 09346ae308eb..78cc64b42b30 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -47,7 +47,7 @@ enum { struct intel_tlv { u8 type; u8 len; - u8 val[0]; + u8 val[]; } __packed; struct intel_version_tlv { diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index a5ef9faf71c7..e7f0d4ae0f96 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1176,8 +1176,10 @@ mptscsih_remove(struct pci_dev *pdev) MPT_SCSI_HOST *hd; int sz1; - if((hd = shost_priv(host)) == NULL) - return; + if (host == NULL) + hd = NULL; + else + hd = shost_priv(host); mptscsih_shutdown(pdev); @@ -1193,14 +1195,15 @@ mptscsih_remove(struct pci_dev *pdev) "Free'd ScsiLookup (%d) memory\n", ioc->name, sz1)); - kfree(hd->info_kbuf); + if (hd) + kfree(hd->info_kbuf); /* NULL the Scsi_Host pointer */ ioc->sh = NULL; - scsi_host_put(host); - + if (host) + scsi_host_put(host); mpt_detach(pdev); } diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h index 8bac86c4d86b..df2fb9520dd8 100644 --- a/drivers/misc/mei/hw.h +++ b/drivers/misc/mei/hw.h @@ -224,7 +224,7 @@ struct mei_ext_hdr { u8 type; u8 length; u8 ext_payload[2]; - u8 hdr[0]; + u8 hdr[]; }; /** @@ -238,7 +238,7 @@ struct mei_ext_meta_hdr { u8 count; u8 size; u8 reserved[2]; - struct mei_ext_hdr hdrs[0]; + struct mei_ext_hdr hdrs[]; }; /* @@ -308,7 +308,7 @@ struct mei_msg_hdr { u32 dma_ring:1; u32 internal:1; u32 msg_complete:1; - u32 extension[0]; + u32 extension[]; } __packed; /* The length is up to 9 bits */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index c81be32bcedf..827f74e86d34 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -402,7 +402,7 @@ struct enetc_psfp_gate { u32 num_entries; refcount_t refcount; struct hlist_node node; - struct action_gate_entry entries[0]; + struct action_gate_entry entries[]; }; /* Only enable the green color frame now diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 281de8326bc5..015796a20118 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -198,7 +198,7 @@ static_assert(sizeof(struct stats) == 16); struct gve_stats_report { __be64 written_count; - struct stats stats[0]; + struct stats stats[]; }; static_assert(sizeof(struct gve_stats_report) == 8); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 48a433154ce0..02e7d74779f4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -116,9 +116,8 @@ static int gve_alloc_stats_report(struct gve_priv *priv) priv->tx_cfg.num_queues; rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * priv->rx_cfg.num_queues; - priv->stats_report_len = sizeof(struct gve_stats_report) + - (tx_stats_num + rx_stats_num) * - sizeof(struct stats); + priv->stats_report_len = struct_size(priv->stats_report, stats, + tx_stats_num + rx_stats_num); priv->stats_report = dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, &priv->stats_report_bus, GFP_KERNEL); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 95ef4943d8bd..376096bfc54a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2125,7 +2125,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); - if (ret) + if (ret && !nvme_first_scan(ns->disk)) return ret; } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3c002bdcace3..f4c246462658 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -146,7 +146,8 @@ struct nvme_fc_rport { /* fc_ctrl flags values - specified as bit positions */ #define ASSOC_ACTIVE 0 -#define FCCTRL_TERMIO 1 +#define ASSOC_FAILED 1 +#define FCCTRL_TERMIO 2 struct nvme_fc_ctrl { spinlock_t lock; @@ -157,7 +158,6 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - atomic_t err_work_active; u64 association_id; struct nvmefc_ls_rcv_op *rcv_disconn; @@ -167,7 +167,6 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct delayed_work connect_work; - struct work_struct err_work; struct kref ref; unsigned long flags; @@ -2414,24 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) nvme_fc_ctrl_put(ctrl); } +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static bool +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + + __nvme_fc_abort_op(ctrl, op); + return true; +} + +/* + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically be called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. + */ +static void +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) +{ + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_start_queues(&ctrl->ctrl); + } + + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ + + /* + * clean up the admin queue. Same thing as above. + */ + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); +} + static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - int active; - /* - * if an error (io timeout, etc) while (re)connecting, - * it's an error on creating the new association. - * Start the error recovery thread if it hasn't already - * been started. It is expected there could be multiple - * ios hitting this path before things are cleaned up. + * if an error (io timeout, etc) while (re)connecting, the remote + * port requested terminating of the association (disconnect_ls) + * or an error (timeout or abort) occurred on an io while creating + * the controller. Abort any ios on the association and let the + * create_association error path resolve things. */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - active = atomic_xchg(&ctrl->err_work_active, 1); - if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { - atomic_set(&ctrl->err_work_active, 0); - WARN_ON(1); - } + __nvme_fc_abort_outstanding_ios(ctrl, true); + set_bit(ASSOC_FAILED, &ctrl->flags); return; } @@ -2745,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } -/* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. - */ -static bool -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) -{ - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - - __nvme_fc_abort_op(ctrl, op); - return true; -} - static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, @@ -2988,6 +3036,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->cnum, ctrl->lport->localport.port_name, ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + clear_bit(ASSOC_FAILED, &ctrl->flags); + /* * Create the admin queue */ @@ -3016,7 +3066,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ ret = nvme_enable_ctrl(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; @@ -3026,7 +3076,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ret = nvme_init_identify(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; /* sanity checks */ @@ -3071,9 +3121,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ret = nvme_fc_create_io_queues(ctrl); else ret = nvme_fc_recreate_io_queues(ctrl); - if (ret) - goto out_term_aen_ops; } + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + goto out_term_aen_ops; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -3107,60 +3157,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } -/* - * This routine runs through all outstanding commands on the association - * and aborts them. This routine is typically be called by the - * delete_association routine. It is also called due to an error during - * reconnect. In that scenario, it is most likely a command that initializes - * the controller, including fabric Connect commands on io queues, that - * may have timed out or failed thus the io must be killed for the connect - * thread to see the error. - */ -static void -__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) -{ - /* - * If io queues are present, stop them and terminate all outstanding - * ios on them. As FC allocates FC exchange for each io, the - * transport must contact the LLDD to terminate the exchange, - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() - * to tell us what io's are busy and invoke a transport routine - * to kill them with the LLDD. After terminating the exchange - * the LLDD will call the transport's normal io done path, but it - * will have an aborted status. The done path will return the - * io requests back to the block layer as part of normal completions - * (but with error status). - */ - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->tag_set); - if (start_queues) - nvme_start_queues(&ctrl->ctrl); - } - - /* - * Other transports, which don't have link-level contexts bound - * to sqe's, would try to gracefully shutdown the controller by - * writing the registers for shutdown and polling (call - * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially - * just aborted and we will wait on those contexts, and given - * there was no indication of how live the controlelr is on the - * link, don't send more io to create more contexts for the - * shutdown. Let the controller fail via keepalive failure if - * its still present. - */ - - /* - * clean up the admin queue. Same thing as above. - */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); -} - /* * This routine stops operation of the controller on the host side. * On the host os stack side: Admin and IO queues are stopped, @@ -3237,7 +3233,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3291,79 +3286,35 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) } } -static void -__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) -{ - /* - * if state is CONNECTING - the error occurred as part of a - * reconnect attempt. Abort any ios on the association and - * let the create_association error paths resolve things. - */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - __nvme_fc_abort_outstanding_ios(ctrl, true); - return; - } - - /* - * For any other state, kill the association. As this routine - * is a common io abort routine for resetting and such, after - * the association is terminated, ensure that the state is set - * to CONNECTING. - */ - - nvme_stop_keep_alive(&ctrl->ctrl); - - /* will block will waiting for io to terminate */ - nvme_fc_delete_association(ctrl); - - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: error_recovery: Couldn't change state " - "to CONNECTING\n", ctrl->cnum); -} - static void nvme_fc_reset_ctrl_work(struct work_struct *work) { struct nvme_fc_ctrl *ctrl = container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - int ret; - - __nvme_fc_terminate_io(ctrl); nvme_stop_ctrl(&ctrl->ctrl); - if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) - ret = nvme_fc_create_association(ctrl); - else - ret = -ENOTCONN; + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", - ctrl->cnum); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to CONNECTING\n", ctrl->cnum); + + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to schedule connect " + "after reset\n", ctrl->cnum); + } else { + flush_delayed_work(&ctrl->connect_work); + } + } else { + nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN); + } } -static void -nvme_fc_connect_err_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, err_work); - - __nvme_fc_terminate_io(ctrl); - - atomic_set(&ctrl->err_work_active, 0); - - /* - * Rescheduling the connection after recovering - * from the io error is left to the reconnect work - * item, which is what should have stalled waiting on - * the io that had the error that scheduled this work. - */ -} static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", @@ -3491,7 +3442,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); @@ -3499,7 +3449,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3592,7 +3541,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); cancel_work_sync(&ctrl->ctrl.reset_work); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); ctrl->ctrl.opts = NULL; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8bbc48cc45dc..541b0cba6d80 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1768,6 +1768,14 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + /* sanity checking for received data length */ + if (unlikely(wc->byte_len < len)) { + dev_err(queue->ctrl->ctrl.device, + "Unexpected nvme completion length(%d)\n", wc->byte_len); + nvme_rdma_error_recovery(queue->ctrl); + return; + } + ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); /* * AEN requests are special as they don't time out and can diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index aafcbc424b7a..957b39a82431 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -907,8 +907,6 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; - trace_nvmet_req_init(req, req->cmd); - /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { req->error_loc = offsetof(struct nvme_common_command, flags); @@ -938,6 +936,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, if (status) goto fail; + trace_nvmet_req_init(req, req->cmd); + if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 0458046d6501..c14e3249a14d 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -46,19 +46,12 @@ static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) return req->sq->ctrl; } -static inline void __assign_disk_name(char *name, struct nvmet_req *req, - bool init) +static inline void __assign_req_name(char *name, struct nvmet_req *req) { - struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req); - struct nvmet_ns *ns; - - if ((init && req->sq->qid) || (!init && req->cq->qid)) { - ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid); - strncpy(name, ns->device_path, DISK_NAME_LEN); - return; - } - - memset(name, 0, DISK_NAME_LEN); + if (req->ns) + strncpy(name, req->ns->device_path, DISK_NAME_LEN); + else + memset(name, 0, DISK_NAME_LEN); } #endif @@ -81,7 +74,7 @@ TRACE_EVENT(nvmet_req_init, TP_fast_assign( __entry->cmd = cmd; __entry->ctrl = nvmet_req_to_ctrl(req); - __assign_disk_name(__entry->disk, req, true); + __assign_req_name(__entry->disk, req); __entry->qid = req->sq->qid; __entry->cid = cmd->common.command_id; __entry->opcode = cmd->common.opcode; @@ -121,7 +114,7 @@ TRACE_EVENT(nvmet_req_complete, __entry->cid = req->cqe->command_id; __entry->result = le64_to_cpu(req->cqe->result.u64); __entry->status = le16_to_cpu(req->cqe->status) >> 1; - __assign_disk_name(__entry->disk, req, false); + __assign_req_name(__entry->disk, req); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", __print_ctrl_name(__entry->ctrl), diff --git a/drivers/of/device.c b/drivers/of/device.c index 655dee422563..3a469c79e6b0 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -93,7 +93,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, { const struct iommu_ops *iommu; const struct bus_dma_region *map = NULL; - dma_addr_t dma_start = 0; + u64 dma_start = 0; u64 mask, end, size = 0; bool coherent; int ret; @@ -109,10 +109,10 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, return ret == -ENODEV ? 0 : ret; } else { const struct bus_dma_region *r = map; - dma_addr_t dma_end = 0; + u64 dma_end = 0; /* Determine the overall bounds of all DMA regions */ - for (dma_start = ~(dma_addr_t)0; r->size; r++) { + for (dma_start = ~0ULL; r->size; r++) { /* Take lower and upper limits */ if (r->dma_start < dma_start) dma_start = r->dma_start; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 128583dfccf2..c8dd8588f800 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -445,7 +445,7 @@ static int hisi_sas_task_prep(struct sas_task *task, } } - if (scmd) { + if (scmd && hisi_hba->shost->nr_hw_queues) { unsigned int dq_index; u32 blk_tag; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index b1f3017b6547..29fcc44be2d5 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) spin_unlock_irqrestore(hostdata->host->host_lock, flags); } +/** + * ibmvscsi_set_request_limit - Set the adapter request_limit in response to + * an adapter failure, reset, or SRP Login. Done under host lock to prevent + * race with SCSI command submission. + * @hostdata: adapter to adjust + * @limit: new request limit + */ +static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit) +{ + unsigned long flags; + + spin_lock_irqsave(hostdata->host->host_lock, flags); + atomic_set(&hostdata->request_limit, limit); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +} + /** * ibmvscsi_reset_host - Reset the connection to the server * @hostdata: struct ibmvscsi_host_data to reset @@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata) { scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); purge_requests(hostdata, DID_ERROR); hostdata->action = IBMVSCSI_HOST_ACTION_RESET; @@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct) dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n", evt_struct->xfer_iu->srp.login_rej.reason); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; default: dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n", evt_struct->xfer_iu->srp.login_rsp.opcode); /* Login failed. */ - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); return; } @@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct) * This value is set rather than added to request_limit because * request_limit could have been set to -1 by this client. */ - atomic_set(&hostdata->request_limit, + ibmvscsi_set_request_limit(hostdata, be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta)); /* If we had any pending I/Os, kick them */ @@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata) login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); - spin_lock_irqsave(hostdata->host->host_lock, flags); /* Start out with a request limit of 0, since this is negotiated in * the login request we are just sending and login requests always * get sent by the driver regardless of request_limit. */ - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); + spin_lock_irqsave(hostdata->host->host_lock, flags); rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2); spin_unlock_irqrestore(hostdata->host->host_lock, flags); dev_info(hostdata->dev, "sent SRP login\n"); @@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, return; case VIOSRP_CRQ_XPORT_EVENT: /* Hypervisor telling us the connection is closed */ scsi_block_requests(hostdata->host); - atomic_set(&hostdata->request_limit, 0); + ibmvscsi_set_request_limit(hostdata, 0); if (crq->format == 0x06) { /* We need to re-setup the interpartition connection */ dev_info(hostdata->dev, "Re-enabling adapter!\n"); @@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata) } hostdata->action = IBMVSCSI_HOST_ACTION_NONE; + spin_unlock_irqrestore(hostdata->host->host_lock, flags); if (rc) { - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); dev_err(hostdata->dev, "error after %s\n", action); } - spin_unlock_irqrestore(hostdata->host->host_lock, flags); scsi_unblock_requests(hostdata->host); } @@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) init_waitqueue_head(&hostdata->work_wait_q); hostdata->host = host; hostdata->dev = dev; - atomic_set(&hostdata->request_limit, -1); + ibmvscsi_set_request_limit(hostdata, -1); hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT; if (map_persist_bufs(hostdata)) { diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 1f9005125313..b7a1dc24db38 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -554,10 +554,12 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, fcport = qla_rport->fcport; - if (!qpair || !fcport || (qpair && !qpair->fw_started) || - (fcport && fcport->deleted)) + if (!qpair || !fcport) return -ENODEV; + if (!qpair->fw_started || fcport->deleted) + return -EBUSY; + vha = fcport->vha; if (!(fcport->nvme_flag & NVME_FLAG_REGISTERED)) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f2437a7570ce..9af50e6f94c4 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1714,15 +1714,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { - struct async_scan_data *data; + struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; + mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); - return NULL; + goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); @@ -1733,7 +1734,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) goto err; init_completion(&data->prev_finished); - mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); @@ -1748,6 +1748,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) return data; err: + mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index ea84d08747df..590e6d072228 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -194,7 +194,7 @@ struct tcmu_tmr { uint8_t tmr_type; uint32_t tmr_cmd_cnt; - int16_t tmr_cmd_ids[0]; + int16_t tmr_cmd_ids[]; }; /* diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index ef1c550f8266..4b6195666c58 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr u64 paend; struct scatterlist *sg; struct device *dma = mvdev->mdev->device; - int ret; for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { @@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr done: mr->log_size = log_entity_size; mr->nsg = nsg; - ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!ret) + err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!err) goto err_map; err = create_direct_mr(mvdev, mr); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 2629911c29bb..6a90fdb9cbfc 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -38,6 +38,10 @@ static int batch_mapping = 1; module_param(batch_mapping, int, 0444); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); +static char *macaddr; +module_param(macaddr, charp, 0); +MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); + struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; @@ -60,7 +64,8 @@ struct vdpasim_virtqueue { static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM); + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | + (1ULL << VIRTIO_NET_F_MAC); /* State of each vdpasim device */ struct vdpasim { @@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void) spin_lock_init(&vdpasim->iommu_lock); dev = &vdpasim->vdpa.dev; - dev->coherent_dma_mask = DMA_BIT_MASK(64); + dev->dma_mask = &dev->coherent_dma_mask; + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) + goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); vdpasim->iommu = vhost_iotlb_alloc(2048, 0); @@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim->buffer) goto err_iommu; - eth_random_addr(vdpasim->config.mac); + if (macaddr) { + mac_pton(macaddr, vdpasim->config.mac); + if (!is_valid_ether_addr(vdpasim->config.mac)) { + ret = -EADDRNOTAVAIL; + goto err_iommu; + } + } else { + eth_random_addr(vdpasim->config.mac); + } vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); @@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa) return vdpasim->generation; } +static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) +{ + struct vdpa_iova_range range = { + .first = 0ULL, + .last = ULLONG_MAX, + }; + + return range; +} + static int vdpasim_set_map(struct vdpa_device *vdpa, struct vhost_iotlb *iotlb) { @@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, .free = vdpasim_free, @@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, .set_map = vdpasim_set_map, .free = vdpasim_free, }; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a2dbc85e0b0d..2754f3069738 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -47,6 +47,7 @@ struct vhost_vdpa { int minor; struct eventfd_ctx *config_ctx; int in_batch; + struct vdpa_iova_range range; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) vq->call_ctx.producer.token = vq->call_ctx.ctx; vq->call_ctx.producer.irq = irq; ret = irq_bypass_register_producer(&vq->call_ctx.producer); + if (unlikely(ret)) + dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n", + qid, vq->call_ctx.producer.token, ret); } static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) @@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) return 0; } +static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vhost_vdpa_iova_range range = { + .first = v->range.first, + .last = v->range.last, + }; + + return copy_to_user(argp, &range, sizeof(range)); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, void __user *argp = (void __user *)arg; u64 __user *featurep = argp; u64 features; - long r; + long r = 0; if (cmd == VHOST_SET_BACKEND_FEATURES) { - r = copy_from_user(&features, featurep, sizeof(features)); - if (r) - return r; + if (copy_from_user(&features, featurep, sizeof(features))) + return -EFAULT; if (features & ~VHOST_VDPA_BACKEND_FEATURES) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); @@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; - r = copy_to_user(featurep, &features, sizeof(features)); + if (copy_to_user(featurep, &features, sizeof(features))) + r = -EFAULT; + break; + case VHOST_VDPA_GET_IOVA_RANGE: + r = vhost_vdpa_get_iova_range(v, argp); break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); @@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - struct vm_area_struct **vmas; + unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; - unsigned long map_pfn, last_pfn = 0; - unsigned long npages, lock_limit; - unsigned long i, nmap = 0; + unsigned long npages, cur_base, map_pfn, last_pfn = 0; + unsigned long locked, lock_limit, pinned, i; u64 iova = msg->iova; - long pinned; int ret = 0; + if (msg->iova < v->range.first || + msg->iova + msg->size - 1 > v->range.last) + return -EINVAL; + if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; - page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!page_list || !vmas) { - ret = -ENOMEM; - goto free; - } - mmap_read_lock(dev->mm); + locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { + + if (locked > lock_limit) { ret = -ENOMEM; - goto unlock; - } - - pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, - page_list, vmas); - if (npages != pinned) { - if (pinned < 0) { - ret = pinned; - } else { - unpin_user_pages(page_list, pinned); - ret = -ENOMEM; - } - goto unlock; + goto out; } + cur_base = msg->uaddr & PAGE_MASK; iova &= PAGE_MASK; - map_pfn = page_to_pfn(page_list[0]); - /* One more iteration to avoid extra vdpa_map() call out of loop. */ - for (i = 0; i <= npages; i++) { - unsigned long this_pfn; - u64 csize; + while (npages) { + pinned = min_t(unsigned long, npages, list_size); + ret = pin_user_pages(cur_base, pinned, + gup_flags, page_list, NULL); + if (ret != pinned) + goto out; - /* The last chunk may have no valid PFN next to it */ - this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; + if (!last_pfn) + map_pfn = page_to_pfn(page_list[0]); - if (last_pfn && (this_pfn == -1UL || - this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = last_pfn - map_pfn + 1; - ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, - msg->perm); - if (ret) { - /* - * Unpin the rest chunks of memory on the - * flight with no corresponding vdpa_map() - * calls having been made yet. On the other - * hand, vdpa_unmap() in the failure path - * is in charge of accounting the number of - * pinned pages for its own. - * This asymmetrical pattern of accounting - * is for efficiency to pin all pages at - * once, while there is no other callsite - * of vdpa_map() than here above. - */ - unpin_user_pages(&page_list[nmap], - npages - nmap); - goto out; + for (i = 0; i < ret; i++) { + unsigned long this_pfn = page_to_pfn(page_list[i]); + u64 csize; + + if (last_pfn && (this_pfn != last_pfn + 1)) { + /* Pin a contiguous chunk of memory */ + csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; + if (vhost_vdpa_map(v, iova, csize, + map_pfn << PAGE_SHIFT, + msg->perm)) + goto out; + map_pfn = this_pfn; + iova += csize; } - atomic64_add(csize, &dev->mm->pinned_vm); - nmap += csize; - iova += csize << PAGE_SHIFT; - map_pfn = this_pfn; + + last_pfn = this_pfn; } - last_pfn = this_pfn; + + cur_base += ret << PAGE_SHIFT; + npages -= ret; } - WARN_ON(nmap != npages); + /* Pin the rest chunk */ + ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, + map_pfn << PAGE_SHIFT, msg->perm); out: - if (ret) + if (ret) { vhost_vdpa_unmap(v, msg->iova, msg->size); -unlock: + atomic64_sub(npages, &dev->mm->pinned_vm); + } mmap_read_unlock(dev->mm); -free: - kvfree(vmas); - kvfree(page_list); + free_page((unsigned long)page_list); return ret; } @@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v) v->domain = NULL; } +static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) +{ + struct vdpa_iova_range *range = &v->range; + struct iommu_domain_geometry geo; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->get_iova_range) { + *range = ops->get_iova_range(vdpa); + } else if (v->domain && + !iommu_domain_get_attr(v->domain, + DOMAIN_ATTR_GEOMETRY, &geo) && + geo.force_aperture) { + range->first = geo.aperture_start; + range->last = geo.aperture_end; + } else { + range->first = 0; + range->last = ULLONG_MAX; + } +} + static int vhost_vdpa_open(struct inode *inode, struct file *filep) { struct vhost_vdpa *v; @@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) if (r) goto err_init_iotlb; + vhost_vdpa_set_iova_range(v); + filep->private_data = v; return 0; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b6b3d052ca86..fa50e8936f5f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1690,7 +1690,7 @@ struct elf_thread_core_info { struct elf_thread_core_info *next; struct task_struct *task; struct elf_prstatus prstatus; - struct memelfnote notes[0]; + struct memelfnote notes[]; }; struct elf_note_info { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b3268f4ea5f3..771a036867dc 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, int level = ref->level; struct btrfs_key search_key = ref->key_for_search; - root = btrfs_get_fs_root(fs_info, ref->root_id, false); + /* + * If we're search_commit_root we could possibly be holding locks on + * other tree nodes. This happens when qgroups does backref walks when + * adding new delayed refs. To deal with this we need to look in cache + * for the root, and if we don't find it then we need to search the + * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage + * here. + */ + if (path->search_commit_root) + root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id); + else + root = btrfs_get_fs_root(fs_info, ref->root_id, false); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out_free; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index c0f1d6818df7..3ba6f3839d39 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2024,6 +2024,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) key.offset = 0; btrfs_release_path(path); } + btrfs_release_path(path); list_for_each_entry(space_info, &info->space_info, list) { int i; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index aac3d6f4e35b..0378933d163c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3564,6 +3564,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct extent_buffer *eb, int err); +void btrfs_reada_remove_dev(struct btrfs_device *dev); +void btrfs_reada_undo_remove_dev(struct btrfs_device *dev); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 4a0243cb9d97..5b9e3f3ace22 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -688,6 +688,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + if (!scrub_ret) + btrfs_reada_remove_dev(src_device); + /* * We have to use this loop approach because at this point src_device * has to be available for transaction commit to complete, yet new @@ -696,6 +699,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, while (1) { trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { + btrfs_reada_undo_remove_dev(src_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return PTR_ERR(trans); } @@ -746,6 +750,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, up_write(&dev_replace->rwsem); mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + btrfs_reada_undo_remove_dev(src_device); btrfs_rm_dev_replace_blocked(fs_info); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(tgt_device); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e3438672a82..af97ddcc6b3e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, return 0; } -struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, - struct btrfs_key *key) +static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, + struct btrfs_path *path, + struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_fs_info *fs_info = tree_root->fs_info; - struct btrfs_path *path; u64 generation; int ret; int level; - path = btrfs_alloc_path(); - if (!path) - return ERR_PTR(-ENOMEM); - root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS); - if (!root) { - ret = -ENOMEM; - goto alloc_fail; - } + if (!root) + return ERR_PTR(-ENOMEM); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); if (ret) { if (ret > 0) ret = -ENOENT; - goto find_fail; + goto fail; } generation = btrfs_root_generation(&root->root_item); @@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; - goto find_fail; + goto fail; } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { ret = -EIO; - goto find_fail; + goto fail; } root->commit_root = btrfs_root_node(root); -out: - btrfs_free_path(path); return root; - -find_fail: +fail: btrfs_put_root(root); -alloc_fail: - root = ERR_PTR(ret); - goto out; + return ERR_PTR(ret); +} + +struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key) +{ + struct btrfs_root *root; + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return ERR_PTR(-ENOMEM); + root = read_tree_root_path(tree_root, path, key); + btrfs_free_path(path); + + return root; } /* @@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, return root; } +static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info, + u64 objectid) +{ + if (objectid == BTRFS_ROOT_TREE_OBJECTID) + return btrfs_grab_root(fs_info->tree_root); + if (objectid == BTRFS_EXTENT_TREE_OBJECTID) + return btrfs_grab_root(fs_info->extent_root); + if (objectid == BTRFS_CHUNK_TREE_OBJECTID) + return btrfs_grab_root(fs_info->chunk_root); + if (objectid == BTRFS_DEV_TREE_OBJECTID) + return btrfs_grab_root(fs_info->dev_root); + if (objectid == BTRFS_CSUM_TREE_OBJECTID) + return btrfs_grab_root(fs_info->csum_root); + if (objectid == BTRFS_QUOTA_TREE_OBJECTID) + return btrfs_grab_root(fs_info->quota_root) ? + fs_info->quota_root : ERR_PTR(-ENOENT); + if (objectid == BTRFS_UUID_TREE_OBJECTID) + return btrfs_grab_root(fs_info->uuid_root) ? + fs_info->uuid_root : ERR_PTR(-ENOENT); + if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) + return btrfs_grab_root(fs_info->free_space_root) ? + fs_info->free_space_root : ERR_PTR(-ENOENT); + return NULL; +} + int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { @@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, struct btrfs_key key; int ret; - if (objectid == BTRFS_ROOT_TREE_OBJECTID) - return btrfs_grab_root(fs_info->tree_root); - if (objectid == BTRFS_EXTENT_TREE_OBJECTID) - return btrfs_grab_root(fs_info->extent_root); - if (objectid == BTRFS_CHUNK_TREE_OBJECTID) - return btrfs_grab_root(fs_info->chunk_root); - if (objectid == BTRFS_DEV_TREE_OBJECTID) - return btrfs_grab_root(fs_info->dev_root); - if (objectid == BTRFS_CSUM_TREE_OBJECTID) - return btrfs_grab_root(fs_info->csum_root); - if (objectid == BTRFS_QUOTA_TREE_OBJECTID) - return btrfs_grab_root(fs_info->quota_root) ? - fs_info->quota_root : ERR_PTR(-ENOENT); - if (objectid == BTRFS_UUID_TREE_OBJECTID) - return btrfs_grab_root(fs_info->uuid_root) ? - fs_info->uuid_root : ERR_PTR(-ENOENT); - if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) - return btrfs_grab_root(fs_info->free_space_root) ? - fs_info->free_space_root : ERR_PTR(-ENOENT); + root = btrfs_get_global_root(fs_info, objectid); + if (root) + return root; again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { @@ -1621,6 +1634,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } +/* + * btrfs_get_fs_root_commit_root - return a root for the given objectid + * @fs_info: the fs_info + * @objectid: the objectid we need to lookup + * + * This is exclusively used for backref walking, and exists specifically because + * of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref + * creation time, which means we may have to read the tree_root in order to look + * up a fs root that is not in memory. If the root is not in memory we will + * read the tree root commit root and look up the fs root from there. This is a + * temporary root, it will not be inserted into the radix tree as it doesn't + * have the most uptodate information, it'll simply be discarded once the + * backref code is finished using the root. + */ +struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_root *root; + struct btrfs_key key; + + ASSERT(path->search_commit_root && path->skip_locking); + + /* + * This can return -ENOENT if we ask for a root that doesn't exist, but + * since this is called via the backref walking code we won't be looking + * up a root that doesn't exist, unless there's corruption. So if root + * != NULL just return it. + */ + root = btrfs_get_global_root(fs_info, objectid); + if (root) + return root; + + root = btrfs_lookup_fs_root(fs_info, objectid); + if (root) + return root; + + key.objectid = objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = read_tree_root_path(fs_info->tree_root, path, &key); + btrfs_release_path(path); + + return root; +} + /* * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fee69ced58b4..182540bdcea0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, dev_t anon_dev); +struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, + struct btrfs_path *path, + u64 objectid); void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3b21fee13e77..5fd60b13f4f8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3185,7 +3185,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_tree_block_info *bi; if (item_size < sizeof(*ei) + sizeof(*bi)) { btrfs_crit(info, -"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu", +"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu", key.objectid, key.type, key.offset, owner_objectid, item_size, sizeof(*ei) + sizeof(*bi)); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0ff659455b1e..87355a38a654 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3628,7 +3628,8 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); ret = btrfs_direct_IO(iocb, to); inode_unlock_shared(inode); - if (ret < 0) + if (ret < 0 || !iov_iter_count(to) || + iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp))) return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 936c3137c646..da58c58ef9aa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9672,10 +9672,16 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, * clear_offset by our extent size. */ clear_offset += ins.offset; - btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset); + /* + * Now that we inserted the prealloc extent we can finally + * decrement the number of reservations in the block group. + * If we did it before, we could race with relocation and have + * relocation miss the reserved extent, making it fail later. + */ + btrfs_dec_block_group_reservations(fs_info, ins.objectid); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_free_reserved_extent(fs_info, ins.objectid, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 580899bdb991..c54ea6586632 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1026,6 +1026,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.type == BTRFS_ROOT_REF_KEY) { + + /* Release locks on tree_root before we access quota_root */ + btrfs_release_path(path); + ret = add_qgroup_item(trans, quota_root, found_key.offset); if (ret) { @@ -1044,6 +1048,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) btrfs_abort_transaction(trans, ret); goto out_free_path; } + ret = btrfs_search_slot_for_read(tree_root, &found_key, + path, 1, 0); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + goto out_free_path; + } + if (ret > 0) { + /* + * Shouldn't happen, but in case it does we + * don't need to do the btrfs_next_item, just + * continue. + */ + continue; + } } ret = btrfs_next_item(tree_root, path); if (ret < 0) { diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 9d4f5316a7e8..d9a166eb344e 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!dev->bdev) continue; + if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state)) + continue; + if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { /* @@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, } have_zone = 1; } + if (!have_zone) + radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); up_read(&fs_info->dev_replace.rwsem); @@ -1020,3 +1025,45 @@ void btrfs_reada_detach(void *handle) kref_put(&rc->refcnt, reada_control_release); } + +/* + * Before removing a device (device replace or device remove ioctls), call this + * function to wait for all existing readahead requests on the device and to + * make sure no one queues more readahead requests for the device. + * + * Must be called without holding neither the device list mutex nor the device + * replace semaphore, otherwise it will deadlock. + */ +void btrfs_reada_remove_dev(struct btrfs_device *dev) +{ + struct btrfs_fs_info *fs_info = dev->fs_info; + + /* Serialize with readahead extent creation at reada_find_extent(). */ + spin_lock(&fs_info->reada_lock); + set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&fs_info->reada_lock); + + /* + * There might be readahead requests added to the radix trees which + * were not yet added to the readahead work queue. We need to start + * them and wait for their completion, otherwise we can end up with + * use-after-free problems when dropping the last reference on the + * readahead extents and their zones, as they need to access the + * device structure. + */ + reada_start_machine(fs_info); + btrfs_flush_workqueue(fs_info->readahead_workers); +} + +/* + * If when removing a device (device replace or device remove ioctls) an error + * happens after calling btrfs_reada_remove_dev(), call this to undo what that + * function did. This is safe to call even if btrfs_reada_remove_dev() was not + * called before. + */ +void btrfs_reada_undo_remove_dev(struct btrfs_device *dev) +{ + spin_lock(&dev->fs_info->reada_lock); + clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state); + spin_unlock(&dev->fs_info->reada_lock); +} diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index f0ffd5ee77bd..8784b74f5232 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -760,18 +760,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, u64 type; u64 features; bool mixed = false; + int raid_index; + int nparity; + int ncopies; length = btrfs_chunk_length(leaf, chunk); stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); type = btrfs_chunk_type(leaf, chunk); + raid_index = btrfs_bg_flags_to_raid_index(type); + ncopies = btrfs_raid_array[raid_index].ncopies; + nparity = btrfs_raid_array[raid_index].nparity; if (!num_stripes) { chunk_err(leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); return -EUCLEAN; } + if (num_stripes < ncopies) { + chunk_err(leaf, chunk, logical, + "invalid chunk num_stripes < ncopies, have %u < %d", + num_stripes, ncopies); + return -EUCLEAN; + } + if (nparity && num_stripes == nparity) { + chunk_err(leaf, chunk, logical, + "invalid chunk num_stripes == nparity, have %u == %d", + num_stripes, nparity); + return -EUCLEAN; + } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { chunk_err(leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 58b9c419a2b6..b1e48078c318 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info) atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - btrfs_device_data_ordered_init(dev); + btrfs_device_data_ordered_init(dev, fs_info); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); extent_io_tree_init(fs_info, &dev->alloc_state, @@ -2099,6 +2099,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + if (!ret) + btrfs_reada_remove_dev(device); mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -2179,6 +2181,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, return ret; error_undo: + btrfs_reada_undo_remove_dev(device); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); list_add(&device->dev_alloc_list, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bf27ac07d315..232f02bd214f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -39,10 +39,10 @@ struct btrfs_io_geometry { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include #define __BTRFS_NEED_DEVICE_DATA_ORDERED -#define btrfs_device_data_ordered_init(device) \ - seqcount_init(&device->data_seqcount) +#define btrfs_device_data_ordered_init(device, info) \ + seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex) #else -#define btrfs_device_data_ordered_init(device) do { } while (0) +#define btrfs_device_data_ordered_init(device, info) do { } while (0) #endif #define BTRFS_DEV_STATE_WRITEABLE (0) @@ -50,6 +50,7 @@ struct btrfs_io_geometry { #define BTRFS_DEV_STATE_MISSING (2) #define BTRFS_DEV_STATE_REPLACE_TGT (3) #define BTRFS_DEV_STATE_FLUSH_SENT (4) +#define BTRFS_DEV_STATE_NO_READA (5) struct btrfs_device { struct list_head dev_list; /* device_list_mutex */ @@ -71,7 +72,8 @@ struct btrfs_device { blk_status_t last_flush_error; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED - seqcount_t data_seqcount; + /* A seqcount_t with associated chunk_mutex (for lockdep) */ + seqcount_mutex_t data_seqcount; #endif /* the internal btrfs device id */ @@ -162,11 +164,9 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ - preempt_disable(); \ write_seqcount_begin(&dev->data_seqcount); \ dev->name = size; \ write_seqcount_end(&dev->data_seqcount); \ - preempt_enable(); \ } #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index dcc2aab1b2c4..4ba45caf5939 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -60,7 +60,7 @@ struct hfs_bnode { wait_queue_head_t lock_wq; atomic_t refcnt; unsigned int page_offset; - struct page *page[0]; + struct page *page[]; }; #define HFS_BNODE_ERROR 0 diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 3b03fff68543..a92de5199ec3 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -117,7 +117,7 @@ struct hfs_bnode { wait_queue_head_t lock_wq; atomic_t refcnt; unsigned int page_offset; - struct page *page[0]; + struct page *page[]; }; #define HFS_BNODE_LOCK 0 diff --git a/fs/io_uring.c b/fs/io_uring.c index b42dfa0243bf..a7429c977eb3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1365,6 +1365,9 @@ static void io_prep_async_work(struct io_kiocb *req) io_req_init_async(req); id = req->work.identity; + if (req->flags & REQ_F_FORCE_ASYNC) + req->work.flags |= IO_WQ_WORK_CONCURRENT; + if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) io_wq_hash_work(&req->work, file_inode(req->file)); @@ -1846,59 +1849,39 @@ static void __io_free_req(struct io_kiocb *req) percpu_ref_put(&ctx->refs); } -static bool io_link_cancel_timeout(struct io_kiocb *req) +static void io_kill_linked_timeout(struct io_kiocb *req) { - struct io_timeout_data *io = req->async_data; struct io_ring_ctx *ctx = req->ctx; - int ret; - - ret = hrtimer_try_to_cancel(&io->timer); - if (ret != -1) { - io_cqring_fill_event(req, -ECANCELED); - io_commit_cqring(ctx); - req->flags &= ~REQ_F_LINK_HEAD; - io_put_req_deferred(req, 1); - return true; - } - - return false; -} - -static bool __io_kill_linked_timeout(struct io_kiocb *req) -{ struct io_kiocb *link; - bool wake_ev; + bool cancelled = false; + unsigned long flags; - if (list_empty(&req->link_list)) - return false; - link = list_first_entry(&req->link_list, struct io_kiocb, link_list); - if (link->opcode != IORING_OP_LINK_TIMEOUT) - return false; + spin_lock_irqsave(&ctx->completion_lock, flags); + link = list_first_entry_or_null(&req->link_list, struct io_kiocb, + link_list); /* * Can happen if a linked timeout fired and link had been like * req -> link t-out -> link t-out [-> ...] */ - if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE)) - return false; + if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) { + struct io_timeout_data *io = link->async_data; + int ret; - list_del_init(&link->link_list); - wake_ev = io_link_cancel_timeout(link); + list_del_init(&link->link_list); + ret = hrtimer_try_to_cancel(&io->timer); + if (ret != -1) { + io_cqring_fill_event(link, -ECANCELED); + io_commit_cqring(ctx); + cancelled = true; + } + } req->flags &= ~REQ_F_LINK_TIMEOUT; - return wake_ev; -} - -static void io_kill_linked_timeout(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - bool wake_ev; - - spin_lock_irqsave(&ctx->completion_lock, flags); - wake_ev = __io_kill_linked_timeout(req); spin_unlock_irqrestore(&ctx->completion_lock, flags); - if (wake_ev) + if (cancelled) { io_cqring_ev_posted(ctx); + io_put_req(link); + } } static struct io_kiocb *io_req_link_next(struct io_kiocb *req) @@ -4977,8 +4960,10 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, /* make sure double remove sees this as being gone */ wait->private = NULL; spin_unlock(&poll->head->lock); - if (!done) - __io_async_wake(req, poll, mask, io_poll_task_func); + if (!done) { + /* use wait func handler, so it matches the rq type */ + poll->wait.func(&poll->wait, mode, sync, key); + } } refcount_dec(&req->refs); return 1; @@ -6180,7 +6165,6 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) { struct io_kiocb *linked_timeout; - struct io_kiocb *nxt; const struct cred *old_creds = NULL; int ret; @@ -6206,7 +6190,6 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) */ if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { if (!io_arm_poll_handler(req)) { -punt: /* * Queued up for async execution, worker will release * submit reference when the iocb is actually submitted. @@ -6216,33 +6199,25 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs) if (linked_timeout) io_queue_linked_timeout(linked_timeout); - goto exit; - } + } else if (likely(!ret)) { + /* drop submission reference */ + req = io_put_req_find_next(req); + if (linked_timeout) + io_queue_linked_timeout(linked_timeout); - if (unlikely(ret)) { + if (req) { + if (!(req->flags & REQ_F_FORCE_ASYNC)) + goto again; + io_queue_async_work(req); + } + } else { /* un-prep timeout, so it'll be killed as any other linked */ req->flags &= ~REQ_F_LINK_TIMEOUT; req_set_fail_links(req); io_put_req(req); io_req_complete(req, ret); - goto exit; } - /* drop submission reference */ - nxt = io_put_req_find_next(req); - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); - - if (nxt) { - req = nxt; - - if (req->flags & REQ_F_FORCE_ASYNC) { - linked_timeout = NULL; - goto punt; - } - goto again; - } -exit: if (old_creds) revert_creds(old_creds); } @@ -6266,13 +6241,6 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (unlikely(ret)) goto fail_req; } - - /* - * Never try inline submit of IOSQE_ASYNC is set, go straight - * to async execution. - */ - io_req_init_async(req); - req->work.flags |= IO_WQ_WORK_CONCURRENT; io_queue_async_work(req); } else { if (sqe) { diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h index 1558cf22ef8a..ee9660e9671c 100644 --- a/fs/isofs/rock.h +++ b/fs/isofs/rock.h @@ -22,7 +22,7 @@ struct SU_ER_s { __u8 len_des; __u8 len_src; __u8 ext_ver; - __u8 data[0]; + __u8 data[]; } __attribute__ ((packed)); struct RR_RR_s { @@ -44,7 +44,7 @@ struct RR_PN_s { struct SL_component { __u8 flags; __u8 len; - __u8 text[0]; + __u8 text[]; } __attribute__ ((packed)); struct RR_SL_s { @@ -54,7 +54,7 @@ struct RR_SL_s { struct RR_NM_s { __u8 flags; - char name[0]; + char name[]; } __attribute__ ((packed)); struct RR_CL_s { @@ -71,7 +71,7 @@ struct stamp { struct RR_TF_s { __u8 flags; - struct stamp times[0]; /* Variable number of these beasts */ + struct stamp times[]; /* Variable number of these beasts */ } __attribute__ ((packed)); /* Linux-specific extension for transparent decompression */ diff --git a/fs/select.c b/fs/select.c index 7aef49552d4c..ebfebdfe5c69 100644 --- a/fs/select.c +++ b/fs/select.c @@ -97,7 +97,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv) struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; - struct poll_table_entry entries[0]; + struct poll_table_entry entries[]; }; #define POLL_TABLE_FULL(table) \ @@ -836,7 +836,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) struct poll_list { struct poll_list *next; int len; - struct pollfd entries[0]; + struct pollfd entries[]; }; #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 5896441ee604..efa2f0309f00 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -47,7 +47,7 @@ struct cppi5_host_desc_t { u32 buf_info1; u32 org_buf_len; u64 org_buf_ptr; - u32 epib[0]; + u32 epib[]; } __packed; #define CPPI5_DESC_MIN_ALIGN (16U) @@ -139,7 +139,7 @@ struct cppi5_desc_epib_t { */ struct cppi5_monolithic_desc_t { struct cppi5_desc_hdr_t hdr; - u32 epib[0]; + u32 epib[]; }; #define CPPI5_INFO2_MDESC_DATA_OFFSET_SHIFT (18U) diff --git a/include/linux/fs.h b/include/linux/fs.h index f944002689e2..43551c90056b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3287,7 +3287,7 @@ static inline ino_t parent_ino(struct dentry *dentry) */ struct simple_transaction_argresp { ssize_t size; - char data[0]; + char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h index 9542b41eacfd..35ce84c8ca02 100644 --- a/include/linux/mailbox/zynqmp-ipi-message.h +++ b/include/linux/mailbox/zynqmp-ipi-message.h @@ -14,7 +14,7 @@ */ struct zynqmp_ipi_message { size_t len; - u8 data[0]; + u8 data[]; }; #endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 651591a2965d..a092346c7b2d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5823,7 +5823,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits { u8 reserved_at_68[0x10]; u8 num_of_actions[0x8]; - union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0]; + union mlx5_ifc_set_add_copy_action_in_auto_bits actions[]; }; struct mlx5_ifc_dealloc_modify_header_context_out_bits { @@ -9761,7 +9761,7 @@ struct mlx5_ifc_mcda_reg_bits { u8 reserved_at_60[0x20]; - u8 data[0][0x20]; + u8 data[][0x20]; }; enum { diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9..a3a9a878415f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1419,7 +1419,7 @@ struct ec_response_flash_info_2 { uint16_t num_banks_total; /* Number of banks described in banks array. */ uint16_t num_banks_desc; - struct ec_flash_bank banks[0]; + struct ec_flash_bank banks[]; } __ec_align4; /* @@ -2420,12 +2420,12 @@ struct ec_response_motion_sense_fifo_info { /* Total amount of vector lost */ uint16_t total_lost; /* Lost events since the last fifo_info, per sensors */ - uint16_t lost[0]; + uint16_t lost[]; } __ec_todo_packed; struct ec_response_motion_sense_fifo_data { uint32_t number_data; - struct ec_response_motion_sensor_data data[0]; + struct ec_response_motion_sensor_data data[]; } __ec_todo_packed; /* List supported activity recognition */ @@ -3093,7 +3093,7 @@ struct ec_response_tmp006_get_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved[2]; - float val[0]; + float val[]; } __ec_align4; struct ec_params_tmp006_set_calibration_v1 { @@ -3101,7 +3101,7 @@ struct ec_params_tmp006_set_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved; - float val[0]; + float val[]; } __ec_align4; @@ -5076,7 +5076,7 @@ struct ec_response_pd_log { uint8_t type; /* event type : see PD_EVENT_xx below */ uint8_t size_port; /* [7:5] port number [4:0] payload size in bytes */ uint16_t data; /* type-defined data payload */ - uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */ + uint8_t payload[]; /* optional additional data payload: 0..16 bytes */ } __ec_align4; /* The timestamp is the microsecond counter shifted to get about a ms. */ @@ -5789,7 +5789,7 @@ struct ec_response_fp_encryption_status { struct ec_response_tp_frame_info { uint32_t n_frames; - uint32_t frame_sizes[0]; + uint32_t frame_sizes[]; } __ec_align4; /* Create a snapshot of current frame readings */ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4a415ae851ef..02599687770c 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -69,7 +69,7 @@ struct cros_ec_command { uint32_t outsize; uint32_t insize; uint32_t result; - uint8_t data[0]; + uint8_t data[]; }; /** diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index eae0bfd87d91..30bc7a7223bb 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -52,6 +52,16 @@ struct vdpa_device { int nvqs; }; +/** + * vDPA IOVA range - the IOVA range support by the device + * @first: start of the IOVA range + * @last: end of the IOVA range + */ +struct vdpa_iova_range { + u64 first; + u64 last; +}; + /** * vDPA_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the @@ -151,6 +161,10 @@ struct vdpa_device { * @get_generation: Get device config generation (optional) * @vdev: vdpa device * Returns u32: device generation + * @get_iova_range: Get supported iova range (optional) + * @vdev: vdpa device + * Returns the iova range supported by + * the device. * @set_map: Set device memory mapping (optional) * Needed for device that using device * specific DMA translation (on-chip IOMMU) @@ -216,6 +230,7 @@ struct vdpa_config_ops { void (*set_config)(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int len); u32 (*get_generation)(struct vdpa_device *vdev); + struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); /* DMA ops */ int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 9a269a88a6ff..f7f6a3a28977 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -138,6 +138,15 @@ struct vhost_vdpa_config { __u8 buf[0]; }; +/* vhost vdpa IOVA range + * @first: First address that can be mapped by vhost-vDPA + * @last: Last address that can be mapped by vhost-vDPA + */ +struct vhost_vdpa_iova_range { + __u64 first; + __u64 last; +}; + /* Feature bits */ /* Log all write descriptors. Can be changed while device is active. */ #define VHOST_F_LOG_ALL 26 diff --git a/kernel/params.c b/kernel/params.c index 3835fb82c64b..164d79330849 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -530,7 +530,7 @@ struct module_param_attrs { unsigned int num; struct attribute_group grp; - struct param_attribute attrs[0]; + struct param_attribute attrs[]; }; #ifdef CONFIG_SYSFS diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 24a960a89aa8..6b1525685277 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring)) */ struct prb_data_block { unsigned long id; - char data[0]; + char data[]; }; /* diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 26efd22f0633..3f659f855074 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints; */ struct tp_probes { struct rcu_head rcu; - struct tracepoint_func probes[0]; + struct tracepoint_func probes[]; }; static inline void *allocate_probes(int count) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 0a482ef988e5..a59778946404 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -933,7 +933,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, sg_miter_start(&miter, sgl, nents, sg_flags); if (!sg_miter_skip(&miter, skip)) - return false; + return 0; while ((offset < buflen) && sg_miter_next(&miter)) { unsigned int len; diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 8579bfeb2836..4b39534a14a1 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -12,12 +12,13 @@ struct msft_cp_read_supported_features { __u8 sub_opcode; } __packed; + struct msft_rp_read_supported_features { __u8 status; __u8 sub_opcode; __le64 features; __u8 evt_prefix_len; - __u8 evt_prefix[0]; + __u8 evt_prefix[]; } __packed; struct msft_data { diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index b3f46ab79e47..c579d1d5995a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -124,7 +124,7 @@ struct smc_clc_v2_extension { struct smc_clnt_opts_area_hdr hdr; u8 roce[16]; /* RoCEv2 GID */ u8 reserved[16]; - u8 user_eids[0][SMC_MAX_EID_LEN]; + u8 user_eids[][SMC_MAX_EID_LEN]; }; struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ @@ -143,7 +143,7 @@ struct smc_clc_msg_smcd { /* SMC-D GID information */ struct smc_clc_smcd_v2_extension { u8 system_eid[SMC_MAX_EID_LEN]; u8 reserved[16]; - struct smc_clc_smcd_gid_chid gidchid[0]; + struct smc_clc_smcd_gid_chid gidchid[]; }; struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 38043074ce5e..6ebefec616e4 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -101,7 +101,7 @@ struct ima_template_entry { struct tpm_digest *digests; struct ima_template_desc *template_desc; /* template descriptor */ u32 template_data_len; - struct ima_field_data template_data[0]; /* template related data */ + struct ima_field_data template_data[]; /* template related data */ }; struct ima_queue_entry {