Merge c2dc4c073f ("Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost") into android-mainline

Steps on the way to 5.10-rc2

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I686e55205b113f69b9ea8a22c56d86a572e8c603
This commit is contained in:
Greg Kroah-Hartman 2020-11-02 11:59:56 +01:00
commit b0748cf3e1
64 changed files with 787 additions and 503 deletions

View File

@ -1044,6 +1044,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
ssize_t size, left;
unsigned len, i;
size_t offset;
int ret = 0;
if (WARN_ON_ONCE(!max_append_sectors))
return 0;
@ -1066,15 +1067,17 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len)
return -EINVAL;
max_append_sectors, &same_page) != len) {
ret = -EINVAL;
break;
}
if (same_page)
put_page(page);
offset = 0;
}
iov_iter_advance(iter, size);
return 0;
iov_iter_advance(iter, size - left);
return ret;
}
/**

View File

@ -657,13 +657,20 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
goto fail;
}
if (radix_tree_preload(GFP_KERNEL)) {
blkg_free(new_blkg);
ret = -ENOMEM;
goto fail;
}
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
blkg = blkg_lookup_check(pos, pol, q);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto fail_unlock;
blkg_free(new_blkg);
goto fail_preloaded;
}
if (blkg) {
@ -672,10 +679,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
blkg = blkg_create(pos, q, new_blkg);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto fail_unlock;
goto fail_preloaded;
}
}
radix_tree_preload_end();
if (pos == blkcg)
goto success;
}
@ -685,6 +694,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
ctx->body = input;
return 0;
fail_preloaded:
radix_tree_preload_end();
fail_unlock:
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();

View File

@ -225,6 +225,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
if (!refcount_dec_and_test(&flush_rq->ref)) {
fq->rq_status = error;
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);

View File

@ -2100,7 +2100,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap)
pp->dhfis_bits &= ~done_mask;
pp->dmafis_bits &= ~done_mask;
pp->sdbfis_bits |= done_mask;
ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
if (!ap->qc_active) {
DPRINTK("over\n");

View File

@ -296,7 +296,7 @@ static void nbd_size_clear(struct nbd_device *nbd)
}
}
static void nbd_size_update(struct nbd_device *nbd)
static void nbd_size_update(struct nbd_device *nbd, bool start)
{
struct nbd_config *config = nbd->config;
struct block_device *bdev = bdget_disk(nbd->disk, 0);
@ -313,7 +313,8 @@ static void nbd_size_update(struct nbd_device *nbd)
if (bdev) {
if (bdev->bd_disk) {
bd_set_nr_sectors(bdev, nr_sectors);
set_blocksize(bdev, config->blksize);
if (start)
set_blocksize(bdev, config->blksize);
} else
set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
bdput(bdev);
@ -328,7 +329,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
if (nbd->task_recv != NULL)
nbd_size_update(nbd);
nbd_size_update(nbd, false);
}
static void nbd_complete_rq(struct request *req)
@ -1308,7 +1309,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
nbd_size_update(nbd);
nbd_size_update(nbd, true);
return error;
}

View File

@ -47,6 +47,8 @@ struct nullb_device {
unsigned int nr_zones_closed;
struct blk_zone *zones;
sector_t zone_size_sects;
spinlock_t zone_dev_lock;
unsigned long *zone_locks;
unsigned long size; /* device size in MB */
unsigned long completion_nsec; /* time in ns to complete a request */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "null_blk.h"
#define CREATE_TRACE_POINTS
@ -45,6 +46,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
if (!dev->zones)
return -ENOMEM;
spin_lock_init(&dev->zone_dev_lock);
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
if (!dev->zone_locks) {
kvfree(dev->zones);
return -ENOMEM;
}
if (dev->zone_nr_conv >= dev->nr_zones) {
dev->zone_nr_conv = dev->nr_zones - 1;
pr_info("changed the number of conventional zones to %u",
@ -123,15 +131,26 @@ int null_register_zoned_dev(struct nullb *nullb)
/*
* Release the zoned-device state hanging off @dev: the per-zone lock
* bitmap (dev->zone_locks) and the zone array (dev->zones).
*/
void null_free_zoned_dev(struct nullb_device *dev)
{
bitmap_free(dev->zone_locks);
kvfree(dev->zones);
}
/*
* Take the lock bit of zone @zno in the dev->zone_locks bitmap, waiting in
* TASK_UNINTERRUPTIBLE state via wait_on_bit_lock_io(). The return value
* of wait_on_bit_lock_io() is not checked. Paired with null_unlock_zone().
*/
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
{
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
}
/*
* Release the lock bit of zone @zno in the dev->zone_locks bitmap with
* clear_and_wake_up_bit(). Counterpart of null_lock_zone().
*/
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
{
clear_and_wake_up_bit(zno, dev->zone_locks);
}
int null_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nullb *nullb = disk->private_data;
struct nullb_device *dev = nullb->dev;
unsigned int first_zone, i;
unsigned int first_zone, i, zno;
struct blk_zone zone;
int error;
@ -142,15 +161,18 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
nr_zones = min(nr_zones, dev->nr_zones - first_zone);
trace_nullb_report_zones(nullb, nr_zones);
for (i = 0; i < nr_zones; i++) {
zno = first_zone;
for (i = 0; i < nr_zones; i++, zno++) {
/*
* Stacked DM target drivers will remap the zone information by
* modifying the zone information passed to the report callback.
* So use a local copy to avoid corruption of the device zone
* array.
*/
memcpy(&zone, &dev->zones[first_zone + i],
sizeof(struct blk_zone));
null_lock_zone(dev, zno);
memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone));
null_unlock_zone(dev, zno);
error = cb(&zone, i, data);
if (error)
return error;
@ -159,6 +181,10 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
return nr_zones;
}
/*
* This is called in the case of memory backing from null_process_cmd()
* with the target zone already locked.
*/
size_t null_zone_valid_read_len(struct nullb *nullb,
sector_t sector, unsigned int len)
{
@ -295,22 +321,27 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
null_lock_zone(dev, zno);
spin_lock(&dev->zone_dev_lock);
switch (zone->cond) {
case BLK_ZONE_COND_FULL:
/* Cannot write to a full zone */
return BLK_STS_IOERR;
ret = BLK_STS_IOERR;
goto unlock;
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_CLOSED:
ret = null_check_zone_resources(dev, zone);
if (ret != BLK_STS_OK)
return ret;
goto unlock;
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
break;
default:
/* Invalid zone condition */
return BLK_STS_IOERR;
ret = BLK_STS_IOERR;
goto unlock;
}
/*
@ -326,11 +357,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
else
cmd->rq->__sector = sector;
} else if (sector != zone->wp) {
return BLK_STS_IOERR;
ret = BLK_STS_IOERR;
goto unlock;
}
if (zone->wp + nr_sectors > zone->start + zone->capacity)
return BLK_STS_IOERR;
if (zone->wp + nr_sectors > zone->start + zone->capacity) {
ret = BLK_STS_IOERR;
goto unlock;
}
if (zone->cond == BLK_ZONE_COND_CLOSED) {
dev->nr_zones_closed--;
@ -341,9 +375,11 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
spin_unlock(&dev->zone_dev_lock);
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
spin_lock(&dev->zone_dev_lock);
if (ret != BLK_STS_OK)
return ret;
goto unlock;
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity) {
@ -353,7 +389,13 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
dev->nr_zones_imp_open--;
zone->cond = BLK_ZONE_COND_FULL;
}
return BLK_STS_OK;
ret = BLK_STS_OK;
unlock:
spin_unlock(&dev->zone_dev_lock);
null_unlock_zone(dev, zno);
return ret;
}
static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
@ -464,16 +506,33 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
unsigned int zone_no = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zone_no];
blk_status_t ret = BLK_STS_OK;
unsigned int zone_no;
struct blk_zone *zone;
blk_status_t ret;
size_t i;
if (op == REQ_OP_ZONE_RESET_ALL) {
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
null_lock_zone(dev, i);
zone = &dev->zones[i];
if (zone->cond != BLK_ZONE_COND_EMPTY) {
spin_lock(&dev->zone_dev_lock);
null_reset_zone(dev, zone);
spin_unlock(&dev->zone_dev_lock);
trace_nullb_zone_op(cmd, i, zone->cond);
}
null_unlock_zone(dev, i);
}
return BLK_STS_OK;
}
zone_no = null_zone_no(dev, sector);
zone = &dev->zones[zone_no];
null_lock_zone(dev, zone_no);
spin_lock(&dev->zone_dev_lock);
switch (op) {
case REQ_OP_ZONE_RESET_ALL:
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++)
null_reset_zone(dev, &dev->zones[i]);
break;
case REQ_OP_ZONE_RESET:
ret = null_reset_zone(dev, zone);
break;
@ -487,30 +546,46 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
ret = null_finish_zone(dev, zone);
break;
default:
return BLK_STS_NOTSUPP;
ret = BLK_STS_NOTSUPP;
break;
}
spin_unlock(&dev->zone_dev_lock);
if (ret == BLK_STS_OK)
trace_nullb_zone_op(cmd, zone_no, zone->cond);
null_unlock_zone(dev, zone_no);
return ret;
}
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
sector_t sector, sector_t nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
blk_status_t sts;
switch (op) {
case REQ_OP_WRITE:
return null_zone_write(cmd, sector, nr_sectors, false);
sts = null_zone_write(cmd, sector, nr_sectors, false);
break;
case REQ_OP_ZONE_APPEND:
return null_zone_write(cmd, sector, nr_sectors, true);
sts = null_zone_write(cmd, sector, nr_sectors, true);
break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
return null_zone_mgmt(cmd, op, sector);
sts = null_zone_mgmt(cmd, op, sector);
break;
default:
return null_process_cmd(cmd, op, sector, nr_sectors);
null_lock_zone(dev, zno);
sts = null_process_cmd(cmd, op, sector, nr_sectors);
null_unlock_zone(dev, zno);
}
return sts;
}

View File

@ -443,22 +443,27 @@ static void ace_fix_driveid(u16 *id)
#define ACE_FSM_NUM_STATES 11
/* Set flag to exit FSM loop and reschedule tasklet */
static inline void ace_fsm_yield(struct ace_device *ace)
static inline void ace_fsm_yieldpoll(struct ace_device *ace)
{
dev_dbg(ace->dev, "ace_fsm_yield()\n");
tasklet_schedule(&ace->fsm_tasklet);
ace->fsm_continue_flag = 0;
}
/* Log this function's name at debug level, then yield through ace_fsm_yieldpoll() */
static inline void ace_fsm_yield(struct ace_device *ace)
{
dev_dbg(ace->dev, "%s()\n", __func__);
ace_fsm_yieldpoll(ace);
}
/* Set flag to exit FSM loop and wait for IRQ to reschedule tasklet */
static inline void ace_fsm_yieldirq(struct ace_device *ace)
{
dev_dbg(ace->dev, "ace_fsm_yieldirq()\n");
if (!ace->irq)
/* No IRQ assigned, so need to poll */
tasklet_schedule(&ace->fsm_tasklet);
ace->fsm_continue_flag = 0;
if (ace->irq > 0)
ace->fsm_continue_flag = 0;
else
ace_fsm_yieldpoll(ace);
}
static bool ace_has_next_request(struct request_queue *q)
@ -1053,12 +1058,12 @@ static int ace_setup(struct ace_device *ace)
ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ);
/* Now we can hook up the irq handler */
if (ace->irq) {
if (ace->irq > 0) {
rc = request_irq(ace->irq, ace_interrupt, 0, "systemace", ace);
if (rc) {
/* Failure - fall back to polled mode */
dev_err(ace->dev, "request_irq failed\n");
ace->irq = 0;
ace->irq = rc;
}
}
@ -1110,7 +1115,7 @@ static void ace_teardown(struct ace_device *ace)
tasklet_kill(&ace->fsm_tasklet);
if (ace->irq)
if (ace->irq > 0)
free_irq(ace->irq, ace);
iounmap(ace->baseaddr);
@ -1123,11 +1128,6 @@ static int ace_alloc(struct device *dev, int id, resource_size_t physaddr,
int rc;
dev_dbg(dev, "ace_alloc(%p)\n", dev);
if (!physaddr) {
rc = -ENODEV;
goto err_noreg;
}
/* Allocate and initialize the ace device structure */
ace = kzalloc(sizeof(struct ace_device), GFP_KERNEL);
if (!ace) {
@ -1153,7 +1153,6 @@ static int ace_alloc(struct device *dev, int id, resource_size_t physaddr,
dev_set_drvdata(dev, NULL);
kfree(ace);
err_alloc:
err_noreg:
dev_err(dev, "could not initialize device, err=%i\n", rc);
return rc;
}
@ -1176,10 +1175,11 @@ static void ace_free(struct device *dev)
static int ace_probe(struct platform_device *dev)
{
resource_size_t physaddr = 0;
int bus_width = ACE_BUS_WIDTH_16; /* FIXME: should not be hard coded */
resource_size_t physaddr;
struct resource *res;
u32 id = dev->id;
int irq = 0;
int irq;
int i;
dev_dbg(&dev->dev, "ace_probe(%p)\n", dev);
@ -1190,12 +1190,15 @@ static int ace_probe(struct platform_device *dev)
if (of_find_property(dev->dev.of_node, "8-bit", NULL))
bus_width = ACE_BUS_WIDTH_8;
for (i = 0; i < dev->num_resources; i++) {
if (dev->resource[i].flags & IORESOURCE_MEM)
physaddr = dev->resource[i].start;
if (dev->resource[i].flags & IORESOURCE_IRQ)
irq = dev->resource[i].start;
}
res = platform_get_resource(dev, IORESOURCE_MEM, 0);
if (!res)
return -EINVAL;
physaddr = res->start;
if (!physaddr)
return -ENODEV;
irq = platform_get_irq_optional(dev, 0);
/* Call the bus-independent setup code */
return ace_alloc(&dev->dev, id, physaddr, irq, bus_width);

View File

@ -47,7 +47,7 @@ enum {
struct intel_tlv {
u8 type;
u8 len;
u8 val[0];
u8 val[];
} __packed;
struct intel_version_tlv {

View File

@ -1176,8 +1176,10 @@ mptscsih_remove(struct pci_dev *pdev)
MPT_SCSI_HOST *hd;
int sz1;
if((hd = shost_priv(host)) == NULL)
return;
if (host == NULL)
hd = NULL;
else
hd = shost_priv(host);
mptscsih_shutdown(pdev);
@ -1193,14 +1195,15 @@ mptscsih_remove(struct pci_dev *pdev)
"Free'd ScsiLookup (%d) memory\n",
ioc->name, sz1));
kfree(hd->info_kbuf);
if (hd)
kfree(hd->info_kbuf);
/* NULL the Scsi_Host pointer
*/
ioc->sh = NULL;
scsi_host_put(host);
if (host)
scsi_host_put(host);
mpt_detach(pdev);
}

View File

@ -224,7 +224,7 @@ struct mei_ext_hdr {
u8 type;
u8 length;
u8 ext_payload[2];
u8 hdr[0];
u8 hdr[];
};
/**
@ -238,7 +238,7 @@ struct mei_ext_meta_hdr {
u8 count;
u8 size;
u8 reserved[2];
struct mei_ext_hdr hdrs[0];
struct mei_ext_hdr hdrs[];
};
/*
@ -308,7 +308,7 @@ struct mei_msg_hdr {
u32 dma_ring:1;
u32 internal:1;
u32 msg_complete:1;
u32 extension[0];
u32 extension[];
} __packed;
/* The length is up to 9 bits */

View File

@ -402,7 +402,7 @@ struct enetc_psfp_gate {
u32 num_entries;
refcount_t refcount;
struct hlist_node node;
struct action_gate_entry entries[0];
struct action_gate_entry entries[];
};
/* Only enable the green color frame now

View File

@ -198,7 +198,7 @@ static_assert(sizeof(struct stats) == 16);
struct gve_stats_report {
__be64 written_count;
struct stats stats[0];
struct stats stats[];
};
static_assert(sizeof(struct gve_stats_report) == 8);

View File

@ -116,9 +116,8 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
priv->tx_cfg.num_queues;
rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
priv->rx_cfg.num_queues;
priv->stats_report_len = sizeof(struct gve_stats_report) +
(tx_stats_num + rx_stats_num) *
sizeof(struct stats);
priv->stats_report_len = struct_size(priv->stats_report, stats,
tx_stats_num + rx_stats_num);
priv->stats_report =
dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
&priv->stats_report_bus, GFP_KERNEL);

View File

@ -2125,7 +2125,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns);
if (ret)
if (ret && !nvme_first_scan(ns->disk))
return ret;
}

View File

@ -146,7 +146,8 @@ struct nvme_fc_rport {
/* fc_ctrl flags values - specified as bit positions */
#define ASSOC_ACTIVE 0
#define FCCTRL_TERMIO 1
#define ASSOC_FAILED 1
#define FCCTRL_TERMIO 2
struct nvme_fc_ctrl {
spinlock_t lock;
@ -157,7 +158,6 @@ struct nvme_fc_ctrl {
u32 cnum;
bool ioq_live;
atomic_t err_work_active;
u64 association_id;
struct nvmefc_ls_rcv_op *rcv_disconn;
@ -167,7 +167,6 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set tag_set;
struct delayed_work connect_work;
struct work_struct err_work;
struct kref ref;
unsigned long flags;
@ -2414,24 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
nvme_fc_ctrl_put(ctrl);
}
/*
* This routine is used by the transport when it needs to find active
* io on a queue that is to be terminated. The transport uses
* blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
* this routine to kill them on a 1 by 1 basis.
*
* As FC allocates FC exchange for each io, the transport must contact
* the LLDD to terminate the exchange, thus releasing the FC exchange.
* After terminating the exchange the LLDD will call the transport's
* normal io done path for the request, but it will have an aborted
* status. The done path will return the io request back to the block
* layer with an error status.
*
* @req: busy request; its PDU (blk_mq_rq_to_pdu()) is the nvme_fc_fcp_op
* that is handed to __nvme_fc_abort_op()
* @data: the struct nvme_ctrl supplied by the caller of the iterator
* @reserved: not used by this routine
*
* Always returns true.
* NOTE(review): presumably this tells the iterator to keep walking the
* remaining requests - confirm against blk_mq_tagset_busy_iter().
*/
static bool
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
{
struct nvme_ctrl *nctrl = data;
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
__nvme_fc_abort_op(ctrl, op);
return true;
}
/*
* This routine runs through all outstanding commands on the association
* and aborts them. This routine is typically called by the
* delete_association routine. It is also called due to an error during
* reconnect. In that scenario, it is most likely a command that initializes
* the controller, including fabric Connect commands on io queues, that
* may have timed out or failed thus the io must be killed for the connect
* thread to see the error.
*
* @ctrl: controller whose io and admin requests are to be terminated
* @start_queues: when true, and io queues exist (queue_count > 1), the io
* queues are restarted with nvme_start_queues() once the outstanding io
* has been terminated and has completed. The admin queue is left quiesced
* in either case.
*/
static void
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
{
/*
* If io queues are present, stop them and terminate all outstanding
* ios on them. As FC allocates FC exchange for each io, the
* transport must contact the LLDD to terminate the exchange,
* thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
* to tell us what io's are busy and invoke a transport routine
* to kill them with the LLDD. After terminating the exchange
* the LLDD will call the transport's normal io done path, but it
* will have an aborted status. The done path will return the
* io requests back to the block layer as part of normal completions
* (but with error status).
*/
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
if (start_queues)
nvme_start_queues(&ctrl->ctrl);
}
/*
* Other transports, which don't have link-level contexts bound
* to sqe's, would try to gracefully shutdown the controller by
* writing the registers for shutdown and polling (call
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
* just aborted and we will wait on those contexts, and given
* there was no indication of how live the controller is on the
* link, don't send more io to create more contexts for the
* shutdown. Let the controller fail via keepalive failure if
* it's still present.
*/
/*
* clean up the admin queue. Same thing as above.
*/
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
}
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
int active;
/*
* if an error (io timeout, etc) while (re)connecting,
* it's an error on creating the new association.
* Start the error recovery thread if it hasn't already
* been started. It is expected there could be multiple
* ios hitting this path before things are cleaned up.
* if an error (io timeout, etc) while (re)connecting, the remote
* port requested terminating of the association (disconnect_ls)
* or an error (timeout or abort) occurred on an io while creating
* the controller. Abort any ios on the association and let the
* create_association error path resolve things.
*/
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
active = atomic_xchg(&ctrl->err_work_active, 1);
if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
atomic_set(&ctrl->err_work_active, 0);
WARN_ON(1);
}
__nvme_fc_abort_outstanding_ios(ctrl, true);
set_bit(ASSOC_FAILED, &ctrl->flags);
return;
}
@ -2745,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq)
nvme_fc_ctrl_put(ctrl);
}
/*
* This routine is used by the transport when it needs to find active
* io on a queue that is to be terminated. The transport uses
* blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
* this routine to kill them on a 1 by 1 basis.
*
* As FC allocates FC exchange for each io, the transport must contact
* the LLDD to terminate the exchange, thus releasing the FC exchange.
* After terminating the exchange the LLDD will call the transport's
* normal io done path for the request, but it will have an aborted
* status. The done path will return the io request back to the block
* layer with an error status.
*
* @req: busy request; its PDU (blk_mq_rq_to_pdu()) is the nvme_fc_fcp_op
* that is handed to __nvme_fc_abort_op()
* @data: the struct nvme_ctrl supplied by the caller of the iterator
* @reserved: not used by this routine
*
* Always returns true.
* NOTE(review): presumably this tells the iterator to keep walking the
* remaining requests - confirm against blk_mq_tagset_busy_iter().
*/
static bool
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
{
struct nvme_ctrl *nctrl = data;
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
__nvme_fc_abort_op(ctrl, op);
return true;
}
static const struct blk_mq_ops nvme_fc_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
@ -2988,6 +3036,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
ctrl->cnum, ctrl->lport->localport.port_name,
ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
clear_bit(ASSOC_FAILED, &ctrl->flags);
/*
* Create the admin queue
*/
@ -3016,7 +3066,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/
ret = nvme_enable_ctrl(&ctrl->ctrl);
if (ret)
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_disconnect_admin_queue;
ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
@ -3026,7 +3076,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
ret = nvme_init_identify(&ctrl->ctrl);
if (ret)
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_disconnect_admin_queue;
/* sanity checks */
@ -3071,9 +3121,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
ret = nvme_fc_create_io_queues(ctrl);
else
ret = nvme_fc_recreate_io_queues(ctrl);
if (ret)
goto out_term_aen_ops;
}
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
goto out_term_aen_ops;
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@ -3107,60 +3157,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
/*
* This routine runs through all outstanding commands on the association
* and aborts them. This routine is typically called by the
* delete_association routine. It is also called due to an error during
* reconnect. In that scenario, it is most likely a command that initializes
* the controller, including fabric Connect commands on io queues, that
* may have timed out or failed thus the io must be killed for the connect
* thread to see the error.
*
* @ctrl: controller whose io and admin requests are to be terminated
* @start_queues: when true, and io queues exist (queue_count > 1), the io
* queues are restarted with nvme_start_queues() once the outstanding io
* has been terminated and has completed. The admin queue is left quiesced
* in either case.
*/
static void
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
{
/*
* If io queues are present, stop them and terminate all outstanding
* ios on them. As FC allocates FC exchange for each io, the
* transport must contact the LLDD to terminate the exchange,
* thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
* to tell us what io's are busy and invoke a transport routine
* to kill them with the LLDD. After terminating the exchange
* the LLDD will call the transport's normal io done path, but it
* will have an aborted status. The done path will return the
* io requests back to the block layer as part of normal completions
* (but with error status).
*/
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
if (start_queues)
nvme_start_queues(&ctrl->ctrl);
}
/*
* Other transports, which don't have link-level contexts bound
* to sqe's, would try to gracefully shutdown the controller by
* writing the registers for shutdown and polling (call
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
* just aborted and we will wait on those contexts, and given
* there was no indication of how live the controller is on the
* link, don't send more io to create more contexts for the
* shutdown. Let the controller fail via keepalive failure if
* it's still present.
*/
/*
* clean up the admin queue. Same thing as above.
*/
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
}
/*
* This routine stops operation of the controller on the host side.
* On the host os stack side: Admin and IO queues are stopped,
@ -3237,7 +3233,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/*
* kill the association on the link side. this will block
@ -3291,79 +3286,35 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
}
}
/*
* Common io termination helper. While the controller is CONNECTING only
* the outstanding ios are aborted; in any other state keep-alive is
* stopped, the association is deleted and the controller is moved to
* CONNECTING (an error is logged if that state change is refused).
*/
static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
/*
* if state is CONNECTING - the error occurred as part of a
* reconnect attempt. Abort any ios on the association and
* let the create_association error paths resolve things.
*/
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
__nvme_fc_abort_outstanding_ios(ctrl, true);
return;
}
/*
* For any other state, kill the association. As this routine
* is a common io abort routine for resetting and such, after
* the association is terminated, ensure that the state is set
* to CONNECTING.
*/
nvme_stop_keep_alive(&ctrl->ctrl);
/* will block while waiting for io to terminate */
nvme_fc_delete_association(ctrl);
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
"to CONNECTING\n", ctrl->cnum);
}
static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
int ret;
__nvme_fc_terminate_io(ctrl);
nvme_stop_ctrl(&ctrl->ctrl);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
ret = nvme_fc_create_association(ctrl);
else
ret = -ENOTCONN;
/* will block while waiting for io to terminate */
nvme_fc_delete_association(ctrl);
if (ret)
nvme_fc_reconnect_or_delete(ctrl, ret);
else
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reset complete\n",
ctrl->cnum);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
"to CONNECTING\n", ctrl->cnum);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to schedule connect "
"after reset\n", ctrl->cnum);
} else {
flush_delayed_work(&ctrl->connect_work);
}
} else {
nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
}
}
/*
* Work handler for ctrl->err_work: terminates io through
* __nvme_fc_terminate_io() and then clears err_work_active, the flag the
* error recovery path tests before queueing this work item.
*/
static void
nvme_fc_connect_err_work(struct work_struct *work)
{
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, err_work);
__nvme_fc_terminate_io(ctrl);
atomic_set(&ctrl->err_work_active, 0);
/*
* Rescheduling the connection after recovering
* from the io error is left to the reconnect work
* item, which is what should have stalled waiting on
* the io that had the error that scheduled this work.
*/
}
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc",
@ -3491,7 +3442,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->dev = lport->dev;
ctrl->cnum = idx;
ctrl->ioq_live = false;
atomic_set(&ctrl->err_work_active, 0);
init_waitqueue_head(&ctrl->ioabort_wait);
get_device(ctrl->dev);
@ -3499,7 +3449,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
spin_lock_init(&ctrl->lock);
/* io queue count */
@ -3592,7 +3541,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
fail_ctrl:
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->connect_work);
ctrl->ctrl.opts = NULL;

View File

@ -1768,6 +1768,14 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
/* sanity checking for received data length */
if (unlikely(wc->byte_len < len)) {
dev_err(queue->ctrl->ctrl.device,
"Unexpected nvme completion length(%d)\n", wc->byte_len);
nvme_rdma_error_recovery(queue->ctrl);
return;
}
ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
/*
* AEN requests are special as they don't time out and can

View File

@ -907,8 +907,6 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->error_loc = NVMET_NO_ERROR_LOC;
req->error_slba = 0;
trace_nvmet_req_init(req, req->cmd);
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
req->error_loc = offsetof(struct nvme_common_command, flags);
@ -938,6 +936,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
if (status)
goto fail;
trace_nvmet_req_init(req, req->cmd);
if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
goto fail;

View File

@ -46,19 +46,12 @@ static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
return req->sq->ctrl;
}
static inline void __assign_disk_name(char *name, struct nvmet_req *req,
bool init)
static inline void __assign_req_name(char *name, struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req);
struct nvmet_ns *ns;
if ((init && req->sq->qid) || (!init && req->cq->qid)) {
ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid);
strncpy(name, ns->device_path, DISK_NAME_LEN);
return;
}
memset(name, 0, DISK_NAME_LEN);
if (req->ns)
strncpy(name, req->ns->device_path, DISK_NAME_LEN);
else
memset(name, 0, DISK_NAME_LEN);
}
#endif
@ -81,7 +74,7 @@ TRACE_EVENT(nvmet_req_init,
TP_fast_assign(
__entry->cmd = cmd;
__entry->ctrl = nvmet_req_to_ctrl(req);
__assign_disk_name(__entry->disk, req, true);
__assign_req_name(__entry->disk, req);
__entry->qid = req->sq->qid;
__entry->cid = cmd->common.command_id;
__entry->opcode = cmd->common.opcode;
@ -121,7 +114,7 @@ TRACE_EVENT(nvmet_req_complete,
__entry->cid = req->cqe->command_id;
__entry->result = le64_to_cpu(req->cqe->result.u64);
__entry->status = le16_to_cpu(req->cqe->status) >> 1;
__assign_disk_name(__entry->disk, req, false);
__assign_req_name(__entry->disk, req);
),
TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
__print_ctrl_name(__entry->ctrl),

View File

@ -93,7 +93,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
{
const struct iommu_ops *iommu;
const struct bus_dma_region *map = NULL;
dma_addr_t dma_start = 0;
u64 dma_start = 0;
u64 mask, end, size = 0;
bool coherent;
int ret;
@ -109,10 +109,10 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
return ret == -ENODEV ? 0 : ret;
} else {
const struct bus_dma_region *r = map;
dma_addr_t dma_end = 0;
u64 dma_end = 0;
/* Determine the overall bounds of all DMA regions */
for (dma_start = ~(dma_addr_t)0; r->size; r++) {
for (dma_start = ~0ULL; r->size; r++) {
/* Take lower and upper limits */
if (r->dma_start < dma_start)
dma_start = r->dma_start;

View File

@ -445,7 +445,7 @@ static int hisi_sas_task_prep(struct sas_task *task,
}
}
if (scmd) {
if (scmd && hisi_hba->shost->nr_hw_queues) {
unsigned int dq_index;
u32 blk_tag;

View File

@ -806,6 +806,22 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
}
/**
* ibmvscsi_set_request_limit - Set the adapter request_limit in response to
* an adapter failure, reset, or SRP Login. Done under host lock to prevent
* race with SCSI command submission.
* @hostdata: adapter to adjust
* @limit: new request limit
*/
static void ibmvscsi_set_request_limit(struct ibmvscsi_host_data *hostdata, int limit)
{
unsigned long flags;
spin_lock_irqsave(hostdata->host->host_lock, flags);
atomic_set(&hostdata->request_limit, limit);
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
}
/**
* ibmvscsi_reset_host - Reset the connection to the server
* @hostdata: struct ibmvscsi_host_data to reset
@ -813,7 +829,7 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
{
scsi_block_requests(hostdata->host);
atomic_set(&hostdata->request_limit, 0);
ibmvscsi_set_request_limit(hostdata, 0);
purge_requests(hostdata, DID_ERROR);
hostdata->action = IBMVSCSI_HOST_ACTION_RESET;
@ -1146,13 +1162,13 @@ static void login_rsp(struct srp_event_struct *evt_struct)
dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
evt_struct->xfer_iu->srp.login_rej.reason);
/* Login failed. */
atomic_set(&hostdata->request_limit, -1);
ibmvscsi_set_request_limit(hostdata, -1);
return;
default:
dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
evt_struct->xfer_iu->srp.login_rsp.opcode);
/* Login failed. */
atomic_set(&hostdata->request_limit, -1);
ibmvscsi_set_request_limit(hostdata, -1);
return;
}
@ -1163,7 +1179,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
* This value is set rather than added to request_limit because
* request_limit could have been set to -1 by this client.
*/
atomic_set(&hostdata->request_limit,
ibmvscsi_set_request_limit(hostdata,
be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
/* If we had any pending I/Os, kick them */
@ -1195,13 +1211,13 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
spin_lock_irqsave(hostdata->host->host_lock, flags);
/* Start out with a request limit of 0, since this is negotiated in
* the login request we are just sending and login requests always
* get sent by the driver regardless of request_limit.
*/
atomic_set(&hostdata->request_limit, 0);
ibmvscsi_set_request_limit(hostdata, 0);
spin_lock_irqsave(hostdata->host->host_lock, flags);
rc = ibmvscsi_send_srp_event(evt_struct, hostdata, login_timeout * 2);
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
dev_info(hostdata->dev, "sent SRP login\n");
@ -1781,7 +1797,7 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
return;
case VIOSRP_CRQ_XPORT_EVENT: /* Hypervisor telling us the connection is closed */
scsi_block_requests(hostdata->host);
atomic_set(&hostdata->request_limit, 0);
ibmvscsi_set_request_limit(hostdata, 0);
if (crq->format == 0x06) {
/* We need to re-setup the interpartition connection */
dev_info(hostdata->dev, "Re-enabling adapter!\n");
@ -2137,12 +2153,12 @@ static void ibmvscsi_do_work(struct ibmvscsi_host_data *hostdata)
}
hostdata->action = IBMVSCSI_HOST_ACTION_NONE;
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
if (rc) {
atomic_set(&hostdata->request_limit, -1);
ibmvscsi_set_request_limit(hostdata, -1);
dev_err(hostdata->dev, "error after %s\n", action);
}
spin_unlock_irqrestore(hostdata->host->host_lock, flags);
scsi_unblock_requests(hostdata->host);
}
@ -2226,7 +2242,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
init_waitqueue_head(&hostdata->work_wait_q);
hostdata->host = host;
hostdata->dev = dev;
atomic_set(&hostdata->request_limit, -1);
ibmvscsi_set_request_limit(hostdata, -1);
hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
if (map_persist_bufs(hostdata)) {

View File

@ -554,10 +554,12 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
fcport = qla_rport->fcport;
if (!qpair || !fcport || (qpair && !qpair->fw_started) ||
(fcport && fcport->deleted))
if (!qpair || !fcport)
return -ENODEV;
if (!qpair->fw_started || fcport->deleted)
return -EBUSY;
vha = fcport->vha;
if (!(fcport->nvme_flag & NVME_FLAG_REGISTERED))

View File

@ -1714,15 +1714,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
*/
static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
{
struct async_scan_data *data;
struct async_scan_data *data = NULL;
unsigned long flags;
if (strncmp(scsi_scan_type, "sync", 4) == 0)
return NULL;
mutex_lock(&shost->scan_mutex);
if (shost->async_scan) {
shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
return NULL;
goto err;
}
data = kmalloc(sizeof(*data), GFP_KERNEL);
@ -1733,7 +1734,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
goto err;
init_completion(&data->prev_finished);
mutex_lock(&shost->scan_mutex);
spin_lock_irqsave(shost->host_lock, flags);
shost->async_scan = 1;
spin_unlock_irqrestore(shost->host_lock, flags);
@ -1748,6 +1748,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
return data;
err:
mutex_unlock(&shost->scan_mutex);
kfree(data);
return NULL;
}

View File

@ -194,7 +194,7 @@ struct tcmu_tmr {
uint8_t tmr_type;
uint32_t tmr_cmd_cnt;
int16_t tmr_cmd_ids[0];
int16_t tmr_cmd_ids[];
};
/*

View File

@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
u64 paend;
struct scatterlist *sg;
struct device *dma = mvdev->mdev->device;
int ret;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
done:
mr->log_size = log_entity_size;
mr->nsg = nsg;
ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
if (!ret)
err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
if (!err)
goto err_map;
err = create_direct_mr(mvdev, mr);

View File

@ -38,6 +38,10 @@ static int batch_mapping = 1;
module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
struct vdpasim_virtqueue {
struct vringh vring;
struct vringh_kiov iov;
@ -60,7 +64,8 @@ struct vdpasim_virtqueue {
static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
(1ULL << VIRTIO_F_VERSION_1) |
(1ULL << VIRTIO_F_ACCESS_PLATFORM);
(1ULL << VIRTIO_F_ACCESS_PLATFORM) |
(1ULL << VIRTIO_NET_F_MAC);
/* State of each vdpasim device */
struct vdpasim {
@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void)
spin_lock_init(&vdpasim->iommu_lock);
dev = &vdpasim->vdpa.dev;
dev->coherent_dma_mask = DMA_BIT_MASK(64);
dev->dma_mask = &dev->coherent_dma_mask;
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
goto err_iommu;
set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void)
if (!vdpasim->buffer)
goto err_iommu;
eth_random_addr(vdpasim->config.mac);
if (macaddr) {
mac_pton(macaddr, vdpasim->config.mac);
if (!is_valid_ether_addr(vdpasim->config.mac)) {
ret = -EADDRNOTAVAIL;
goto err_iommu;
}
} else {
eth_random_addr(vdpasim->config.mac);
}
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
return vdpasim->generation;
}
static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
{
struct vdpa_iova_range range = {
.first = 0ULL,
.last = ULLONG_MAX,
};
return range;
}
static int vdpasim_set_map(struct vdpa_device *vdpa,
struct vhost_iotlb *iotlb)
{
@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
.get_iova_range = vdpasim_get_iova_range,
.dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap,
.free = vdpasim_free,
@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
.get_iova_range = vdpasim_get_iova_range,
.set_map = vdpasim_set_map,
.free = vdpasim_free,
};

View File

@ -47,6 +47,7 @@ struct vhost_vdpa {
int minor;
struct eventfd_ctx *config_ctx;
int in_batch;
struct vdpa_iova_range range;
};
static DEFINE_IDA(vhost_vdpa_ida);
@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
vq->call_ctx.producer.token = vq->call_ctx.ctx;
vq->call_ctx.producer.irq = irq;
ret = irq_bypass_register_producer(&vq->call_ctx.producer);
if (unlikely(ret))
dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
qid, vq->call_ctx.producer.token, ret);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
return 0;
}
/*
 * Copy the IOVA range cached in @v->range to the user buffer at @argp.
 *
 * Returns 0 on success, or -EFAULT if the user buffer could not be written.
 * copy_to_user() returns the number of bytes it failed to copy, which is not
 * a valid ioctl result, so it is translated to an errno here.
 */
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
u64 features;
long r;
long r = 0;
if (cmd == VHOST_SET_BACKEND_FEATURES) {
r = copy_from_user(&features, featurep, sizeof(features));
if (r)
return r;
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
if (features & ~VHOST_VDPA_BACKEND_FEATURES)
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
r = copy_to_user(featurep, &features, sizeof(features));
if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
case VHOST_VDPA_GET_IOVA_RANGE:
r = vhost_vdpa_get_iova_range(v, argp);
break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
struct vm_area_struct **vmas;
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long map_pfn, last_pfn = 0;
unsigned long npages, lock_limit;
unsigned long i, nmap = 0;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
unsigned long locked, lock_limit, pinned, i;
u64 iova = msg->iova;
long pinned;
int ret = 0;
if (msg->iova < v->range.first ||
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (!npages)
return -EINVAL;
page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!page_list || !vmas) {
ret = -ENOMEM;
goto free;
}
mmap_read_lock(dev->mm);
locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
if (locked > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
page_list, vmas);
if (npages != pinned) {
if (pinned < 0) {
ret = pinned;
} else {
unpin_user_pages(page_list, pinned);
ret = -ENOMEM;
}
goto unlock;
goto out;
}
cur_base = msg->uaddr & PAGE_MASK;
iova &= PAGE_MASK;
map_pfn = page_to_pfn(page_list[0]);
/* One more iteration to avoid extra vdpa_map() call out of loop. */
for (i = 0; i <= npages; i++) {
unsigned long this_pfn;
u64 csize;
while (npages) {
pinned = min_t(unsigned long, npages, list_size);
ret = pin_user_pages(cur_base, pinned,
gup_flags, page_list, NULL);
if (ret != pinned)
goto out;
/* The last chunk may have no valid PFN next to it */
this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
if (!last_pfn)
map_pfn = page_to_pfn(page_list[0]);
if (last_pfn && (this_pfn == -1UL ||
this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
csize = last_pfn - map_pfn + 1;
ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
map_pfn << PAGE_SHIFT,
msg->perm);
if (ret) {
/*
* Unpin the rest chunks of memory on the
* flight with no corresponding vdpa_map()
* calls having been made yet. On the other
* hand, vdpa_unmap() in the failure path
* is in charge of accounting the number of
* pinned pages for its own.
* This asymmetrical pattern of accounting
* is for efficiency to pin all pages at
* once, while there is no other callsite
* of vdpa_map() than here above.
*/
unpin_user_pages(&page_list[nmap],
npages - nmap);
goto out;
for (i = 0; i < ret; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]);
u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
if (vhost_vdpa_map(v, iova, csize,
map_pfn << PAGE_SHIFT,
msg->perm))
goto out;
map_pfn = this_pfn;
iova += csize;
}
atomic64_add(csize, &dev->mm->pinned_vm);
nmap += csize;
iova += csize << PAGE_SHIFT;
map_pfn = this_pfn;
last_pfn = this_pfn;
}
last_pfn = this_pfn;
cur_base += ret << PAGE_SHIFT;
npages -= ret;
}
WARN_ON(nmap != npages);
/* Pin the rest chunk */
ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
map_pfn << PAGE_SHIFT, msg->perm);
out:
if (ret)
if (ret) {
vhost_vdpa_unmap(v, msg->iova, msg->size);
unlock:
atomic64_sub(npages, &dev->mm->pinned_vm);
}
mmap_read_unlock(dev->mm);
free:
kvfree(vmas);
kvfree(page_list);
free_page((unsigned long)page_list);
return ret;
}
@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
v->domain = NULL;
}
/*
 * Fill in v->range with the IOVA range usable for this device.
 *
 * Sources, in order of preference:
 *   1. the device's own get_iova_range() config op, if it provides one;
 *   2. the geometry of the attached IOMMU domain, but only when the domain
 *      exists, the attribute query succeeds and the aperture is enforced;
 *   3. otherwise the full range [0, ULLONG_MAX].
 */
static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct iommu_domain_geometry geo;

	if (ops->get_iova_range) {
		v->range = ops->get_iova_range(vdpa);
		return;
	}

	if (v->domain &&
	    !iommu_domain_get_attr(v->domain, DOMAIN_ATTR_GEOMETRY, &geo) &&
	    geo.force_aperture) {
		v->range.first = geo.aperture_start;
		v->range.last = geo.aperture_end;
		return;
	}

	/* Nothing restricts the range: expose the whole address space. */
	v->range.first = 0;
	v->range.last = ULLONG_MAX;
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
struct vhost_vdpa *v;
@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
if (r)
goto err_init_iotlb;
vhost_vdpa_set_iova_range(v);
filep->private_data = v;
return 0;

View File

@ -1690,7 +1690,7 @@ struct elf_thread_core_info {
struct elf_thread_core_info *next;
struct task_struct *task;
struct elf_prstatus prstatus;
struct memelfnote notes[0];
struct memelfnote notes[];
};
struct elf_note_info {

View File

@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int level = ref->level;
struct btrfs_key search_key = ref->key_for_search;
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
/*
* If we're search_commit_root we could possibly be holding locks on
* other tree nodes. This happens when qgroups does backref walks when
* adding new delayed refs. To deal with this we need to look in cache
* for the root, and if we don't find it then we need to search the
* tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
* here.
*/
if (path->search_commit_root)
root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
else
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;

View File

@ -2024,6 +2024,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
key.offset = 0;
btrfs_release_path(path);
}
btrfs_release_path(path);
list_for_each_entry(space_info, &info->space_info, list) {
int i;

View File

@ -3564,6 +3564,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct extent_buffer *eb, int err);
void btrfs_reada_remove_dev(struct btrfs_device *dev);
void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);
static inline int is_fstree(u64 rootid)
{

View File

@ -688,6 +688,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
if (!scrub_ret)
btrfs_reada_remove_dev(src_device);
/*
* We have to use this loop approach because at this point src_device
* has to be available for transaction commit to complete, yet new
@ -696,6 +699,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
while (1) {
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_reada_undo_remove_dev(src_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
@ -746,6 +750,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_reada_undo_remove_dev(src_device);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(tgt_device);

View File

@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return 0;
}
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key)
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
struct btrfs_path *path,
struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
u64 generation;
int ret;
int level;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
if (!root) {
ret = -ENOMEM;
goto alloc_fail;
}
if (!root)
return ERR_PTR(-ENOMEM);
ret = btrfs_find_root(tree_root, key, path,
&root->root_item, &root->root_key);
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto find_fail;
goto fail;
}
generation = btrfs_root_generation(&root->root_item);
@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
root->node = NULL;
goto find_fail;
goto fail;
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
goto find_fail;
goto fail;
}
root->commit_root = btrfs_root_node(root);
out:
btrfs_free_path(path);
return root;
find_fail:
fail:
btrfs_put_root(root);
alloc_fail:
root = ERR_PTR(ret);
goto out;
return ERR_PTR(ret);
}
/*
 * Read the root described by @key from @tree_root using a temporary path.
 *
 * A path is allocated for the lookup, handed to read_tree_root_path() and
 * freed again before returning. Returns whatever read_tree_root_path()
 * returned, or ERR_PTR(-ENOMEM) if the path could not be allocated.
 */
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
					struct btrfs_key *key)
{
	struct btrfs_path *tmp_path = btrfs_alloc_path();
	struct btrfs_root *result;

	if (!tmp_path)
		return ERR_PTR(-ENOMEM);

	result = read_tree_root_path(tree_root, tmp_path, key);
	btrfs_free_path(tmp_path);

	return result;
}
/*
@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
return root;
}
/*
 * Map a well-known tree objectid to the corresponding root held in @fs_info.
 *
 * For the root, extent, chunk, dev and csum trees the result of
 * btrfs_grab_root() is returned directly. For the quota, uuid and free space
 * trees, ERR_PTR(-ENOENT) is returned when btrfs_grab_root() yields NULL.
 * Any other objectid returns NULL so the caller can continue with its own
 * lookup.
 */
static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
						u64 objectid)
{
	switch (objectid) {
	case BTRFS_ROOT_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->tree_root);
	case BTRFS_EXTENT_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->extent_root);
	case BTRFS_CHUNK_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->chunk_root);
	case BTRFS_DEV_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->dev_root);
	case BTRFS_CSUM_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->csum_root);
	case BTRFS_QUOTA_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->quota_root) ?
			fs_info->quota_root : ERR_PTR(-ENOENT);
	case BTRFS_UUID_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->uuid_root) ?
			fs_info->uuid_root : ERR_PTR(-ENOENT);
	case BTRFS_FREE_SPACE_TREE_OBJECTID:
		return btrfs_grab_root(fs_info->free_space_root) ?
			fs_info->free_space_root : ERR_PTR(-ENOENT);
	default:
		/* Not one of the global trees handled here. */
		return NULL;
	}
}
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root)
{
@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
int ret;
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->tree_root);
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->extent_root);
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
return btrfs_grab_root(fs_info->chunk_root);
if (objectid == BTRFS_DEV_TREE_OBJECTID)
return btrfs_grab_root(fs_info->dev_root);
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
return btrfs_grab_root(fs_info->csum_root);
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
return btrfs_grab_root(fs_info->quota_root) ?
fs_info->quota_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return btrfs_grab_root(fs_info->free_space_root) ?
fs_info->free_space_root : ERR_PTR(-ENOENT);
root = btrfs_get_global_root(fs_info, objectid);
if (root)
return root;
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
@ -1621,6 +1634,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
/*
 * btrfs_get_fs_root_commit_root - return a root for the given objectid
 * @fs_info:	the fs_info
 * @path:	the path to use for the lookup; it must have both
 *		search_commit_root and skip_locking set (asserted below) and
 *		is released again after the tree root has been read
 * @objectid:	the objectid we need to lookup
 *
 * This is exclusively used for backref walking, and exists specifically
 * because of how qgroups does lookups. Qgroups will do a backref lookup at
 * delayed ref creation time, which means we may have to read the tree_root in
 * order to look up a fs root that is not in memory. If the root is not in
 * memory we will read the tree root commit root and look up the fs root from
 * there. This is a temporary root, it will not be inserted into the radix
 * tree as it doesn't have the most uptodate information, it'll simply be
 * discarded once the backref code is finished using the root.
 */
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
						 struct btrfs_path *path,
						 u64 objectid)
{
	struct btrfs_key root_key = {
		.objectid = objectid,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = (u64)-1,
	};
	struct btrfs_root *found;

	ASSERT(path->search_commit_root && path->skip_locking);

	/*
	 * The global root lookup can hand back -ENOENT if we ask for a root
	 * that doesn't exist, but since we get here via the backref walking
	 * code we won't be looking up a root that doesn't exist, unless
	 * there's corruption. So any non-NULL result is simply returned.
	 */
	found = btrfs_get_global_root(fs_info, objectid);
	if (found)
		return found;

	/* Next try the fs roots already held in memory. */
	found = btrfs_lookup_fs_root(fs_info, objectid);
	if (found)
		return found;

	/* Fall back to reading it from the tree root via the caller's path. */
	found = read_tree_root_path(fs_info->tree_root, path, &root_key);
	btrfs_release_path(path);

	return found;
}
/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens

View File

@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 objectid);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

View File

@ -3185,7 +3185,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_tree_block_info *bi;
if (item_size < sizeof(*ei) + sizeof(*bi)) {
btrfs_crit(info,
"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu",
"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
owner_objectid, item_size,
sizeof(*ei) + sizeof(*bi));

View File

@ -3628,7 +3628,8 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
inode_lock_shared(inode);
ret = btrfs_direct_IO(iocb, to);
inode_unlock_shared(inode);
if (ret < 0)
if (ret < 0 || !iov_iter_count(to) ||
iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
return ret;
}

View File

@ -9672,10 +9672,16 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
* clear_offset by our extent size.
*/
clear_offset += ins.offset;
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
last_alloc = ins.offset;
trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
/*
* Now that we inserted the prealloc extent we can finally
* decrement the number of reservations in the block group.
* If we did it before, we could race with relocation and have
* relocation miss the reserved extent, making it fail later.
*/
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_free_reserved_extent(fs_info, ins.objectid,

View File

@ -1026,6 +1026,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.type == BTRFS_ROOT_REF_KEY) {
/* Release locks on tree_root before we access quota_root */
btrfs_release_path(path);
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
if (ret) {
@ -1044,6 +1048,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_search_slot_for_read(tree_root, &found_key,
path, 1, 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
if (ret > 0) {
/*
* Shouldn't happen, but in case it does we
* don't need to do the btrfs_next_item, just
* continue.
*/
continue;
}
}
ret = btrfs_next_item(tree_root, path);
if (ret < 0) {

View File

@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!dev->bdev)
continue;
if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
continue;
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
have_zone = 1;
}
if (!have_zone)
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
up_read(&fs_info->dev_replace.rwsem);
@ -1020,3 +1025,45 @@ void btrfs_reada_detach(void *handle)
kref_put(&rc->refcnt, reada_control_release);
}
/*
 * Call this before removing a device (device replace or device remove
 * ioctls). It waits for all existing readahead requests on the device and
 * makes sure no one queues more readahead requests for it.
 *
 * Must be called while holding neither the device list mutex nor the device
 * replace semaphore, otherwise it will deadlock.
 */
void btrfs_reada_remove_dev(struct btrfs_device *dev)
{
	struct btrfs_fs_info *info = dev->fs_info;

	/*
	 * Flag the device under reada_lock so this serializes with readahead
	 * extent creation at reada_find_extent().
	 */
	spin_lock(&info->reada_lock);
	set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
	spin_unlock(&info->reada_lock);

	/*
	 * Readahead requests may already sit in the radix trees without having
	 * been added to the readahead work queue yet. Start them and wait for
	 * their completion; otherwise dropping the last reference on the
	 * readahead extents and their zones could end in a use-after-free, as
	 * they need to access the device structure.
	 */
	reada_start_machine(info);
	btrfs_flush_workqueue(info->readahead_workers);
}
/*
* If when removing a device (device replace or device remove ioctls) an error
* happens after calling btrfs_reada_remove_dev(), call this to undo what that
* function did. This is safe to call even if btrfs_reada_remove_dev() was not
* called before.
*/
void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
{
spin_lock(&dev->fs_info->reada_lock);
clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
spin_unlock(&dev->fs_info->reada_lock);
}

View File

@ -760,18 +760,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
u64 type;
u64 features;
bool mixed = false;
int raid_index;
int nparity;
int ncopies;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
type = btrfs_chunk_type(leaf, chunk);
raid_index = btrfs_bg_flags_to_raid_index(type);
ncopies = btrfs_raid_array[raid_index].ncopies;
nparity = btrfs_raid_array[raid_index].nparity;
if (!num_stripes) {
chunk_err(leaf, chunk, logical,
"invalid chunk num_stripes, have %u", num_stripes);
return -EUCLEAN;
}
if (num_stripes < ncopies) {
chunk_err(leaf, chunk, logical,
"invalid chunk num_stripes < ncopies, have %u < %d",
num_stripes, ncopies);
return -EUCLEAN;
}
if (nparity && num_stripes == nparity) {
chunk_err(leaf, chunk, logical,
"invalid chunk num_stripes == nparity, have %u == %d",
num_stripes, nparity);
return -EUCLEAN;
}
if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
chunk_err(leaf, chunk, logical,
"invalid chunk logical, have %llu should aligned to %u",

View File

@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
btrfs_device_data_ordered_init(dev, fs_info);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
extent_io_tree_init(fs_info, &dev->alloc_state,
@ -2099,6 +2099,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
mutex_unlock(&uuid_mutex);
ret = btrfs_shrink_device(device, 0);
if (!ret)
btrfs_reada_remove_dev(device);
mutex_lock(&uuid_mutex);
if (ret)
goto error_undo;
@ -2179,6 +2181,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
return ret;
error_undo:
btrfs_reada_undo_remove_dev(device);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,

View File

@ -39,10 +39,10 @@ struct btrfs_io_geometry {
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device) \
seqcount_init(&device->data_seqcount)
#define btrfs_device_data_ordered_init(device, info) \
seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex)
#else
#define btrfs_device_data_ordered_init(device) do { } while (0)
#define btrfs_device_data_ordered_init(device, info) do { } while (0)
#endif
#define BTRFS_DEV_STATE_WRITEABLE (0)
@ -50,6 +50,7 @@ struct btrfs_io_geometry {
#define BTRFS_DEV_STATE_MISSING (2)
#define BTRFS_DEV_STATE_REPLACE_TGT (3)
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
@ -71,7 +72,8 @@ struct btrfs_device {
blk_status_t last_flush_error;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
seqcount_t data_seqcount;
/* A seqcount_t with associated chunk_mutex (for lockdep) */
seqcount_mutex_t data_seqcount;
#endif
/* the internal btrfs device id */
@ -162,11 +164,9 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \
static inline void \
btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
{ \
preempt_disable(); \
write_seqcount_begin(&dev->data_seqcount); \
dev->name = size; \
write_seqcount_end(&dev->data_seqcount); \
preempt_enable(); \
}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
#define BTRFS_DEVICE_GETSET_FUNCS(name) \

View File

@ -60,7 +60,7 @@ struct hfs_bnode {
wait_queue_head_t lock_wq;
atomic_t refcnt;
unsigned int page_offset;
struct page *page[0];
struct page *page[];
};
#define HFS_BNODE_ERROR 0

View File

@ -117,7 +117,7 @@ struct hfs_bnode {
wait_queue_head_t lock_wq;
atomic_t refcnt;
unsigned int page_offset;
struct page *page[0];
struct page *page[];
};
#define HFS_BNODE_LOCK 0

View File

@ -1365,6 +1365,9 @@ static void io_prep_async_work(struct io_kiocb *req)
io_req_init_async(req);
id = req->work.identity;
if (req->flags & REQ_F_FORCE_ASYNC)
req->work.flags |= IO_WQ_WORK_CONCURRENT;
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
@ -1846,59 +1849,39 @@ static void __io_free_req(struct io_kiocb *req)
percpu_ref_put(&ctx->refs);
}
static bool io_link_cancel_timeout(struct io_kiocb *req)
static void io_kill_linked_timeout(struct io_kiocb *req)
{
struct io_timeout_data *io = req->async_data;
struct io_ring_ctx *ctx = req->ctx;
int ret;
ret = hrtimer_try_to_cancel(&io->timer);
if (ret != -1) {
io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(ctx);
req->flags &= ~REQ_F_LINK_HEAD;
io_put_req_deferred(req, 1);
return true;
}
return false;
}
static bool __io_kill_linked_timeout(struct io_kiocb *req)
{
struct io_kiocb *link;
bool wake_ev;
bool cancelled = false;
unsigned long flags;
if (list_empty(&req->link_list))
return false;
link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
if (link->opcode != IORING_OP_LINK_TIMEOUT)
return false;
spin_lock_irqsave(&ctx->completion_lock, flags);
link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
link_list);
/*
* Can happen if a linked timeout fired and link had been like
* req -> link t-out -> link t-out [-> ...]
*/
if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
return false;
if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
struct io_timeout_data *io = link->async_data;
int ret;
list_del_init(&link->link_list);
wake_ev = io_link_cancel_timeout(link);
list_del_init(&link->link_list);
ret = hrtimer_try_to_cancel(&io->timer);
if (ret != -1) {
io_cqring_fill_event(link, -ECANCELED);
io_commit_cqring(ctx);
cancelled = true;
}
}
req->flags &= ~REQ_F_LINK_TIMEOUT;
return wake_ev;
}
static void io_kill_linked_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
bool wake_ev;
spin_lock_irqsave(&ctx->completion_lock, flags);
wake_ev = __io_kill_linked_timeout(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (wake_ev)
if (cancelled) {
io_cqring_ev_posted(ctx);
io_put_req(link);
}
}
static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
@ -4977,8 +4960,10 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
/* make sure double remove sees this as being gone */
wait->private = NULL;
spin_unlock(&poll->head->lock);
if (!done)
__io_async_wake(req, poll, mask, io_poll_task_func);
if (!done) {
/* use wait func handler, so it matches the rq type */
poll->wait.func(&poll->wait, mode, sync, key);
}
}
refcount_dec(&req->refs);
return 1;
@ -6180,7 +6165,6 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
{
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt;
const struct cred *old_creds = NULL;
int ret;
@ -6206,7 +6190,6 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
*/
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
if (!io_arm_poll_handler(req)) {
punt:
/*
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
@ -6216,33 +6199,25 @@ static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
if (linked_timeout)
io_queue_linked_timeout(linked_timeout);
goto exit;
}
} else if (likely(!ret)) {
/* drop submission reference */
req = io_put_req_find_next(req);
if (linked_timeout)
io_queue_linked_timeout(linked_timeout);
if (unlikely(ret)) {
if (req) {
if (!(req->flags & REQ_F_FORCE_ASYNC))
goto again;
io_queue_async_work(req);
}
} else {
/* un-prep timeout, so it'll be killed as any other linked */
req->flags &= ~REQ_F_LINK_TIMEOUT;
req_set_fail_links(req);
io_put_req(req);
io_req_complete(req, ret);
goto exit;
}
/* drop submission reference */
nxt = io_put_req_find_next(req);
if (linked_timeout)
io_queue_linked_timeout(linked_timeout);
if (nxt) {
req = nxt;
if (req->flags & REQ_F_FORCE_ASYNC) {
linked_timeout = NULL;
goto punt;
}
goto again;
}
exit:
if (old_creds)
revert_creds(old_creds);
}
@ -6266,13 +6241,6 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(ret))
goto fail_req;
}
/*
* Never try inline submit of IOSQE_ASYNC is set, go straight
* to async execution.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_CONCURRENT;
io_queue_async_work(req);
} else {
if (sqe) {

View File

@ -22,7 +22,7 @@ struct SU_ER_s {
__u8 len_des;
__u8 len_src;
__u8 ext_ver;
__u8 data[0];
__u8 data[];
} __attribute__ ((packed));
struct RR_RR_s {
@ -44,7 +44,7 @@ struct RR_PN_s {
struct SL_component {
__u8 flags;
__u8 len;
__u8 text[0];
__u8 text[];
} __attribute__ ((packed));
struct RR_SL_s {
@ -54,7 +54,7 @@ struct RR_SL_s {
struct RR_NM_s {
__u8 flags;
char name[0];
char name[];
} __attribute__ ((packed));
struct RR_CL_s {
@ -71,7 +71,7 @@ struct stamp {
struct RR_TF_s {
__u8 flags;
struct stamp times[0]; /* Variable number of these beasts */
struct stamp times[]; /* Variable number of these beasts */
} __attribute__ ((packed));
/* Linux-specific extension for transparent decompression */

View File

@ -97,7 +97,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
struct poll_table_page {
struct poll_table_page * next;
struct poll_table_entry * entry;
struct poll_table_entry entries[0];
struct poll_table_entry entries[];
};
#define POLL_TABLE_FULL(table) \
@ -836,7 +836,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
struct poll_list {
struct poll_list *next;
int len;
struct pollfd entries[0];
struct pollfd entries[];
};
#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))

View File

@ -47,7 +47,7 @@ struct cppi5_host_desc_t {
u32 buf_info1;
u32 org_buf_len;
u64 org_buf_ptr;
u32 epib[0];
u32 epib[];
} __packed;
#define CPPI5_DESC_MIN_ALIGN (16U)
@ -139,7 +139,7 @@ struct cppi5_desc_epib_t {
*/
struct cppi5_monolithic_desc_t {
struct cppi5_desc_hdr_t hdr;
u32 epib[0];
u32 epib[];
};
#define CPPI5_INFO2_MDESC_DATA_OFFSET_SHIFT (18U)

View File

@ -3287,7 +3287,7 @@ static inline ino_t parent_ino(struct dentry *dentry)
*/
struct simple_transaction_argresp {
ssize_t size;
char data[0];
char data[];
};
#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))

View File

@ -14,7 +14,7 @@
*/
struct zynqmp_ipi_message {
size_t len;
u8 data[0];
u8 data[];
};
#endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */

View File

@ -5823,7 +5823,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits {
u8 reserved_at_68[0x10];
u8 num_of_actions[0x8];
union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0];
union mlx5_ifc_set_add_copy_action_in_auto_bits actions[];
};
struct mlx5_ifc_dealloc_modify_header_context_out_bits {
@ -9761,7 +9761,7 @@ struct mlx5_ifc_mcda_reg_bits {
u8 reserved_at_60[0x20];
u8 data[0][0x20];
u8 data[][0x20];
};
enum {

View File

@ -1419,7 +1419,7 @@ struct ec_response_flash_info_2 {
uint16_t num_banks_total;
/* Number of banks described in banks array. */
uint16_t num_banks_desc;
struct ec_flash_bank banks[0];
struct ec_flash_bank banks[];
} __ec_align4;
/*
@ -2420,12 +2420,12 @@ struct ec_response_motion_sense_fifo_info {
/* Total amount of vector lost */
uint16_t total_lost;
/* Lost events since the last fifo_info, per sensors */
uint16_t lost[0];
uint16_t lost[];
} __ec_todo_packed;
struct ec_response_motion_sense_fifo_data {
uint32_t number_data;
struct ec_response_motion_sensor_data data[0];
struct ec_response_motion_sensor_data data[];
} __ec_todo_packed;
/* List supported activity recognition */
@ -3093,7 +3093,7 @@ struct ec_response_tmp006_get_calibration_v1 {
uint8_t algorithm;
uint8_t num_params;
uint8_t reserved[2];
float val[0];
float val[];
} __ec_align4;
struct ec_params_tmp006_set_calibration_v1 {
@ -3101,7 +3101,7 @@ struct ec_params_tmp006_set_calibration_v1 {
uint8_t algorithm;
uint8_t num_params;
uint8_t reserved;
float val[0];
float val[];
} __ec_align4;
@ -5076,7 +5076,7 @@ struct ec_response_pd_log {
uint8_t type; /* event type : see PD_EVENT_xx below */
uint8_t size_port; /* [7:5] port number [4:0] payload size in bytes */
uint16_t data; /* type-defined data payload */
uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
uint8_t payload[]; /* optional additional data payload: 0..16 bytes */
} __ec_align4;
/* The timestamp is the microsecond counter shifted to get about a ms. */
@ -5789,7 +5789,7 @@ struct ec_response_fp_encryption_status {
struct ec_response_tp_frame_info {
uint32_t n_frames;
uint32_t frame_sizes[0];
uint32_t frame_sizes[];
} __ec_align4;
/* Create a snapshot of current frame readings */

View File

@ -69,7 +69,7 @@ struct cros_ec_command {
uint32_t outsize;
uint32_t insize;
uint32_t result;
uint8_t data[0];
uint8_t data[];
};
/**

View File

@ -52,6 +52,16 @@ struct vdpa_device {
int nvqs;
};
/**
* vDPA IOVA range - the IOVA range supported by the device
* @first: start of the IOVA range
* @last: end of the IOVA range
*/
struct vdpa_iova_range {
u64 first;
u64 last;
};
/**
* vDPA_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
@ -151,6 +161,10 @@ struct vdpa_device {
* @get_generation: Get device config generation (optional)
* @vdev: vdpa device
* Returns u32: device generation
* @get_iova_range: Get supported iova range (optional)
* @vdev: vdpa device
* Returns the iova range supported by
* the device.
* @set_map: Set device memory mapping (optional)
* Needed for device that using device
* specific DMA translation (on-chip IOMMU)
@ -216,6 +230,7 @@ struct vdpa_config_ops {
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev);
struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);

View File

@ -146,4 +146,8 @@
/* Set event fd for config interrupt*/
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
/* Get the valid iova range */
#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
struct vhost_vdpa_iova_range)
#endif

View File

@ -138,6 +138,15 @@ struct vhost_vdpa_config {
__u8 buf[0];
};
/* vhost vdpa IOVA range
* @first: First address that can be mapped by vhost-vDPA
* @last: Last address that can be mapped by vhost-vDPA
*/
struct vhost_vdpa_iova_range {
__u64 first;
__u64 last;
};
/* Feature bits */
/* Log all write descriptors. Can be changed while device is active. */
#define VHOST_F_LOG_ALL 26

View File

@ -530,7 +530,7 @@ struct module_param_attrs
{
unsigned int num;
struct attribute_group grp;
struct param_attribute attrs[0];
struct param_attribute attrs[];
};
#ifdef CONFIG_SYSFS

View File

@ -345,7 +345,7 @@ DESC_ID((id) - DESCS_COUNT(desc_ring))
*/
struct prb_data_block {
unsigned long id;
char data[0];
char data[];
};
/*

View File

@ -50,7 +50,7 @@ static bool ok_to_free_tracepoints;
*/
struct tp_probes {
struct rcu_head rcu;
struct tracepoint_func probes[0];
struct tracepoint_func probes[];
};
static inline void *allocate_probes(int count)

View File

@ -933,7 +933,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
sg_miter_start(&miter, sgl, nents, sg_flags);
if (!sg_miter_skip(&miter, skip))
return false;
return 0;
while ((offset < buflen) && sg_miter_next(&miter)) {
unsigned int len;

View File

@ -12,12 +12,13 @@
struct msft_cp_read_supported_features {
__u8 sub_opcode;
} __packed;
struct msft_rp_read_supported_features {
__u8 status;
__u8 sub_opcode;
__le64 features;
__u8 evt_prefix_len;
__u8 evt_prefix[0];
__u8 evt_prefix[];
} __packed;
struct msft_data {

View File

@ -124,7 +124,7 @@ struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
u8 reserved[16];
u8 user_eids[0][SMC_MAX_EID_LEN];
u8 user_eids[][SMC_MAX_EID_LEN];
};
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
@ -143,7 +143,7 @@ struct smc_clc_msg_smcd { /* SMC-D GID information */
struct smc_clc_smcd_v2_extension {
u8 system_eid[SMC_MAX_EID_LEN];
u8 reserved[16];
struct smc_clc_smcd_gid_chid gidchid[0];
struct smc_clc_smcd_gid_chid gidchid[];
};
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */

View File

@ -101,7 +101,7 @@ struct ima_template_entry {
struct tpm_digest *digests;
struct ima_template_desc *template_desc; /* template descriptor */
u32 template_data_len;
struct ima_field_data template_data[0]; /* template related data */
struct ima_field_data template_data[]; /* template related data */
};
struct ima_queue_entry {