nvme updates for Linux 6.13

- Use uring_cmd helper (Pavel)
  - Host Memory Buffer allocation enhancements (Christoph)
  - Target persistent reservation support (Guixin)
  - Persistent reservation tracing (Guixen)
  - NVMe 2.1 specification support (Keith)
  - Rotational Meta Support (Matias, Wang, Keith)
  - Volatile cache detection enhancment (Guixen)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE3Fbyvv+648XNRdHTPe3zGtjzRgkFAmc0264ACgkQPe3zGtjz
 RgmhvhAAzEVbniR/OBlJbqZ+rdwEHOj181XJIWUD72yZUVl2akikYq88JpiMCfcS
 pwdVAdFDEfvMjyIGpWXqE/G2NIYzb2qdGC0D3q5e/CgH/mxJ+5zJKKjj+6pqtWBt
 BJnoJ0YZcTnLXQWOrY6NxUOVn2LxxtvrKArCbh467GnDxWF7WJbwv+wkbPZZ78YR
 6IYRQU0La/uAvdpZ+ijHEOdieHtN3uJtu1AxxCFOK9gMpbHq92tm4Ya6bF09VDbG
 F+Ywhuu/gZkglTL5jEUvtt1Jd4VlhtGzBC2BhCFeSI54IwjhV3UFCajQeBh0zT/V
 Ca2VkFMAO1/Z3gRuK1QtEYkAf6Bwv591zpsoUEYvvlolXDL2aRKT5Jggwe/SMYYI
 ZA/3dSW/gRAV+bny2htVMK2n+hcn+VXhFaJlpZ7kSySK0b89wMlQ96BupTnmfyMD
 PdgVVaWVQ4onQcEu7/ItD9uFVe9tvTCH12MXRqlgJx4iM0w4ucpBh8QdOdHxMorD
 0bVCE4oLSbw6XJrfKmlytHJs4ZMdmNEoXzaJuBMsPDAlCvZiaihzTusIY7dWq4xi
 xNt6mQOOriONNpYRlaBrBGsinmQx6Ysz8q60RT9mLmGAwwI/nY9r1oxAd4ZknhKv
 c9clP0F20uO3se8vKUMbXOeGe8ZETD+S94hcGtHp9uxF8w6DfnQ=
 =Vi8L
 -----END PGP SIGNATURE-----

Merge tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme into for-6.13/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.13

 - Use uring_cmd helper (Pavel)
 - Host Memory Buffer allocation enhancements (Christoph)
 - Target persistent reservation support (Guixin)
 - Persistent reservation tracing (Guixin)
 - NVMe 2.1 specification support (Keith)
 - Rotational Meta Support (Matias, Wang, Keith)
 - Volatile cache detection enhancement (Guixin)"

* tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme: (22 commits)
  nvmet: add tracing of reservation commands
  nvme: parse reservation commands's action and rtype to string
  nvmet: report ns's vwc not present
  nvme: check ns's volatile write cache not present
  nvme: add rotational support
  nvme: use command set independent id ns if available
  nvmet: support for csi identify ns
  nvmet: implement rotational media information log
  nvmet: implement endurance groups
  nvmet: declare 2.1 version compliance
  nvmet: implement crto property
  nvmet: implement supported features log
  nvmet: implement supported log pages
  nvmet: implement active command set ns list
  nvmet: implement id ns for nvm command set
  nvmet: support reservation feature
  nvme: add reservation command's defines
  nvme-core: remove repeated wq flags
  nvmet: make nvmet_wq visible in sysfs
  nvme-pci: use dma_alloc_noncontigous if possible
  ...
This commit is contained in:
Jens Axboe 2024-11-13 10:43:11 -07:00
commit 15da3dd3f5
14 changed files with 1968 additions and 52 deletions

View File

@ -42,6 +42,8 @@ struct nvme_ns_info {
bool is_readonly;
bool is_ready;
bool is_removed;
bool is_rotational;
bool no_vwc;
};
unsigned int admin_timeout = 60;
@ -1615,6 +1617,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
}
kfree(id);
return ret;
@ -2157,11 +2161,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);
if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
else
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
if (info->is_rotational)
lim.features |= BLK_FEAT_ROTATIONAL;
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
@ -3608,6 +3615,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = info->nsid;
head->ids = info->ids;
head->shared = info->is_shared;
head->rotational = info->is_rotational;
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
kref_init(&head->ref);
@ -3988,7 +3996,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_info info = { .nsid = nsid };
struct nvme_ns *ns;
int ret;
int ret = 1;
if (nvme_identify_ns_descs(ctrl, &info))
return;
@ -4005,9 +4013,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
* set up a namespace. If not fall back to the legacy version.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
ctrl->vs >= NVME_VS(2, 0, 0))
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
else
if (ret > 0)
ret = nvme_ns_info_from_identify(ctrl, &info);
if (info.is_removed)
@ -5006,6 +5015,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
@ -5014,22 +5025,20 @@ static inline void _nvme_check_size(void)
static int __init nvme_core_init(void)
{
unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
int result = -ENOMEM;
_nvme_check_size();
nvme_wq = alloc_workqueue("nvme-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
if (!nvme_wq)
goto out;
nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
if (!nvme_reset_wq)
goto destroy_wq;
nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
if (!nvme_delete_wq)
goto destroy_reset_wq;

View File

@ -401,7 +401,7 @@ struct nvme_uring_cmd_pdu {
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
struct io_uring_cmd *ioucmd)
{
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
@ -631,8 +631,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
ret = nvme_uring_cmd_checks(issue_flags);
if (ret)
return ret;

View File

@ -474,6 +474,7 @@ struct nvme_ns_head {
struct list_head entry;
struct kref ref;
bool shared;
bool rotational;
bool passthru_err_log_enabled;
struct nvme_effects_log *effects;
u64 nuse;

View File

@ -141,6 +141,7 @@ struct nvme_dev {
struct nvme_ctrl ctrl;
u32 last_ps;
bool hmb;
struct sg_table *hmb_sgt;
mempool_t *iod_mempool;
@ -153,6 +154,7 @@ struct nvme_dev {
/* host memory buffer support: */
u64 host_mem_size;
u32 nr_host_mem_descs;
u32 host_mem_descs_size;
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
@ -1951,7 +1953,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
return ret;
}
static void nvme_free_host_mem(struct nvme_dev *dev)
static void nvme_free_host_mem_multi(struct nvme_dev *dev)
{
int i;
@ -1966,18 +1968,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
kfree(dev->host_mem_desc_bufs);
dev->host_mem_desc_bufs = NULL;
dma_free_coherent(dev->dev,
dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
}
/*
 * Release the host memory buffer together with its descriptor table.
 *
 * The buffer is either one noncontiguous DMA allocation (dev->hmb_sgt is
 * set) or a set of chunks that nvme_free_host_mem_multi() releases.  The
 * descriptor table is a coherent allocation in both cases and is freed with
 * the size recorded when it was allocated.
 */
static void nvme_free_host_mem(struct nvme_dev *dev)
{
	if (!dev->hmb_sgt)
		nvme_free_host_mem_multi(dev);
	else
		dma_free_noncontiguous(dev->dev, dev->host_mem_size,
				dev->hmb_sgt, DMA_BIDIRECTIONAL);

	dma_free_coherent(dev->dev, dev->host_mem_descs_size,
			dev->host_mem_descs, dev->host_mem_descs_dma);

	/* Forget the descriptor table so a later setup starts from scratch. */
	dev->nr_host_mem_descs = 0;
	dev->host_mem_descs_size = 0;
	dev->host_mem_descs = NULL;
}
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
/*
 * Allocate the host memory buffer as a single noncontiguous DMA allocation
 * and describe it to the controller with exactly one descriptor.
 *
 * @dev:  device to allocate the buffer for
 * @size: size of the buffer in bytes
 *
 * On success the HMB bookkeeping in @dev (hmb_sgt, host_mem_size,
 * host_mem_descs, host_mem_descs_size, nr_host_mem_descs) is populated and 0
 * is returned.  On failure -ENOMEM is returned and dev->hmb_sgt is NULL.
 */
static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
{
	dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
			DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
	if (!dev->hmb_sgt)
		return -ENOMEM;

	dev->host_mem_descs = dma_alloc_coherent(dev->dev,
			sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
			GFP_KERNEL);
	if (!dev->host_mem_descs) {
		/*
		 * dev->host_mem_size has not been updated for this allocation
		 * yet, so the buffer must be released with the size that was
		 * actually requested above.
		 */
		dma_free_noncontiguous(dev->dev, size, dev->hmb_sgt,
				DMA_BIDIRECTIONAL);
		dev->hmb_sgt = NULL;
		return -ENOMEM;
	}

	dev->host_mem_size = size;
	dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
	dev->nr_host_mem_descs = 1;

	/* One descriptor covers the whole buffer, sized in controller pages. */
	dev->host_mem_descs[0].addr =
		cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
	dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
	return 0;
}
static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
u32 max_entries, len;
u32 max_entries, len, descs_size;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
@ -1990,8 +2028,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;
descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
&descs_dma, GFP_KERNEL);
descs_size = max_entries * sizeof(*descs);
descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
GFP_KERNEL);
if (!descs)
goto out;
@ -2020,6 +2059,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
dev->host_mem_size = size;
dev->host_mem_descs = descs;
dev->host_mem_descs_dma = descs_dma;
dev->host_mem_descs_size = descs_size;
dev->host_mem_desc_bufs = bufs;
return 0;
@ -2034,8 +2074,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
kfree(bufs);
out_free_descs:
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
descs_dma);
dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
out:
dev->host_mem_descs = NULL;
return -ENOMEM;
@ -2047,9 +2086,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
u64 chunk_size;
/*
* If there is an IOMMU that can merge pages, try a virtually
* non-contiguous allocation for a single segment first.
*/
if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
if (!nvme_alloc_host_mem_single(dev, preferred))
return 0;
}
/* start big and work our way down */
for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min)
return 0;
nvme_free_host_mem(dev);
@ -2097,8 +2145,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
}
dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
dev->host_mem_size >> ilog2(SZ_1M));
"allocated %lld MiB host memory buffer (%u segment%s).\n",
dev->host_mem_size >> ilog2(SZ_1M),
dev->nr_host_mem_descs,
str_plural(dev->nr_host_mem_descs));
}
ret = nvme_set_host_mem(dev, enable_bits);

View File

@ -228,27 +228,61 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
static const char * const rrega_strs[] = {
[0x00] = "register",
[0x01] = "unregister",
[0x02] = "replace",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 rrega = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 ptpl = (cdw10[3] >> 6) & 0x3;
const char *rrega_str;
trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
rrega, iekey, ptpl);
if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
rrega_str = rrega_strs[rrega];
else
rrega_str = "reserved";
trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
rrega, rrega_str, iekey, ptpl);
trace_seq_putc(p, 0);
return ret;
}
/*
 * Human readable names for the reservation type (rtype) byte, indexed by its
 * numeric value.  Callers bounds-check the index and fall back to "reserved"
 * for values beyond the end of the table.
 */
static const char * const rtype_strs[] = {
	[0] = "reserved",
	[1] = "write exclusive",
	[2] = "exclusive access",
	[3] = "write exclusive registrants only",
	[4] = "exclusive access registrants only",
	[5] = "write exclusive all registrants",
	[6] = "exclusive access all registrants",
};
static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
static const char * const racqa_strs[] = {
[0x00] = "acquire",
[0x01] = "preempt",
[0x02] = "preempt and abort",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 racqa = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
const char *racqa_str = "reserved";
const char *rtype_str = "reserved";
trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
racqa, iekey, rtype);
if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
racqa_str = racqa_strs[racqa];
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
rtype_str = rtype_strs[rtype];
trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
racqa, racqa_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);
return ret;
@ -256,13 +290,25 @@ static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
static const char * const rrela_strs[] = {
[0x00] = "release",
[0x01] = "clear",
};
const char *ret = trace_seq_buffer_ptr(p);
u8 rrela = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
const char *rrela_str = "reserved";
const char *rtype_str = "reserved";
trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
rrela, iekey, rtype);
if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
rrela_str = rrela_strs[rrela];
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
rtype_str = rtype_strs[rtype];
trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
rrela, rrela_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);
return ret;

View File

@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o

View File

@ -71,6 +71,35 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
nvmet_req_complete(req, 0);
}
/*
 * Get Log Page: Supported Log Pages.
 *
 * Builds a zeroed log in which every log identifier this target dispatches
 * is flagged as supported, copies it to the host and completes the request.
 * An allocation failure completes the request with NVME_SC_INTERNAL.
 */
static void nvmet_execute_get_supported_log_pages(struct nvmet_req *req)
{
	struct nvme_supported_log *page;
	u16 status = NVME_SC_INTERNAL;

	page = kzalloc(sizeof(*page), GFP_KERNEL);
	if (page) {
		page->lids[NVME_LOG_SUPPORTED] =
		page->lids[NVME_LOG_ERROR] =
		page->lids[NVME_LOG_SMART] =
		page->lids[NVME_LOG_FW_SLOT] =
		page->lids[NVME_LOG_CHANGED_NS] =
		page->lids[NVME_LOG_CMD_EFFECTS] =
		page->lids[NVME_LOG_ENDURANCE_GROUP] =
		page->lids[NVME_LOG_ANA] =
		page->lids[NVME_LOG_FEATURES] =
		page->lids[NVME_LOG_RMI] =
		page->lids[NVME_LOG_RESERVATION] =
			cpu_to_le32(NVME_LIDS_LSUPP);

		status = nvmet_copy_to_sgl(req, 0, page, sizeof(*page));
		kfree(page);
	}

	nvmet_req_complete(req, status);
}
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@ -130,6 +159,45 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
return NVME_SC_SUCCESS;
}
static void nvmet_execute_get_log_page_rmi(struct nvmet_req *req)
{
struct nvme_rotational_media_log *log;
struct gendisk *disk;
u16 status;
req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
req->cmd->get_log_page.lsi));
status = nvmet_req_find_ns(req);
if (status)
goto out;
if (!req->ns->bdev || bdev_nonrot(req->ns->bdev)) {
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
if (req->transfer_len != sizeof(*log)) {
status = NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
goto out;
}
log = kzalloc(sizeof(*log), GFP_KERNEL);
if (!log)
goto out;
log->endgid = req->cmd->get_log_page.lsi;
disk = req->ns->bdev->bd_disk;
if (disk && disk->ia_ranges)
log->numa = cpu_to_le16(disk->ia_ranges->nr_ia_ranges);
else
log->numa = cpu_to_le16(1);
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
kfree(log);
out:
nvmet_req_complete(req, status);
}
static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
struct nvme_smart_log *log;
@ -176,6 +244,10 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
log->iocs[nvme_cmd_read] =
log->iocs[nvme_cmd_flush] =
log->iocs[nvme_cmd_dsm] =
log->iocs[nvme_cmd_resv_acquire] =
log->iocs[nvme_cmd_resv_register] =
log->iocs[nvme_cmd_resv_release] =
log->iocs[nvme_cmd_resv_report] =
cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
log->iocs[nvme_cmd_write] =
log->iocs[nvme_cmd_write_zeroes] =
@ -272,6 +344,49 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
return struct_size(desc, nsids, count);
}
/*
 * Get Log Page: Endurance Group Information.
 *
 * Each namespace is emulated as its own endurance group, so the endurance
 * group identifier passed in the LSI field doubles as the nsid for the
 * namespace lookup.  For block device backed namespaces the read/write
 * command counts and data unit counters are filled in from the block device
 * statistics; for any other backend the log is returned zeroed.
 */
static void nvmet_execute_get_log_page_endgrp(struct nvmet_req *req)
{
	struct nvme_endurance_group_log *egl;
	u16 status;

	req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
					    req->cmd->get_log_page.lsi));
	status = nvmet_req_find_ns(req);
	if (status)
		goto complete;

	egl = kzalloc(sizeof(*egl), GFP_KERNEL);
	if (!egl) {
		status = NVME_SC_INTERNAL;
		goto complete;
	}

	if (req->ns->bdev) {
		/* host read commands and data units read */
		put_unaligned_le64(part_stat_read(req->ns->bdev, ios[READ]),
				   &egl->hrc[0]);
		put_unaligned_le64(DIV_ROUND_UP(part_stat_read(req->ns->bdev,
						sectors[READ]), 1000),
				   &egl->dur[0]);

		/* host write commands and data units written */
		put_unaligned_le64(part_stat_read(req->ns->bdev, ios[WRITE]),
				   &egl->hwc[0]);
		put_unaligned_le64(DIV_ROUND_UP(part_stat_read(req->ns->bdev,
						sectors[WRITE]), 1000),
				   &egl->duw[0]);
	}

	status = nvmet_copy_to_sgl(req, 0, egl, sizeof(*egl));
	kfree(egl);
complete:
	nvmet_req_complete(req, status);
}
static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
{
struct nvme_ana_rsp_hdr hdr = { 0, };
@ -317,12 +432,44 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
/*
 * Get Log Page: Feature Identifiers Supported and Effects.
 *
 * Marks each feature the target advertises as supported and records the
 * scope flag reported with it: NVME_FIS_CSCPE for the queue count, keep
 * alive, async event and host id features, NVME_FIS_NSCPE for write protect
 * and the reservation notification mask.
 */
static void nvmet_execute_get_log_page_features(struct nvmet_req *req)
{
	struct nvme_supported_features_log *fsl;
	u16 status = NVME_SC_INTERNAL;

	fsl = kzalloc(sizeof(*fsl), GFP_KERNEL);
	if (fsl) {
		fsl->fis[NVME_FEAT_NUM_QUEUES] =
		fsl->fis[NVME_FEAT_KATO] =
		fsl->fis[NVME_FEAT_ASYNC_EVENT] =
		fsl->fis[NVME_FEAT_HOST_ID] =
			cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);

		fsl->fis[NVME_FEAT_WRITE_PROTECT] =
		fsl->fis[NVME_FEAT_RESV_MASK] =
			cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE);

		status = nvmet_copy_to_sgl(req, 0, fsl, sizeof(*fsl));
		kfree(fsl);
	}

	nvmet_req_complete(req, status);
}
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
case NVME_LOG_SUPPORTED:
return nvmet_execute_get_supported_log_pages(req);
case NVME_LOG_ERROR:
return nvmet_execute_get_log_page_error(req);
case NVME_LOG_SMART:
@ -338,8 +485,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
return nvmet_execute_get_log_changed_ns(req);
case NVME_LOG_CMD_EFFECTS:
return nvmet_execute_get_log_cmd_effects_ns(req);
case NVME_LOG_ENDURANCE_GROUP:
return nvmet_execute_get_log_page_endgrp(req);
case NVME_LOG_ANA:
return nvmet_execute_get_log_page_ana(req);
case NVME_LOG_FEATURES:
return nvmet_execute_get_log_page_features(req);
case NVME_LOG_RMI:
return nvmet_execute_get_log_page_rmi(req);
case NVME_LOG_RESERVATION:
return nvmet_execute_get_log_page_resv(req);
}
pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
@ -433,7 +588,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
NVME_CTRL_ONCS_WRITE_ZEROES);
NVME_CTRL_ONCS_WRITE_ZEROES |
NVME_CTRL_ONCS_RESERVATIONS);
/* XXX: don't report vwc if the underlying device is write through */
id->vwc = NVME_CTRL_VWC_PRESENT;
@ -467,6 +623,13 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->msdbd = ctrl->ops->msdbd;
/*
* Endurance group identifier is 16 bits, so we can't let namespaces
* overflow that since we reuse the nsid
*/
BUILD_BUG_ON(NVMET_MAX_NAMESPACES > USHRT_MAX);
id->endgidmax = cpu_to_le16(NVMET_MAX_NAMESPACES);
id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
id->anatt = 10; /* random value */
id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
@ -551,6 +714,21 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->nmic = NVME_NS_NMIC_SHARED;
id->anagrpid = cpu_to_le32(req->ns->anagrpid);
if (req->ns->pr.enable)
id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF;
/*
* Since we don't know any better, every namespace is its own endurance
* group.
*/
id->endgid = cpu_to_le16(req->ns->nsid);
memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
id->lbaf[0].ds = req->ns->blksize_shift;
@ -576,7 +754,40 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
/*
 * Identify: Endurance Group List.
 *
 * Returns the identifiers greater than the one given in CNSSID.  Because
 * every namespace acts as its own endurance group, the identifiers are the
 * nsids of the subsystem's namespaces.  Entry 0 of the returned list holds
 * the number of identifiers that follow it.
 */
static void nvmet_execute_identify_endgrp_list(struct nvmet_req *req)
{
	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
	u16 min_id = le16_to_cpu(req->cmd->identify.cnssid);
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	u16 status = NVME_SC_INTERNAL;
	struct nvmet_ns *ns;
	unsigned long index;
	int nr_ids = 0;
	__le16 *ids;

	ids = kzalloc(buf_size, GFP_KERNEL);
	if (!ids)
		goto complete;

	xa_for_each(&ctrl->subsys->namespaces, index, ns) {
		if (ns->nsid > min_id) {
			/* slot 0 is reserved for the count, ids start at 1 */
			ids[++nr_ids] = cpu_to_le16(ns->nsid);
			if (nr_ids + 1 == buf_size / sizeof(__le16))
				break;
		}
	}
	ids[0] = cpu_to_le16(nr_ids);

	status = nvmet_copy_to_sgl(req, 0, ids, buf_size);
	kfree(ids);
complete:
	nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_nslist(struct nvmet_req *req, bool match_css)
{
static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@ -606,6 +817,8 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
if (ns->nsid <= min_nsid)
continue;
if (match_css && req->ns->csi != req->cmd->identify.csi)
continue;
list[i++] = cpu_to_le32(ns->nsid);
if (i == buf_size / sizeof(__le32))
break;
@ -685,6 +898,56 @@ static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
}
/*
 * Identify: I/O command set specific namespace data for the NVM command set.
 *
 * The namespace addressed by the command must exist; the data structure
 * returned for it is NVME_IDENTIFY_DATA_SIZE bytes of zeroes.  The request is
 * always completed here with the resulting status.
 */
static void nvme_execute_identify_ns_nvm(struct nvmet_req *req)
{
	u16 status;

	status = nvmet_req_find_ns(req);
	if (status)
		goto out;

	/*
	 * ZERO_PAGE() yields a struct page pointer, not the address of zeroed
	 * memory, so it must not be used as the source buffer of
	 * nvmet_copy_to_sgl().  Zero the host buffer directly instead.
	 */
	status = nvmet_zero_sgl(req, 0, NVME_IDENTIFY_DATA_SIZE);
out:
	nvmet_req_complete(req, status);
}
/*
 * Identify: I/O command set independent namespace data.
 *
 * Reports the namespace as ready and shared, along with its ANA group,
 * read-only attribute and, for block device backends, whether the media is
 * rotational and whether a volatile write cache is absent.
 */
static void nvmet_execute_id_cs_indep(struct nvmet_req *req)
{
	struct nvme_id_ns_cs_indep *data;
	u16 status = nvmet_req_find_ns(req);

	if (status)
		goto done;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data) {
		status = NVME_SC_INTERNAL;
		goto done;
	}

	data->nmic = NVME_NS_NMIC_SHARED;
	data->anagrpid = cpu_to_le32(req->ns->anagrpid);
	data->nstat = NVME_NSTAT_NRDY;
	if (req->ns->readonly)
		data->nsattr |= NVME_NS_ATTR_RO;

	/*
	 * Only block device backends are inspected.  A file backend still
	 * needs the flush command to write back the file's metadata, even
	 * with buffered_io disabled, so it keeps reporting a volatile write
	 * cache.
	 */
	if (req->ns->bdev) {
		if (!bdev_nonrot(req->ns->bdev))
			data->nsfeat |= NVME_NS_ROTATIONAL;
		if (!bdev_write_cache(req->ns->bdev))
			data->nsfeat |= NVME_NS_VWC_NOT_PRESENT;
	}

	status = nvmet_copy_to_sgl(req, 0, data, sizeof(*data));
	kfree(data);
done:
	nvmet_req_complete(req, status);
}
static void nvmet_execute_identify(struct nvmet_req *req)
{
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
@ -698,7 +961,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
nvmet_execute_identify_ctrl(req);
return;
case NVME_ID_CNS_NS_ACTIVE_LIST:
nvmet_execute_identify_nslist(req);
nvmet_execute_identify_nslist(req, false);
return;
case NVME_ID_CNS_NS_DESC_LIST:
nvmet_execute_identify_desclist(req);
@ -706,8 +969,8 @@ static void nvmet_execute_identify(struct nvmet_req *req)
case NVME_ID_CNS_CS_NS:
switch (req->cmd->identify.csi) {
case NVME_CSI_NVM:
/* Not supported */
break;
nvme_execute_identify_ns_nvm(req);
return;
case NVME_CSI_ZNS:
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
nvmet_execute_identify_ns_zns(req);
@ -729,6 +992,15 @@ static void nvmet_execute_identify(struct nvmet_req *req)
break;
}
break;
case NVME_ID_CNS_NS_ACTIVE_LIST_CS:
nvmet_execute_identify_nslist(req, true);
return;
case NVME_ID_CNS_NS_CS_INDEP:
nvmet_execute_id_cs_indep(req);
return;
case NVME_ID_CNS_ENDGRP_LIST:
nvmet_execute_identify_endgrp_list(req);
return;
}
pr_debug("unhandled identify cns %d on qid %d\n",
@ -861,6 +1133,9 @@ void nvmet_execute_set_features(struct nvmet_req *req)
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_set_feat_write_protect(req);
break;
case NVME_FEAT_RESV_MASK:
status = nvmet_set_feat_resv_notif_mask(req, cdw11);
break;
default:
req->error_loc = offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
@ -959,6 +1234,9 @@ void nvmet_execute_get_features(struct nvmet_req *req)
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_get_feat_write_protect(req);
break;
case NVME_FEAT_RESV_MASK:
status = nvmet_get_feat_resv_notif_mask(req);
break;
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);

View File

@ -769,6 +769,32 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page)
{
return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable);
}
/*
 * configfs "resv_enable" write: switch reservation support for a namespace
 * on or off.
 *
 * The flag may only be changed while the namespace is disabled; the check
 * and the update are done under the subsystem lock.  Returns @count on
 * success, or -EINVAL if the input is not a boolean or the namespace is
 * already enabled.
 */
static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	ssize_t ret = count;
	bool enable;

	if (kstrtobool(page, &enable))
		return -EINVAL;

	mutex_lock(&ns->subsys->lock);
	if (!ns->enabled) {
		ns->pr.enable = enable;
	} else {
		pr_err("the ns:%d is already enabled.\n", ns->nsid);
		ret = -EINVAL;
	}
	mutex_unlock(&ns->subsys->lock);

	return ret;
}
CONFIGFS_ATTR(nvmet_ns_, resv_enable);
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@ -777,6 +803,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
&nvmet_ns_attr_revalidate_size,
&nvmet_ns_attr_resv_enable,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif

View File

@ -611,6 +611,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
if (ret)
goto out_restore_subsys_maxnsid;
if (ns->pr.enable) {
ret = nvmet_pr_init_ns(ns);
if (ret)
goto out_remove_from_subsys;
}
subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
@ -620,6 +626,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
mutex_unlock(&subsys->lock);
return ret;
out_remove_from_subsys:
xa_erase(&subsys->namespaces, ns->nsid);
out_restore_subsys_maxnsid:
subsys->max_nsid = nvmet_max_nsid(subsys);
percpu_ref_exit(&ns->ref);
@ -663,6 +671,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
wait_for_completion(&ns->disable_done);
percpu_ref_exit(&ns->ref);
if (ns->pr.enable)
nvmet_pr_exit_ns(ns);
mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
@ -754,6 +765,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
struct nvmet_ns *ns = req->ns;
struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;
if (!req->sq->sqhd_disabled)
nvmet_update_sq_head(req);
@ -766,6 +778,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
trace_nvmet_req_complete(req);
req->ops->queue_response(req);
if (pc_ref)
nvmet_pr_put_ns_pc_ref(pc_ref);
if (ns)
nvmet_put_namespace(ns);
}
@ -929,18 +944,39 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return ret;
}
if (req->ns->pr.enable) {
ret = nvmet_parse_pr_cmd(req);
if (!ret)
return ret;
}
switch (req->ns->csi) {
case NVME_CSI_NVM:
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
return nvmet_bdev_parse_io_cmd(req);
ret = nvmet_file_parse_io_cmd(req);
else
ret = nvmet_bdev_parse_io_cmd(req);
break;
case NVME_CSI_ZNS:
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
return nvmet_bdev_zns_parse_io_cmd(req);
return NVME_SC_INVALID_IO_CMD_SET;
ret = nvmet_bdev_zns_parse_io_cmd(req);
else
ret = NVME_SC_INVALID_IO_CMD_SET;
break;
default:
return NVME_SC_INVALID_IO_CMD_SET;
ret = NVME_SC_INVALID_IO_CMD_SET;
}
if (ret)
return ret;
if (req->ns->pr.enable) {
ret = nvmet_pr_check_cmd_access(req);
if (ret)
return ret;
ret = nvmet_pr_get_ns_pc_ref(req);
}
return ret;
}
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
@ -964,6 +1000,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->ns = NULL;
req->error_loc = NVMET_NO_ERROR_LOC;
req->error_slba = 0;
req->pc_ref = NULL;
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
@ -1015,6 +1052,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
void nvmet_req_uninit(struct nvmet_req *req)
{
percpu_ref_put(&req->sq->ref);
if (req->pc_ref)
nvmet_pr_put_ns_pc_ref(req->pc_ref);
if (req->ns)
nvmet_put_namespace(req->ns);
}
@ -1383,7 +1422,8 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
}
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
uuid_t *hostid)
{
struct nvmet_subsys *subsys;
struct nvmet_ctrl *ctrl;
@ -1462,6 +1502,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
}
ctrl->cntlid = ret;
uuid_copy(&ctrl->hostid, hostid);
/*
* Discovery controllers may use some arbitrary high value
* in order to cleanup stale discovery sessions
@ -1478,6 +1520,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
nvmet_start_keep_alive_timer(ctrl);
mutex_lock(&subsys->lock);
ret = nvmet_ctrl_init_pr(ctrl);
if (ret)
goto init_pr_fail;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
nvmet_setup_p2p_ns_map(ctrl, req);
nvmet_debugfs_ctrl_setup(ctrl);
@ -1486,6 +1531,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
*ctrlp = ctrl;
return 0;
init_pr_fail:
mutex_unlock(&subsys->lock);
nvmet_stop_keep_alive_timer(ctrl);
ida_free(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
kfree(ctrl->sqs);
out_free_changed_ns_list:
@ -1504,6 +1553,7 @@ static void nvmet_ctrl_free(struct kref *ref)
struct nvmet_subsys *subsys = ctrl->subsys;
mutex_lock(&subsys->lock);
nvmet_ctrl_destroy_pr(ctrl);
nvmet_release_p2p_ns_map(ctrl);
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
@ -1717,7 +1767,7 @@ static int __init nvmet_init(void)
goto out_free_zbd_work_queue;
nvmet_wq = alloc_workqueue("nvmet-wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
if (!nvmet_wq)
goto out_free_buffered_work_queue;

View File

@ -64,6 +64,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
case NVME_REG_CSTS:
val = ctrl->csts;
break;
case NVME_REG_CRTO:
val = NVME_CAP_TIMEOUT(ctrl->csts);
break;
default:
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
@ -245,12 +248,10 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
le32_to_cpu(c->kato), &ctrl);
le32_to_cpu(c->kato), &ctrl, &d->hostid);
if (status)
goto out;
uuid_copy(&ctrl->hostid, &d->hostid);
dhchap_status = nvmet_setup_auth(ctrl);
if (dhchap_status) {
pr_err("Failed to setup authentication, dhchap status %u\n",

View File

@ -20,8 +20,9 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/t10-pi.h>
#include <linux/kfifo.h>
#define NVMET_DEFAULT_VS NVME_VS(1, 3, 0)
#define NVMET_DEFAULT_VS NVME_VS(2, 1, 0)
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@ -30,6 +31,7 @@
#define NVMET_MN_MAX_SIZE 40
#define NVMET_SN_MAX_SIZE 20
#define NVMET_FR_MAX_SIZE 8
#define NVMET_PR_LOG_QUEUE_SIZE 64
/*
* Supported optional AENs:
@ -56,6 +58,38 @@
#define IPO_IATTR_CONNECT_SQE(x) \
(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
/*
 * One persistent-reservation registrant: a key, the registering host's
 * UUID and a reservation type, plus list linkage and an rcu_head.
 * NOTE(review): presumably linked on nvmet_pr.registrant_list and freed
 * through RCU; the users live in pr.c, which is not visible here - confirm.
 */
struct nvmet_pr_registrant {
u64 rkey;
uuid_t hostid;
enum nvme_pr_type rtype;
struct list_head entry;
struct rcu_head rcu;
};
/*
 * Persistent-reservation state; struct nvmet_ns embeds one instance as
 * its 'pr' member.
 */
struct nvmet_pr {
bool enable;
unsigned long notify_mask;
atomic_t generation;
/* RCU-annotated pointer; presumably the current reservation holder. */
struct nvmet_pr_registrant __rcu *holder;
/*
 * Reservation commands must exclude each other for their whole
 * execution. The 'preempt and abort' command only finishes after
 * waiting asynchronously for the 'per controller percpu_ref' to
 * complete, so the exclusion is provided by a semaphore rather
 * than a mutex.
 */
struct semaphore pr_sem;
struct list_head registrant_list;
};
/*
 * A percpu_ref paired with two completions and a host UUID. A request
 * points at one of these through nvmet_req.pc_ref and drops it with
 * nvmet_pr_put_ns_pc_ref().
 * NOTE(review): presumably one instance per (namespace, controller),
 * stored in nvmet_ns.pr_per_ctrl_refs - confirm against pr.c.
 */
struct nvmet_pr_per_ctrl_ref {
struct percpu_ref ref;
struct completion free_done;
struct completion confirm_done;
uuid_t hostid;
};
struct nvmet_ns {
struct percpu_ref ref;
struct file *bdev_file;
@ -85,6 +119,8 @@ struct nvmet_ns {
int pi_type;
int metadata_size;
u8 csi;
struct nvmet_pr pr;
struct xarray pr_per_ctrl_refs;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@ -191,6 +227,13 @@ static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
}
/*
 * Per-controller reservation log state (embedded in struct nvmet_ctrl
 * as 'pr_log_mgr'): a mutex, two 64-bit counters and a kfifo of
 * struct nvme_pr_log entries sized by NVMET_PR_LOG_QUEUE_SIZE.
 * NOTE(review): 'lost_count' presumably counts entries dropped when the
 * fifo is full - confirm against pr.c.
 */
struct nvmet_pr_log_mgr {
struct mutex lock;
u64 lost_count;
u64 counter;
DECLARE_KFIFO(log_queue, struct nvme_pr_log, NVMET_PR_LOG_QUEUE_SIZE);
};
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_sq **sqs;
@ -246,6 +289,7 @@ struct nvmet_ctrl {
u8 *dh_key;
size_t dh_keysize;
#endif
struct nvmet_pr_log_mgr pr_log_mgr;
};
struct nvmet_subsys {
@ -396,6 +440,9 @@ struct nvmet_req {
struct work_struct zmgmt_work;
} z;
#endif /* CONFIG_BLK_DEV_ZONED */
struct {
struct work_struct abort_work;
} r;
};
int sg_cnt;
int metadata_sg_cnt;
@ -412,6 +459,7 @@ struct nvmet_req {
struct device *p2p_client;
u16 error_loc;
u64 error_slba;
struct nvmet_pr_per_ctrl_ref *pc_ref;
};
#define NVMET_MAX_MPOOL_BVEC 16
@ -498,7 +546,8 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
uuid_t *hostid);
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
const char *hostnqn, u16 cntlid,
struct nvmet_req *req);
@ -761,4 +810,18 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
#endif
int nvmet_pr_init_ns(struct nvmet_ns *ns);
u16 nvmet_parse_pr_cmd(struct nvmet_req *req);
u16 nvmet_pr_check_cmd_access(struct nvmet_req *req);
int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl);
void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl);
void nvmet_pr_exit_ns(struct nvmet_ns *ns);
void nvmet_execute_get_log_page_resv(struct nvmet_req *req);
u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask);
u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req);
u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req);
/* Drop one reference on the percpu_ref embedded in @pc_ref. */
static inline void nvmet_pr_put_ns_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref)
{
percpu_ref_put(&pc_ref->ref);
}
#endif /* _NVMET_H */

1156
drivers/nvme/target/pr.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -180,6 +180,106 @@ static const char *nvmet_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
return ret;
}
/*
 * Format the Reservation Register fields found in @cdw10 into @p.
 *
 * Decodes the action (low three bits of byte 0), the iekey flag (bit 3
 * of byte 0) and ptpl (top two bits of byte 3). Action values without a
 * name are printed as "reserved".
 *
 * Returns the buffer pointer obtained from trace_seq_buffer_ptr() before
 * anything was written.
 */
static const char *nvmet_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);
	const u8 action = cdw10[0] & 0x7;
	const u8 ignore_key = (cdw10[0] >> 3) & 0x1;
	const u8 persist = (cdw10[3] >> 6) & 0x3;
	const char *action_name;

	switch (action) {
	case 0x00:
		action_name = "register";
		break;
	case 0x01:
		action_name = "unregister";
		break;
	case 0x02:
		action_name = "replace";
		break;
	default:
		action_name = "reserved";
		break;
	}

	trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
			 action, action_name, ignore_key, persist);
	/* Terminate the string inside the trace buffer. */
	trace_seq_putc(p, 0);

	return start;
}
/*
 * Names for reservation type values 0x00-0x06, indexed by the rtype
 * byte. Shared by the reservation acquire and release trace decoders,
 * which print "reserved" for any value outside this table.
 */
static const char * const rtype_strs[] = {
[0x00] = "reserved",
[0x01] = "write exclusive",
[0x02] = "exclusive access",
[0x03] = "write exclusive registrants only",
[0x04] = "exclusive access registrants only",
[0x05] = "write exclusive all registrants",
[0x06] = "exclusive access all registrants",
};
/*
 * Format the Reservation Acquire fields found in @cdw10 into @p.
 *
 * Decodes the action (low three bits of byte 0), the iekey flag (bit 3
 * of byte 0) and the reservation type (byte 1). Unnamed action or type
 * values are printed as "reserved".
 *
 * Returns the buffer pointer obtained from trace_seq_buffer_ptr() before
 * anything was written.
 */
static const char *nvmet_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);
	const u8 action = cdw10[0] & 0x7;
	const u8 ignore_key = (cdw10[0] >> 3) & 0x1;
	const u8 type = cdw10[1];
	const char *action_name;
	const char *type_name;

	switch (action) {
	case 0x00:
		action_name = "acquire";
		break;
	case 0x01:
		action_name = "preempt";
		break;
	case 0x02:
		action_name = "preempt and abort";
		break;
	default:
		action_name = "reserved";
		break;
	}

	if (type >= ARRAY_SIZE(rtype_strs) || !rtype_strs[type])
		type_name = "reserved";
	else
		type_name = rtype_strs[type];

	trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
			 action, action_name, ignore_key, type, type_name);
	/* Terminate the string inside the trace buffer. */
	trace_seq_putc(p, 0);

	return start;
}
/*
 * Format the Reservation Release fields found in @cdw10 into @p.
 *
 * Decodes the action (low three bits of byte 0), the iekey flag (bit 3
 * of byte 0) and the reservation type (byte 1). Unnamed action or type
 * values are printed as "reserved".
 *
 * Returns the buffer pointer obtained from trace_seq_buffer_ptr() before
 * anything was written.
 */
static const char *nvmet_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);
	const u8 action = cdw10[0] & 0x7;
	const u8 ignore_key = (cdw10[0] >> 3) & 0x1;
	const u8 type = cdw10[1];
	const char *action_name;
	const char *type_name;

	switch (action) {
	case 0x00:
		action_name = "release";
		break;
	case 0x01:
		action_name = "clear";
		break;
	default:
		action_name = "reserved";
		break;
	}

	if (type >= ARRAY_SIZE(rtype_strs) || !rtype_strs[type])
		type_name = "reserved";
	else
		type_name = rtype_strs[type];

	trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
			 action, action_name, ignore_key, type, type_name);
	/* Terminate the string inside the trace buffer. */
	trace_seq_putc(p, 0);

	return start;
}
/*
 * Format the Reservation Report fields into @p: numd is the
 * little-endian 32-bit value at @cdw10, eds is bit 0 of the byte that
 * follows it (offset 4).
 *
 * Returns the buffer pointer obtained from trace_seq_buffer_ptr() before
 * anything was written.
 */
static const char *nvmet_trace_resv_report(struct trace_seq *p, u8 *cdw10)
{
	const char *start = trace_seq_buffer_ptr(p);
	const u32 dwords = get_unaligned_le32(cdw10);
	const u8 ext_data = cdw10[4] & 0x1;

	trace_seq_printf(p, "numd=%u, eds=%u", dwords, ext_data);
	/* Terminate the string inside the trace buffer. */
	trace_seq_putc(p, 0);

	return start;
}
const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
u8 opcode, u8 *cdw10)
{
@ -195,6 +295,14 @@ const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
return nvmet_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvmet_trace_zone_mgmt_recv(p, cdw10);
case nvme_cmd_resv_register:
return nvmet_trace_resv_reg(p, cdw10);
case nvme_cmd_resv_acquire:
return nvmet_trace_resv_acq(p, cdw10);
case nvme_cmd_resv_release:
return nvmet_trace_resv_rel(p, cdw10);
case nvme_cmd_resv_report:
return nvmet_trace_resv_report(p, cdw10);
default:
return nvmet_trace_common(p, cdw10);
}

View File

@ -327,7 +327,8 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
__u8 rsvd338[4];
__le16 nvmsetidmax;
__le16 endgidmax;
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
@ -522,6 +523,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
@ -530,6 +532,7 @@ enum {
NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
NVME_ID_CNS_NS_GRANULARITY = 0x16,
NVME_ID_CNS_UUID_LIST = 0x17,
NVME_ID_CNS_ENDGRP_LIST = 0x19,
};
enum {
@ -560,6 +563,8 @@ enum {
NVME_NS_FLBAS_LBA_SHIFT = 1,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
NVME_NS_ROTATIONAL = 1 << 4,
NVME_NS_VWC_NOT_PRESENT = 1 << 5,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@ -617,6 +622,40 @@ enum {
NVME_NIDT_CSI = 0x04,
};
/*
 * Endurance group log page layout. The members add up to 512 bytes; the
 * numeric suffix of each reserved member (rsvd2, rsvd8, rsvd192) is its
 * byte offset within the page.
 * NOTE(review): field mnemonics presumably follow the NVMe Base
 * Specification's Endurance Group Information log page
 * (NVME_LOG_ENDURANCE_GROUP) - confirm against the spec.
 */
struct nvme_endurance_group_log {
__u8 egcw;
__u8 egfeat;
__u8 rsvd2;
__u8 avsp;
__u8 avspt;
__u8 pused;
__le16 did;
__u8 rsvd8[24];
/* Ten 16-byte fields, kept as raw byte arrays. */
__u8 ee[16];
__u8 dur[16];
__u8 duw[16];
__u8 muw[16];
__u8 hrc[16];
__u8 hwc[16];
__u8 mdie[16];
__u8 neile[16];
__u8 tegcap[16];
__u8 uegcap[16];
__u8 rsvd192[320];
};
/*
 * Rotational media log page layout. The members add up to 512 bytes;
 * rsvd6 and rsvd24 are named after their byte offsets. All multi-byte
 * fields are little-endian.
 * NOTE(review): presumably the page fetched with NVME_LOG_RMI; field
 * mnemonics follow the NVMe Base Specification - confirm.
 */
struct nvme_rotational_media_log {
__le16 endgid;
__le16 numa;
__le16 nrs;
__u8 rsvd6[2];
__le32 spinc;
__le32 fspinc;
__le32 ldc;
__le32 fldc;
__u8 rsvd24[488];
};
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@ -1244,6 +1283,7 @@ enum {
NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_FEAT_VENDOR_START = 0xC0,
NVME_FEAT_VENDOR_END = 0xFF,
NVME_LOG_SUPPORTED = 0x00,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
@ -1254,6 +1294,8 @@ enum {
NVME_LOG_TELEMETRY_CTRL = 0x08,
NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
NVME_LOG_FEATURES = 0x12,
NVME_LOG_RMI = 0x16,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@ -1261,6 +1303,24 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
/*
 * 256 little-endian 32-bit entries.
 * NOTE(review): presumably one entry per log page identifier, returned
 * by the NVME_LOG_SUPPORTED page, with NVME_LIDS_* flag bits - confirm.
 */
struct nvme_supported_log {
__le32 lids[256];
};
/*
 * Flag bit 0.
 * NOTE(review): presumably applies to nvme_supported_log.lids[] entries
 * ("log page supported") - confirm against the spec.
 */
enum {
NVME_LIDS_LSUPP = 1 << 0,
};
/*
 * 256 little-endian 32-bit entries.
 * NOTE(review): presumably one entry per feature identifier, returned by
 * the NVME_LOG_FEATURES page, with NVME_FIS_* flag bits - confirm.
 */
struct nvme_supported_features_log {
__le32 fis[256];
};
/*
 * Flag bits 0, 20 and 21.
 * NOTE(review): presumably apply to nvme_supported_features_log.fis[]
 * entries (feature supported / namespace scope / controller scope) -
 * confirm against the spec.
 */
enum {
NVME_FIS_FSUPP = 1 << 0,
NVME_FIS_NSCPE = 1 << 20,
NVME_FIS_CSCPE = 1 << 21,
};
/* NVMe Namespace Write Protect State */
enum {
NVME_NS_NO_WRITE_PROTECT = 0,
@ -1281,7 +1341,8 @@ struct nvme_identify {
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
__u8 rsvd11[3];
__le16 cnssid;
__u8 rsvd11;
__u8 csi;
__u32 rsvd12[4];
};
@ -1389,7 +1450,7 @@ struct nvme_get_log_page_command {
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
__le16 lsi;
union {
struct {
__le32 lpol;
@ -2037,4 +2098,72 @@ struct nvme_completion {
#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
#define NVME_TERTIARY(ver) ((ver) & 0xff)
/*
 * Asynchronous event value for the reservation log page.
 * NOTE(review): "AVALIABLE" is a misspelling of "AVAILABLE" in the
 * identifier itself; renaming it requires updating every user.
 */
enum {
NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00,
};
/*
 * Reservation log page type codes (0x00-0x03).
 * NOTE(review): presumably the values stored in nvme_pr_log.type -
 * confirm. "RESERVATOIN" is a misspelling of "RESERVATION" in the
 * identifier itself; renaming it requires updating every user.
 */
enum {
NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00,
NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01,
NVME_PR_LOG_RESERVATION_RELEASED = 0x02,
NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03,
};
/*
 * Bit numbers (not masks), starting at 1.
 * NOTE(review): presumably indexes into the reservation notification
 * mask (see nvmet_pr.notify_mask) - confirm against pr.c.
 */
enum {
NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1,
NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2,
NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3,
};
/*
 * Reservation log entry; the members add up to 64 bytes. This is the
 * element type of the kfifo in struct nvmet_pr_log_mgr.
 * NOTE(review): 'type' presumably holds one of the NVME_PR_LOG_* codes -
 * confirm.
 */
struct nvme_pr_log {
__le64 count;
__u8 type;
__u8 nr_pages;
__u8 rsvd1[2];
__le32 nsid;
__u8 rsvd2[48];
};
/*
 * Two little-endian 64-bit keys.
 * NOTE(review): presumably the Reservation Register data payload
 * (current key, new key) - confirm.
 */
struct nvmet_pr_register_data {
__le64 crkey;
__le64 nrkey;
};
/*
 * Two little-endian 64-bit keys.
 * NOTE(review): presumably the Reservation Acquire data payload
 * (current key, key to preempt) - confirm.
 */
struct nvmet_pr_acquire_data {
__le64 crkey;
__le64 prkey;
};
/*
 * A single little-endian 64-bit key.
 * NOTE(review): presumably the Reservation Release data payload
 * (current key) - confirm.
 */
struct nvmet_pr_release_data {
__le64 crkey;
};
/*
 * Reservation capability flags, one bit each (bits 0-7). Every value is
 * written as a shift so the bit position is visible at a glance.
 */
enum nvme_pr_capabilities {
	NVME_PR_SUPPORT_PTPL				= 1 << 0,
	NVME_PR_SUPPORT_WRITE_EXCLUSIVE			= 1 << 1,
	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS		= 1 << 2,
	NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY	= 1 << 3,
	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY	= 1 << 4,
	NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS	= 1 << 5,
	NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS	= 1 << 6,
	NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF		= 1 << 7,
};
/*
 * Reservation register action codes. These are consecutive codes, not
 * bit flags, so they are spelled as plain values.
 */
enum nvme_pr_register_action {
	NVME_PR_REGISTER_ACT_REG	= 0x0,
	NVME_PR_REGISTER_ACT_UNREG	= 0x1,
	NVME_PR_REGISTER_ACT_REPLACE	= 0x2,
};
/*
 * Reservation acquire action codes. These are consecutive codes, not
 * bit flags, so they are spelled as plain values.
 */
enum nvme_pr_acquire_action {
	NVME_PR_ACQUIRE_ACT_ACQUIRE		= 0x0,
	NVME_PR_ACQUIRE_ACT_PREEMPT		= 0x1,
	NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT	= 0x2,
};
/* Reservation release action codes. */
enum nvme_pr_release_action {
	NVME_PR_RELEASE_ACT_RELEASE	= 0x0,
	NVME_PR_RELEASE_ACT_CLEAR	= 0x1,
};
#endif /* _LINUX_NVME_H */