Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:SRIZkP4w2m
qemu
hw-nvme-implement-pi-pass-read-write-com.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File hw-nvme-implement-pi-pass-read-write-com.patch of Package qemu
From: Dmitry Tihov <d.tihov@yadro.com> Date: Wed, 11 May 2022 19:09:35 +0000 Subject: hw/nvme: implement pi pass read/write commands Read and write protection information (PI) metadata from/to an integrity-capable bdrv_host_device, using block-level transfer of protection information. Signed-off-by: Dmitry Tihov <d.tihov@yadro.com> --- hw/nvme/ctrl.c | 13 +- hw/nvme/dif.c | 300 +++++++++++++++++++++++++++++++++++++++++++ hw/nvme/nvme.h | 16 +++ hw/nvme/trace-events | 4 + 4 files changed, 330 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 5f573c417b3d66c30814a74b192a..a5b8facf6afabef6c61a45b56355 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1909,7 +1909,7 @@ static void nvme_rw_cb(void *opaque, int ret) goto out; } - if (ns->lbaf.ms) { + if (ns->lbaf.ms && !ns->pip) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; @@ -3109,7 +3109,9 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) } } - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + if (ns->pip) { + return nvme_dif_pass_rw(n, req); + } else if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { return nvme_dif_rw(n, req); } @@ -3139,6 +3141,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; uint16_t ctrl = le16_to_cpu(rw->control); uint8_t prinfo = NVME_RW_PRINFO(ctrl); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); uint64_t data_size = nvme_l2b(ns, nlb); uint64_t mapped_size = data_size; uint64_t data_offset; @@ -3237,7 +3240,11 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, data_offset = nvme_l2b(ns, slba); - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + if (ns->pip) { + if (!wrz || pract) { + return nvme_dif_pass_rw(n, req); + } + } else if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { return nvme_dif_rw(n, req); } diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index
5dbd18b2a4a59161a5eba4a1bdb6..b312de6ba04c06c1978ba9f52110 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -507,3 +507,303 @@ err: return status; } + +void nvme_dif_pass_dump(uint8_t *mdata_buf, size_t mdata_len) +{ + NvmeDifTuple *mdata = (NvmeDifTuple *) mdata_buf; + size_t i, dif_count; + dif_count = mdata_len / sizeof(NvmeDifTuple); + for (i = 0; i < dif_count; ++i, mdata++) { + trace_pci_nvme_dif_dump_pass_pi(i + 1, be16_to_cpu(mdata->guard), + be16_to_cpu(mdata->apptag), + be32_to_cpu(mdata->reftag)); + } +} + +static void nvme_dif_pass_read_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeCtrl *n = nvme_ctrl(req); + NvmeDifPassContext *ctx = req->opaque; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t slba = le64_to_cpu(rw->slba); + uint16_t status; + + trace_pci_nvme_dif_pass_read_cb(nvme_cid(req), ctx->iov.dif.iov_len >> 3); + if (trace_event_get_state_backends(TRACE_PCI_NVME_DIF_DUMP_PASS_PI)) { + nvme_dif_pass_dump(ctx->iov.dif.iov_base, ctx->iov.dif.iov_len); + } + + /* block layer returns EILSEQ in case of integrity check failure */ + /* determine exact pi error and return status accordingly */ + if (unlikely(ret == -EILSEQ)) { + req->status = nvme_dif_pass_check(ns, ctx->data.bounce, ctx->data.len, + ctx->iov.dif.iov_base, prinfo, slba, reftag); + if (req->status) { + /* zero out ret to allow req->status passthrough */ + ret = 0; + } + goto out; + } + + if (ret) { + goto out; + } + + status = nvme_dif_pass_apptag_check(ns, ctx->iov.dif.iov_base, + ctx->iov.dif.iov_len, prinfo, apptag, appmask); + if (status) { + req->status = status; + goto out; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.len, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if 
(status) { + req->status = status; + goto out; + } + + if (!pract) { + status = nvme_bounce_mdata(n, ctx->iov.dif.iov_base, + ctx->iov.dif.iov_len, NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + } + } + +out: + qemu_iovec_destroy_pi(&ctx->iov); + qemu_vfree(ctx->data.bounce); + g_free(ctx); + + nvme_rw_complete_cb(req, ret); +} + +static void nvme_diff_pass_write_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeDifPassContext *ctx = req->opaque; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t slba = le64_to_cpu(rw->slba); + + trace_pci_nvme_dif_pass_write_cb(nvme_cid(req), ctx->iov.dif.iov_len >> 3); + if (trace_event_get_state_backends(TRACE_PCI_NVME_DIF_DUMP_PASS_PI)) { + nvme_dif_pass_dump(ctx->iov.dif.iov_base, ctx->iov.dif.iov_len); + } + + /* block layer returns EILSEQ in case of integrity check failure */ + /* determine exact pi error and return status accordingly */ + if (unlikely(ret == -EILSEQ)) { + req->status = nvme_dif_pass_check(ns, ctx->data.bounce, ctx->data.len, + ctx->iov.dif.iov_base, prinfo, slba, reftag); + if (req->status) { + /* zero out ret to allow req->status passthrough */ + ret = 0; + } + } + + qemu_iovec_destroy_pi(&ctx->iov); + qemu_vfree(ctx->data.bounce); + g_free(ctx); + + nvme_rw_complete_cb(req, ret); +} + +uint16_t nvme_dif_pass_rw(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t slba = 
le64_to_cpu(rw->slba); + size_t len = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + NvmeDifPassContext *ctx; + uint16_t status; + + trace_pci_nvme_dif_pass_rw(nvme_cid(req), + NVME_ID_NS_DPS_TYPE(ns->id_ns.dps), prinfo, apptag, appmask, reftag); + + ctx = g_new0(NvmeDifPassContext, 1); + qemu_iovec_init_pi(&ctx->iov, 1, nlb); + ctx->data.len = len; + ctx->data.bounce = qemu_memalign(qemu_real_host_page_size, ctx->data.len); + qemu_iovec_add(&ctx->iov, ctx->data.bounce, ctx->data.len); + + req->opaque = ctx; + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + goto err; + } + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + goto err; + } + + if (req->cmd.opcode == NVME_CMD_READ) { + block_acct_start(blk_get_stats(blk), &req->acct, ctx->iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->iov, 0, + nvme_dif_pass_read_cb, req); + + return NVME_NO_COMPLETE; + } + + if (wrz) { + + assert(pract); + + if (prinfo & NVME_PRINFO_PRCHK_MASK) { + status = NVME_INVALID_PROT_INFO | NVME_DNR; + goto err; + } + uint8_t *mbuf, *end; + + mbuf = ctx->iov.dif.iov_base; + end = mbuf + ctx->iov.dif.iov_len; + + for (; mbuf < end; mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf); + + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(reftag); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + reftag++; + } + } + memset(ctx->data.bounce, 0, ctx->data.len); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->iov, 0, + nvme_diff_pass_write_cb, req); + + } else { + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.len, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + if (pract) { + nvme_dif_pract_generate_dif(ns, ctx->data.bounce, + ctx->data.len, ctx->iov.dif.iov_base, + ctx->iov.dif.iov_len, apptag, &reftag); + } else { + status = nvme_bounce_mdata(n, 
ctx->iov.dif.iov_base, + ctx->iov.dif.iov_len, NVME_TX_DIRECTION_TO_DEVICE, + req); + if (status) { + goto err; + } + status = nvme_dif_pass_apptag_check(ns, ctx->iov.dif.iov_base, + ctx->iov.dif.iov_len, prinfo, apptag, appmask); + if (status) { + goto err; + } + } + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->iov.size, + BLOCK_ACCT_WRITE); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->iov, 0, + nvme_diff_pass_write_cb, req); + + } + + return NVME_NO_COMPLETE; + +err: + qemu_iovec_destroy_pi(&ctx->iov); + qemu_vfree(ctx->data.bounce); + g_free(ctx); + + return status; +} + +uint16_t nvme_dif_pass_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, uint8_t prinfo, uint64_t slba, + uint32_t reftag) +{ + Error *local_err = NULL; + uint16_t status; + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + return status; + } + + uint8_t *end = buf + len; + + for (uint8_t *bufp = buf, *mbufp = mbuf; bufp < end; bufp += ns->lbasz, + mbufp += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)mbufp; + + if (be16_to_cpu(dif->guard) != crc_t10dif(0x0, bufp, ns->lbasz)) { + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { + return NVME_E2E_GUARD_ERROR; + } else { + error_setg(&local_err, "Nvme namespace %u, backed by %s" + " drive, can not pass custom guard tag", + nvme_nsid(ns), blk_name(ns->blkconf.blk)); + error_report_err(local_err); + return NVME_INTERNAL_DEV_ERROR; + } + } + + if (be32_to_cpu(dif->reftag) != reftag) { + if (prinfo & NVME_PRINFO_PRCHK_REF) { + return NVME_E2E_REF_ERROR; + } else if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != + NVME_ID_NS_DPS_TYPE_3) { + error_setg(&local_err, "Nvme namespace %u, backed by %s" + " drive can not pass custom ref tag", + nvme_nsid(ns), blk_name(ns->blkconf.blk)); + error_report_err(local_err); + return NVME_INTERNAL_DEV_ERROR; + } + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + reftag++; + } + } + + return NVME_SUCCESS; +} + +uint16_t 
nvme_dif_pass_apptag_check(NvmeNamespace *ns, uint8_t *mbuf, + size_t mlen, uint8_t prinfo, + uint16_t apptag, uint16_t appmask) +{ + if (prinfo & NVME_PRINFO_PRCHK_APP) { + uint8_t *end = mbuf + mlen; + for (uint8_t *mbufp = mbuf; mbufp < end; mbufp += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)mbufp; + if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + } + + return NVME_SUCCESS; +} diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index cf5bc874451dc5d4c8159fafd99f..8b5fc067c3b16a8d6b287602a54b 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -554,5 +554,21 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint16_t appmask, uint32_t *reftag); uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); +typedef struct NvmeDifPassContext { + struct { + uint8_t *bounce; + size_t len; + } data; + QEMUIOVector iov; +} NvmeDifPassContext; + +uint16_t nvme_dif_pass_rw(NvmeCtrl *n, NvmeRequest *req); +void nvme_dif_pass_dump(uint8_t *mdata_buf, size_t mdata_len); +uint16_t nvme_dif_pass_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, uint8_t prinfo, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_pass_apptag_check(NvmeNamespace *ns, uint8_t *mbuf, + size_t mlen, uint8_t prinfo, + uint16_t apptag, uint16_t appmask); #endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ff6cafd520df81ab8be6d061671b..5081627ddd1db56ff6b494c4ecd2 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -16,6 +16,10 @@ pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16"" pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_pass_rw(uint16_t cid, uint8_t type, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" type %"PRIu8" 
prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pass_read_cb(uint16_t cid, size_t count) "cid %"PRIu16" number of DIF elements %zu" +pci_nvme_dif_pass_write_cb(uint16_t cid, size_t count) "cid %"PRIu16" number of DIF elements %zu" +pci_nvme_dif_dump_pass_pi(size_t dif_num, uint16_t guard, uint16_t apptag, uint32_t reftag) "DIF element %zu guard tag 0x%"PRIx16" apptag 0x%"PRIx16" reftag 0x%"PRIx32"" pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor