From 87cafa082ce395ff8a28ab23ff97dfe108604bf2 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Wed, 13 Nov 2024 19:47:15 -0500 Subject: [PATCH 01/66] dt-bindings: iommu: arm,smmu: add sdm670 adreno iommu compatible SDM670 has a separate IOMMU for the GPU, like SDM845. Add the compatible for it. Signed-off-by: Richard Acayan Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20241114004713.42404-5-mailingradian@gmail.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index c1e11bc6b7a0..d88ad03c2f6b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -122,6 +122,7 @@ properties: - qcom,msm8996-smmu-v2 - qcom,sc7180-smmu-v2 - qcom,sdm630-smmu-v2 + - qcom,sdm670-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 - qcom,sm7150-smmu-v2 From 42314738906380cbd3b6e9caf3ad34e1b2d66035 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Wed, 13 Nov 2024 19:47:16 -0500 Subject: [PATCH 02/66] iommu/arm-smmu-qcom: add sdm670 adreno iommu compatible Add the compatible for the separate IOMMU on SDM670 for the Adreno GPU. This IOMMU has the compatible strings: "qcom,sdm670-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2" While the SMMU 500 doesn't need an entry for this specific SoC, the SMMU v2 compatible should have its own entry, as the fallback entry in arm-smmu.c handles "qcom,smmu-v2" without per-process page table support unless there is an entry here. This entry can't be the "qcom,adreno-smmu" compatible because dedicated GPU IOMMUs can also be SMMU 500 with different handling. 
Signed-off-by: Richard Acayan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241114004713.42404-6-mailingradian@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6372f3e25c4b..601fb878d0ef 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -567,6 +567,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, From d1e22c7145af42f26f05771897a9fd5162212edb Mon Sep 17 00:00:00 2001 From: Qingqing Zhou Date: Fri, 22 Nov 2024 13:19:21 +0530 Subject: [PATCH 03/66] dt-bindings: arm-smmu: document QCS615 GPU SMMU Add the compatible for Qualcomm QCS615 GPU SMMU. Add the compatible in the list of 3 clocks required by the GPU SMMU. Remove the compatible from the "no clocks" list. 
Signed-off-by: Qingqing Zhou Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241122074922.28153-2-quic_qqzhou@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index d88ad03c2f6b..9e29ef7cef77 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -88,6 +88,7 @@ properties: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qcs615-smmu-500 - qcom,sa8255p-smmu-500 - qcom,sa8775p-smmu-500 - qcom,sar2130p-smmu-500 @@ -475,6 +476,7 @@ allOf: items: - enum: - qcom,qcm2290-smmu-500 + - qcom,qcs615-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 - const: qcom,adreno-smmu @@ -560,7 +562,6 @@ allOf: - cavium,smmu-v2 - marvell,ap806-smmu-500 - nvidia,smmu-500 - - qcom,qcs615-smmu-500 - qcom,qcs8300-smmu-500 - qcom,qdu1000-smmu-500 - qcom,sa8255p-smmu-500 From 2593988fd00e1fa9401fa5f951c098682b41f2ef Mon Sep 17 00:00:00 2001 From: Melody Olvera Date: Wed, 4 Dec 2024 14:27:25 -0800 Subject: [PATCH 04/66] dt-bindings: arm-smmu: Document SM8750 SMMU Document the SM8750 SMMU block. 
Reviewed-by: Rob Herring (Arm) Signed-off-by: Melody Olvera Link: https://lore.kernel.org/r/20241204-sm8750_master_smmu-v2-1-9e73e3fc15f2@quicinc.com Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 9e29ef7cef77..032fdc27127b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -61,6 +61,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,sm8750-smmu-500 - qcom,x1e80100-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -103,6 +104,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,sm8750-smmu-500 - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 @@ -553,6 +555,23 @@ allOf: - description: GPU SNoC bus clock - description: GPU AHB clock + - if: + properties: + compatible: + items: + - const: qcom,sm8750-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + then: + properties: + clock-names: + items: + - const: hlos + clocks: + items: + - description: HLOS vote clock + # Disallow clocks for all other platforms with specific compatibles - if: properties: From 43ca55f5555b53d0c37039c62d75e882cccbfb69 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Tue, 3 Dec 2024 18:49:05 +0000 Subject: [PATCH 05/66] iommu/arm-smmu-v3: Introduce struct arm_smmu_event Introduce `struct arm_smmu_event` to represent event records. Parse out relevant fields from raw event records for ease and use the new `struct arm_smmu_event` instead. 
Signed-off-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20241203184906.2264528-2-praan@google.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 55 ++++++++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 18 +++++++ 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a5c7002ff75b..9fcba8fab9e3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1759,17 +1759,34 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ -static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) +static void arm_smmu_decode_event(u64 *raw, struct arm_smmu_event *event) +{ + event->id = FIELD_GET(EVTQ_0_ID, raw[0]); + event->sid = FIELD_GET(EVTQ_0_SID, raw[0]); + event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]); + event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID; + event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]); + event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]); + event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]); + event->read = FIELD_GET(EVTQ_1_RnW, raw[1]); + event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]); + event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]); + event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]); + event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]); + event->ipa = raw[3] & EVTQ_3_IPA; + event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR; +} + +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; struct arm_smmu_master *master; - bool ssid_valid = evt[0] & EVTQ_0_SSV; - u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); struct iopf_fault fault_evt = { }; struct iommu_fault *flt = &fault_evt.fault; - switch (FIELD_GET(EVTQ_0_ID, evt[0])) { + switch (event->id) { case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ 
-1779,35 +1796,35 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) return -EOPNOTSUPP; } - if (!(evt[1] & EVTQ_1_STALL)) + if (!event->stall) return -EOPNOTSUPP; - if (evt[1] & EVTQ_1_RnW) + if (event->read) perm |= IOMMU_FAULT_PERM_READ; else perm |= IOMMU_FAULT_PERM_WRITE; - if (evt[1] & EVTQ_1_InD) + if (event->instruction) perm |= IOMMU_FAULT_PERM_EXEC; - if (evt[1] & EVTQ_1_PnU) + if (event->privileged) perm |= IOMMU_FAULT_PERM_PRIV; flt->type = IOMMU_FAULT_PAGE_REQ; flt->prm = (struct iommu_fault_page_request) { .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), + .grpid = event->stag, .perm = perm, - .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), + .addr = event->iova, }; - if (ssid_valid) { + if (event->ssv) { flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + flt->prm.pasid = event->ssid; } mutex_lock(&smmu->streams_mutex); - master = arm_smmu_find_master(smmu, sid); + master = arm_smmu_find_master(smmu, event->sid); if (!master) { ret = -EINVAL; goto out_unlock; @@ -1822,23 +1839,23 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { int i, ret; + u64 evt[EVTQ_ENT_DWORDS]; + struct arm_smmu_event event = {0}; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - u64 evt[EVTQ_ENT_DWORDS]; do { while (!queue_remove_raw(q, evt)) { - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); - - ret = arm_smmu_handle_evt(smmu, evt); + arm_smmu_decode_event(evt, &event); + ret = arm_smmu_handle_evt(smmu, &event); if (!ret || !__ratelimit(&rs)) continue; - dev_info(smmu->dev, "event 0x%02x received:\n", id); - for (i = 0; i < ARRAY_SIZE(evt); ++i) + dev_info(smmu->dev, "event 0x%02x received:\n", event.id); + for (i = 0; i < 
EVTQ_ENT_DWORDS; ++i) dev_info(smmu->dev, "\t0x%016llx\n", (unsigned long long)evt[i]); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 0107d3f333a1..c37ed3c925ec 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -470,6 +470,7 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_1_TT_READ (1UL << 44) #define EVTQ_2_ADDR GENMASK_ULL(63, 0) #define EVTQ_3_IPA GENMASK_ULL(51, 12) +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 @@ -789,6 +790,23 @@ struct arm_smmu_stream { struct rb_node node; }; +struct arm_smmu_event { + u8 stall : 1, + ssv : 1, + privileged : 1, + instruction : 1, + s2 : 1, + read : 1; + u8 id; + u8 class; + u16 stag; + u32 sid; + u32 ssid; + u64 iova; + u64 ipa; + u64 fetch_addr; +}; + /* SMMU private data for each master */ struct arm_smmu_master { struct arm_smmu_device *smmu; From d814b70b9b901c823ddedd12757ca4a19b18c8f7 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Tue, 3 Dec 2024 18:49:06 +0000 Subject: [PATCH 06/66] iommu/arm-smmu-v3: Log better event records Currently, the driver dumps the raw hex for a received event record. Improve this by leveraging `struct arm_smmu_event` for event fields and log human-readable event records with meaningful information. 
Signed-off-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20241203184906.2264528-3-praan@google.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 114 +++++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 14 ++- 2 files changed, 115 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9fcba8fab9e3..143ff6336a95 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -83,6 +83,28 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; +static const char * const event_str[] = { + [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID", + [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH", + [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE", + [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED", + [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID", + [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH", + [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD", + [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION", + [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE", + [EVT_ID_ACCESS_FAULT] = "F_ACCESS", + [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION", + [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH", +}; + +static const char * const event_class_str[] = { + [0] = "CD fetch", + [1] = "Stage 1 translation table fetch", + [2] = "Input address caused fault", + [3] = "Reserved", +}; + static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); @@ -1759,8 +1781,11 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ -static void arm_smmu_decode_event(u64 *raw, struct arm_smmu_event *event) +static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) { + struct arm_smmu_master *master; + event->id = FIELD_GET(EVTQ_0_ID, raw[0]); event->sid = 
FIELD_GET(EVTQ_0_SID, raw[0]); event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]); @@ -1775,9 +1800,21 @@ static void arm_smmu_decode_event(u64 *raw, struct arm_smmu_event *event) event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]); event->ipa = raw[3] & EVTQ_3_IPA; event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR; + event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]); + event->class_tt = false; + event->dev = NULL; + + if (event->id == EVT_ID_PERMISSION_FAULT) + event->class_tt = (event->class == EVTQ_1_CLASS_TT); + + mutex_lock(&smmu->streams_mutex); + master = arm_smmu_find_master(smmu, event->sid); + if (master) + event->dev = get_device(master->dev); + mutex_unlock(&smmu->streams_mutex); } -static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, struct arm_smmu_event *event) { int ret = 0; @@ -1836,9 +1873,67 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, return ret; } +static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) +{ + int i; + + dev_err(smmu->dev, "event 0x%02x received:\n", event->id); + + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) + dev_err(smmu->dev, "\t0x%016llx\n", raw[i]); +} + +#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id]) +#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN" +#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? 
dev_name((e)->dev) : "(unassigned sid)" + +static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *evt, + struct ratelimit_state *rs) +{ + if (!__ratelimit(rs)) + return; + + arm_smmu_dump_raw_event(smmu, raw, evt); + + switch (evt->id) { + case EVT_ID_TRANSLATION_FAULT: + case EVT_ID_ADDR_SIZE_FAULT: + case EVT_ID_ACCESS_FAULT: + case EVT_ID_PERMISSION_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->iova, evt->ipa); + + dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x", + evt->privileged ? "priv" : "unpriv", + evt->instruction ? "inst" : "data", + evt->read ? "read" : "write", + evt->s2 ? "s2" : "s1", event_class_str[evt->class], + evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "", + evt->stall ? " stall" : "", evt->stag); + + break; + + case EVT_ID_STE_FETCH_FAULT: + case EVT_ID_CD_FETCH_FAULT: + case EVT_ID_VMS_FETCH_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->fetch_addr); + + break; + + default: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid); + } +} + static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i, ret; u64 evt[EVTQ_ENT_DWORDS]; struct arm_smmu_event event = {0}; struct arm_smmu_device *smmu = dev; @@ -1849,16 +1944,11 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) do { while (!queue_remove_raw(q, evt)) { - arm_smmu_decode_event(evt, &event); - ret = arm_smmu_handle_evt(smmu, &event); - if (!ret || !__ratelimit(&rs)) - continue; - - dev_info(smmu->dev, "event 0x%02x received:\n", event.id); - for (i = 0; i < EVTQ_ENT_DWORDS; ++i) - dev_info(smmu->dev, "\t0x%016llx\n", - (unsigned long long)evt[i]); + 
arm_smmu_decode_event(smmu, evt, &event); + if (arm_smmu_handle_event(smmu, &event)) + arm_smmu_dump_event(smmu, evt, &event, &rs); + put_device(event.dev); cond_resched(); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index c37ed3c925ec..c7f37fd47768 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -452,10 +452,18 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVT_ID_BAD_STREAMID_CONFIG 0x02 +#define EVT_ID_STE_FETCH_FAULT 0x03 +#define EVT_ID_BAD_STE_CONFIG 0x04 +#define EVT_ID_STREAM_DISABLED_FAULT 0x06 +#define EVT_ID_BAD_SUBSTREAMID_CONFIG 0x08 +#define EVT_ID_CD_FETCH_FAULT 0x09 +#define EVT_ID_BAD_CD_CONFIG 0x0a #define EVT_ID_TRANSLATION_FAULT 0x10 #define EVT_ID_ADDR_SIZE_FAULT 0x11 #define EVT_ID_ACCESS_FAULT 0x12 #define EVT_ID_PERMISSION_FAULT 0x13 +#define EVT_ID_VMS_FETCH_FAULT 0x25 #define EVTQ_0_SSV (1UL << 11) #define EVTQ_0_SSID GENMASK_ULL(31, 12) @@ -467,6 +475,7 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_1_RnW (1UL << 35) #define EVTQ_1_S2 (1UL << 39) #define EVTQ_1_CLASS GENMASK_ULL(41, 40) +#define EVTQ_1_CLASS_TT 0x01 #define EVTQ_1_TT_READ (1UL << 44) #define EVTQ_2_ADDR GENMASK_ULL(63, 0) #define EVTQ_3_IPA GENMASK_ULL(51, 12) @@ -796,7 +805,9 @@ struct arm_smmu_event { privileged : 1, instruction : 1, s2 : 1, - read : 1; + read : 1, + ttrnw : 1, + class_tt : 1; u8 id; u8 class; u16 stag; @@ -805,6 +816,7 @@ struct arm_smmu_event { u64 iova; u64 ipa; u64 fetch_addr; + struct device *dev; }; /* SMMU private data for each master */ From 9b640ae7fbba13d45a8b9712dff2911a0c2b5ff4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Dec 2024 11:40:15 -0400 Subject: [PATCH 07/66] iommu/arm-smmuv3: Update comments about ATS and bypass The SMMUv3 spec has a note that BYPASS and ATS don't work together under the STE 
EATS field definition. However there is another section "13.6.4 Full ATS skipping stage 1" that explains under certain conditions BYPASS and ATS do work together if the STE is using S1DSS to select BYPASS and the CD table has the possibility for a substream. When these comments were written the understanding was that all forms of BYPASS just didn't work and this was to be a future problem to solve. It turns out that ATS and IDENTITY will always work just fine: - If STE.Config = BYPASS then the PCI ATS is disabled - If a PASID domain is attached then S1DSS = BYPASS and ATS will be enabled. This meets the requirements of 13.6.4 to automatically generate 1:1 ATS replies on the RID. Update the comments to reflect this. Fixes: 7497f4211f4f ("iommu/arm-smmu-v3: Make changing domains be hitless for ATS") Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-f27174f44f39+27a33-smmuv3_ats_note_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 143ff6336a95..45e87ce4b973 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2852,9 +2852,14 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * Translation Requests and Translated transactions are denied * as though ATS is disabled for the stream (STE.EATS == 0b00), * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events - * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be - * enabled if we have arm_smmu_domain, those always have page - * tables. + * (IHI0070Ea 5.2 Stream Table Entry). + * + * However, if we have installed a CD table and are using S1DSS + * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS + * skipping stage 1". + * + * Disable ATS if we are going to create a normal 0b100 bypass + * STE. 
*/ state->ats_enabled = !state->disable_ats && arm_smmu_ats_supported(master); @@ -3177,8 +3182,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, if (arm_smmu_ssids_in_use(&master->cd_table)) { /* * If a CD table has to be present then we need to run with ATS - * on even though the RID will fail ATS queries with UR. This is - * because we have no idea what the PASID's need. + * on because we have to assume a PASID is using ATS. For + * IDENTITY this will setup things so that S1DSS=bypass which + * follows the explanation in "13.6.4 Full ATS skipping stage 1" + * and allows for ATS on the RID to work. */ state.cd_needs_ats = true; arm_smmu_attach_prepare(&state, domain); From 7d835134d4e13e9c30509fd24a42f8c2b94135ea Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 16:33:55 +0000 Subject: [PATCH 08/66] iommu/arm-smmu: Make instance lookup robust Relying on the driver list was a cute idea for minimising the scope of our SMMU device lookups, however it turns out to have a subtle flaw. The SMMU device only gets added to that list after arm_smmu_device_probe() returns success, so there's actually no way the iommu_device_register() call from there could ever work as intended, even if it wasn't already hampered by the fwspec setup not happening early enough. Switch both arm_smmu_get_by_fwnode() implementations to use a platform bus lookup instead, which *will* reliably work. Also make sure that we don't register SMMUv2 instances until we've fully initialised them, to avoid similar consequences of the lookup now finding a device with no drvdata. Moving the error returns is also a perfect excuse to streamline them with dev_err_probe() in the process. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/6d7ce1dc31873abdb75c895fb8bd2097cce098b4.1733406914.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +-- drivers/iommu/arm/arm-smmu/arm-smmu.c | 31 ++++++++++----------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 45e87ce4b973..dbacf8986fa7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3351,8 +3351,8 @@ static struct platform_driver arm_smmu_driver; static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? dev_get_drvdata(dev) : NULL; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 650664e0f6e3..0949f2734e5d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1411,8 +1411,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? dev_get_drvdata(dev) : NULL; } @@ -2227,21 +2227,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) i, irq); } - err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, - "smmu.%pa", &smmu->ioaddr); - if (err) { - dev_err(dev, "Failed to register iommu in sysfs\n"); - return err; - } - - err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, - using_legacy_binding ? 
NULL : dev); - if (err) { - dev_err(dev, "Failed to register iommu\n"); - iommu_device_sysfs_remove(&smmu->iommu); - return err; - } - platform_set_drvdata(pdev, smmu); /* Check for RMRs and install bypass SMRs if any */ @@ -2250,6 +2235,18 @@ static int arm_smmu_device_probe(struct platform_device *pdev) arm_smmu_device_reset(smmu); arm_smmu_test_smr_masks(smmu); + err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, + "smmu.%pa", &smmu->ioaddr); + if (err) + return dev_err_probe(dev, err, "Failed to register iommu in sysfs\n"); + + err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, + using_legacy_binding ? NULL : dev); + if (err) { + iommu_device_sysfs_remove(&smmu->iommu); + return dev_err_probe(dev, err, "Failed to register iommu\n"); + } + /* * We want to avoid touching dev->power.lock in fastpaths unless * it's really going to do something useful - pm_runtime_enabled() From 97cb1fa0272646c2a033b05338bb8e0260879968 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 16:33:56 +0000 Subject: [PATCH 09/66] iommu/arm-smmu: Retire probe deferral workaround This reverts commit 229e6ee43d2a160a1592b83aad620d6027084aad. Now that the fundamental ordering issue between arm_smmu_get_by_fwnode() and iommu_device_register() is resolved, the race condition for client probe no longer exists either, so retire the specific workaround. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/4167c5dfa052d4c8bb780f0a30af63dcfc4ce6c1.1733406914.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 0949f2734e5d..79afc92e1d8b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1437,17 +1437,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) goto out_free; } else { smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - - /* - * Defer probe if the relevant SMMU instance hasn't finished - * probing yet. This is a fragile hack and we'd ideally - * avoid this race in the core code. Until that's ironed - * out, however, this is the most pragmatic option on the - * table. - */ - if (!smmu) - return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER, - "smmu dev has not bound yet\n")); } ret = -EINVAL; From fcbd621567420b3a2f21f49bbc056de8b273c625 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 16:33:57 +0000 Subject: [PATCH 10/66] iommu/arm-smmu-v3: Clean up more on probe failure kmemleak noticed that the iopf queue allocated deep down within arm_smmu_init_structures() can be leaked by a subsequent error return from arm_smmu_device_probe(). Furthermore, after arm_smmu_device_reset() we will also leave the SMMU enabled with an empty Stream Table, silently blocking all DMA. This proves rather annoying for debugging said probe failure, so let's handle it a bit better by putting the SMMU back into (more or less) the same state as if it hadn't probed at all. 
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5137901958471cf67f2fad5c2229f8a8f1ae901a.1733406914.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index dbacf8986fa7..d8ebe18a5507 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4777,7 +4777,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Initialise in-memory data structures */ ret = arm_smmu_init_structures(smmu); if (ret) - return ret; + goto err_free_iopf; /* Record our private device structure */ platform_set_drvdata(pdev, smmu); @@ -4788,22 +4788,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) - return ret; + goto err_disable; /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); if (ret) - return ret; + goto err_disable; ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - iommu_device_sysfs_remove(&smmu->iommu); - return ret; + goto err_free_sysfs; } return 0; + +err_free_sysfs: + iommu_device_sysfs_remove(&smmu->iommu); +err_disable: + arm_smmu_device_disable(smmu); +err_free_iopf: + iopf_queue_free(smmu->evtq.iopf); + return ret; } static void arm_smmu_device_remove(struct platform_device *pdev) From 46b3df8eb9bd035620bc48bd7a1f028490626621 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 16:33:58 +0000 Subject: [PATCH 11/66] iommu: Manage driver probe deferral better Since iommu_fwspec_init() absorbed the basic driver probe deferral check to wait for an IOMMU to register, we may as well handle the probe deferral timeout there as well. 
The current inconsistency of callers results in client devices deferring forever on an arm64 ACPI system where an SMMU has failed its own driver probe. Acked-by: Will Deacon Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/41fa59f156ef8d196d08fa75c4901e6d4b12e6c4.1733406914.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 2 +- drivers/iommu/of_iommu.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 599030e1e890..851fd5aeccf5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2819,7 +2819,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (!ops) - return -EPROBE_DEFER; + return driver_deferred_probe_check_state(dev); if (fwspec) return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e7a6a1611d19..97987cd78da9 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -29,8 +29,6 @@ static int of_iommu_xlate(struct device *dev, return -ENODEV; ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np)); - if (ret == -EPROBE_DEFER) - return driver_deferred_probe_check_state(dev); if (ret) return ret; From 6e192214c6c82c2f52238d5e1865f11594e58a6f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 13:48:09 +0000 Subject: [PATCH 12/66] iommu/arm-smmu-v3: Document SVA interaction with new pagetable features Process pagetables may now be using new permission-indirection-based features which an SMMU may not understand when given such a table for SVA. Although SMMUv3.4 does add its own S1PIE feature, realistically we're still going to have to cope with feature mismatches between CPUs and SMMUs, so let's start simple and essentially just document the expectations for what falls out as-is. 
Although it seems unlikely for SVA applications to also depend on memory-hardening features, or vice-versa, the relative lifecycles make it tricky to enforce mutual exclusivity. Thankfully our PIE index allocation makes it relatively benign for an SMMU to keep interpreting them as direct permissions, the only real implication is that an SVA application cannot harden itself against its own devices with these features. Thus, inform the user about that just in case they have other expectations. Also we don't (yet) support LPA2, so deny SVA entirely if we're going to misunderstand the pagetable format altogether. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/68a37b00a720f0827cac0e4f40e4d3a688924054.1733406275.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 1d3e71569775..9ba596430e7c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -112,6 +112,15 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, * from the current CPU register */ target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); + + /* + * Note that we don't bother with S1PIE on the SMMU, we just rely on + * our default encoding scheme matching direct permissions anyway. + * SMMU has no notion of S1POE nor GCS, so make sure that is clear if + * either is enabled for CPUs, just in case anyone imagines otherwise. 
+ */ + if (system_supports_poe() || system_supports_gcs()) + dev_warn_once(master->smmu->dev, "SVA devices ignore permission overlays and GCS\n"); } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); @@ -206,8 +215,12 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) unsigned long asid_bits; u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; - if (vabits_actual == 52) + if (vabits_actual == 52) { + /* We don't support LPA2 */ + if (PAGE_SIZE != SZ_64K) + return false; feat_mask |= ARM_SMMU_FEAT_VAX; + } if ((smmu->features & feat_mask) != feat_mask) return false; From 48e7b8e284e5be9fd1b54b60246bcbe9711d43e4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Dec 2024 11:43:27 -0400 Subject: [PATCH 13/66] iommu/arm-smmu-v3: Remove arm_smmu_domain_finalise() during attach Domains are now always finalized during allocation because the core code no longer permits a NULL dev argument to domain_alloc_paging/_flags(). Remove the late finalize during attach that supported domains that were not fully initialized. 
Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v1-0bb8d5313a27+27b-smmuv3_paging_flags_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 37 +++++---------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d8ebe18a5507..dc5fab268f7d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2460,7 +2460,6 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) if (!smmu_domain) return ERR_PTR(-ENOMEM); - mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); @@ -2469,7 +2468,9 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_domain *smmu_domain; + int ret; /* * Allocate the domain and initialise some of its data structures. 
@@ -2480,15 +2481,10 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); - if (dev) { - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - int ret; - - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); - if (ret) { - kfree(smmu_domain); - return ERR_PTR(ret); - } + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); + if (ret) { + kfree(smmu_domain); + return ERR_PTR(ret); } return &smmu_domain->domain; } @@ -2965,15 +2961,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - mutex_lock(&smmu_domain->init_mutex); - - if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - } else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - - mutex_unlock(&smmu_domain->init_mutex); - if (ret) + if (smmu_domain->smmu != smmu) return ret; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { @@ -3030,16 +3018,9 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cd target_cd; - int ret = 0; - mutex_lock(&smmu_domain->init_mutex); - if (!smmu_domain->smmu) - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - mutex_unlock(&smmu_domain->init_mutex); - if (ret) - return ret; + if (smmu_domain->smmu != smmu) + return -EINVAL; if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) return -EINVAL; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index c7f37fd47768..bd9d7c85576a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -843,7 +843,6 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; - struct mutex init_mutex; /* 
Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; atomic_t nr_ats_masters; From bb857c5c015033026d82d404061b26bbb37c821d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Dec 2024 11:43:28 -0400 Subject: [PATCH 14/66] iommu/arm-smmu-v3: Make domain_alloc_paging_flags() directly determine the S1/S2 The selection of S1/S2 is a bit indirect today, make domain_alloc_paging_flags() directly decode the flags and select the correct S1/S2 type. Directly reject flag combinations the HW doesn't support when processing the flags. Fix missing rejection of some flag combinations that are not supported today (ie NEST_PARENT | DIRTY_TRACKING) by using a switch statement to list out exactly the combinations that are currently supported. Move the determination of the stage out of arm_smmu_domain_finalise() and into both callers. As today the default stage is S1 if supported in HW. This makes arm_smmu_domain_alloc_paging_flags() self contained and no longer calling arm_smmu_domain_alloc_paging(). 
Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v1-0bb8d5313a27+27b-smmuv3_paging_flags_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 +++++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index dc5fab268f7d..ef0d644ad34d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2481,6 +2481,11 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); + if (master->smmu->features & ARM_SMMU_FEAT_TRANS_S1) + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); if (ret) { kfree(smmu_domain); @@ -2554,12 +2559,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_domain *smmu_domain); bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; - /* Restrict the stage to what we can actually support */ - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - pgtbl_cfg = (struct io_pgtable_cfg) { .pgsize_bitmap = smmu->pgsize_bitmap, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, @@ -3231,6 +3230,7 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID | IOMMU_HWPT_ALLOC_NEST_PARENT; @@ -3242,25 +3242,43 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, if (user_data) return ERR_PTR(-EOPNOTSUPP); - if (flags & 
IOMMU_HWPT_ALLOC_PASID) - return arm_smmu_domain_alloc_paging(dev); - smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); - if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) { - if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) { + switch (flags) { + case 0: + /* Prefer S1 if available */ + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + break; + case IOMMU_HWPT_ALLOC_NEST_PARENT: + if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) { ret = -EOPNOTSUPP; goto err_free; } smmu_domain->stage = ARM_SMMU_DOMAIN_S2; smmu_domain->nest_parent = true; + break; + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING: + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID: + case IOMMU_HWPT_ALLOC_PASID: + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) { + ret = -EOPNOTSUPP; + goto err_free; + } + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -EOPNOTSUPP; + goto err_free; } smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags); if (ret) goto err_free; return &smmu_domain->domain; From cdfb9840fcc60b6e493aec077b1eecaa3268640b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 5 Dec 2024 11:43:29 -0400 Subject: [PATCH 15/66] iommu/arm-smmu-v3: Remove domain_alloc_paging() arm_smmu_domain_alloc_paging_flags() with a flags = 0 now does the same thing as arm_smmu_domain_alloc_paging(), remove arm_smmu_domain_alloc_paging(). 
Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v1-0bb8d5313a27+27b-smmuv3_paging_flags_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 31 --------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ef0d644ad34d..ea76f25c0661 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -105,8 +105,6 @@ static const char * const event_class_str[] = { [3] = "Reserved", }; -static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -2466,34 +2464,6 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) return smmu_domain; } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *smmu_domain; - int ret; - - /* - * Allocate the domain and initialise some of its data structures. - * We can't really do anything meaningful until we've added a - * master. 
- */ - smmu_domain = arm_smmu_domain_alloc(); - if (IS_ERR(smmu_domain)) - return ERR_CAST(smmu_domain); - - if (master->smmu->features & ARM_SMMU_FEAT_TRANS_S1) - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); - if (ret) { - kfree(smmu_domain); - return ERR_PTR(ret); - } - return &smmu_domain->domain; -} - static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -3656,7 +3626,6 @@ static struct iommu_ops arm_smmu_ops = { .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .hw_info = arm_smmu_hw_info, - .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags, .probe_device = arm_smmu_probe_device, From 4dcac8407fe1be21990f356e2e8d8309ba63e346 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Mon, 2 Dec 2024 14:06:03 +0000 Subject: [PATCH 16/66] iommu/io-pgtable-arm: Fix stage-2 concatenation with 16K At the moment, io-pgtable-arm uses concatenation only if it is possible at level 0, which misses a case where concatenation is mandatory at level 1 according to R_SRKBC in Arm spec DDI0487 K.a. Also, that means concatenation can be used when not mandated, contradicting the comment on the code. However, these cases can only happen if the SMMUv3 driver is changed to use ias != oas for stage-2. This patch re-writes the code to use concatenation only if mandatory, fixing the missing case for level-1 and granule 16K with PA = 40 bits. 
Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241202140604.422235-2-smostafa@google.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 45 +++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6b9bb58a414f..600e1f03a458 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -223,6 +223,33 @@ static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data) return ptes_per_table - (i & (ptes_per_table - 1)); } +/* + * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a) + * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0. + * 2) R_SRKBC: After de-ciphering the table for PA size and valid initial lookup + * a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas) + * b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas) + * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas) + * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas) + */ +static inline bool arm_lpae_concat_mandatory(struct arm_lpae_io_pgtable *data) +{ + unsigned int ias = data->iop.cfg.ias; + unsigned int oas = data->iop.cfg.oas; + + /* Covers 1 and 2.d */ + if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0)) + return (oas == 48) || (ias == 48); + + /* Covers 2.a and 2.c */ + if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0)) + return (oas == 40) || (oas == 42); + + /* Case 2.b */ + return (ARM_LPAE_GRANULE(data) == SZ_16K) && + (data->start_level == 1) && (oas == 40); +} + static bool selftest_running = false; static dma_addr_t __arm_lpae_dma_addr(void *pages) @@ -1006,18 +1033,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) if (!data) return NULL; - /* - * Concatenate PGDs at level 1 if 
possible in order to reduce - * the depth of the stage-2 walk. - */ - if (data->start_level == 0) { - unsigned long pgd_pages; - - pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); - if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { - data->pgd_bits += data->bits_per_level; - data->start_level++; - } + if (arm_lpae_concat_mandatory(data)) { + if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) > + ARM_LPAE_S2_MAX_CONCAT_PAGES)) + return NULL; + data->pgd_bits += data->bits_per_level; + data->start_level++; } /* VTCR */ From 376ce8b35ed15d5deee57bdecd8449f6a4df4c42 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Mon, 2 Dec 2024 14:06:04 +0000 Subject: [PATCH 17/66] iommu/io-pgtable-arm: Add coverage for different OAS in selftest Run selftests with different OAS values instead of hardcoding it to 48 bits. We always keep OAS >= IAS to make the config valid for stage-2. This can be further improved, if we split IAS/OAS configuration for stage-1 and stage-2 (to use input sizes compatible with VA_BITS as SMMUv3 does, or IAS > OAS which is valid for stage-1). However, that adds more complexity, and the current change improves coverage and makes it possible to test all concatenation cases. 
Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241202140604.422235-3-smostafa@google.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 600e1f03a458..c1b62c7d81ba 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1385,15 +1385,14 @@ static int __init arm_lpae_do_selftests(void) SZ_64K | SZ_512M, }; - static const unsigned int ias[] __initconst = { + static const unsigned int address_size[] __initconst = { 32, 36, 40, 42, 44, 48, }; - int i, j, pass = 0, fail = 0; + int i, j, k, pass = 0, fail = 0; struct device dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, - .oas = 48, .coherent_walk = true, .iommu_dev = &dev, }; @@ -1402,15 +1401,19 @@ static int __init arm_lpae_do_selftests(void) set_dev_node(&dev, NUMA_NO_NODE); for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { - for (j = 0; j < ARRAY_SIZE(ias); ++j) { - cfg.pgsize_bitmap = pgsize[i]; - cfg.ias = ias[j]; - pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", - pgsize[i], ias[j]); - if (arm_lpae_run_tests(&cfg)) - fail++; - else - pass++; + for (j = 0; j < ARRAY_SIZE(address_size); ++j) { + /* Don't use ias > oas as it is not valid for stage-2. */ + for (k = 0; k <= j; ++k) { + cfg.pgsize_bitmap = pgsize[i]; + cfg.ias = address_size[k]; + cfg.oas = address_size[j]; + pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u OAS %u\n", + pgsize[i], cfg.ias, cfg.oas); + if (arm_lpae_run_tests(&cfg)) + fail++; + else + pass++; + } } } From d5f88acdd6ff84607043a6845b81e4be148f9fd9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 12 Nov 2024 14:35:06 +0100 Subject: [PATCH 18/66] iommu/riscv: Add support for platform msi Apply platform_device_msi_init_and_alloc_irqs() to add support for MSIs when the IOMMU is a platform device. 
Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20241112133504.491984-4-ajones@ventanamicro.com Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu-platform.c | 102 ++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c index 382ba2841849..c722eebcdd9b 100644 --- a/drivers/iommu/riscv/iommu-platform.c +++ b/drivers/iommu/riscv/iommu-platform.c @@ -11,18 +11,43 @@ */ #include +#include +#include #include #include #include "iommu-bits.h" #include "iommu.h" +static void riscv_iommu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + struct device *dev = msi_desc_to_dev(desc); + struct riscv_iommu_device *iommu = dev_get_drvdata(dev); + u16 idx = desc->msi_index; + u64 addr; + + addr = ((u64)msg->address_hi << 32) | msg->address_lo; + + if (addr != (addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR)) { + dev_err_once(dev, + "uh oh, the IOMMU can't send MSIs to 0x%llx, sending to 0x%llx instead\n", + addr, addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR); + } + + addr &= RISCV_IOMMU_MSI_CFG_TBL_ADDR; + + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(idx), addr); + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(idx), msg->data); + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(idx), 0); +} + static int riscv_iommu_platform_probe(struct platform_device *pdev) { + enum riscv_iommu_igs_settings igs; struct device *dev = &pdev->dev; struct riscv_iommu_device *iommu = NULL; struct resource *res = NULL; - int vec; + int vec, ret; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -40,16 +65,6 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES); iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL); - /* For now we only support WSI */ - switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) { - case 
RISCV_IOMMU_CAPABILITIES_IGS_WSI: - case RISCV_IOMMU_CAPABILITIES_IGS_BOTH: - break; - default: - return dev_err_probe(dev, -ENODEV, - "unable to use wire-signaled interrupts\n"); - } - iommu->irqs_count = platform_irq_count(pdev); if (iommu->irqs_count <= 0) return dev_err_probe(dev, -ENODEV, @@ -57,13 +72,58 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT) iommu->irqs_count = RISCV_IOMMU_INTR_COUNT; - for (vec = 0; vec < iommu->irqs_count; vec++) - iommu->irqs[vec] = platform_get_irq(pdev, vec); + igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps); + switch (igs) { + case RISCV_IOMMU_CAPABILITIES_IGS_BOTH: + case RISCV_IOMMU_CAPABILITIES_IGS_MSI: + if (is_of_node(dev->fwnode)) + of_msi_configure(dev, to_of_node(dev->fwnode)); - /* Enable wire-signaled interrupts, fctl.WSI */ - if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) { - iommu->fctl |= RISCV_IOMMU_FCTL_WSI; - riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + if (!dev_get_msi_domain(dev)) { + dev_warn(dev, "failed to find an MSI domain\n"); + goto msi_fail; + } + + ret = platform_device_msi_init_and_alloc_irqs(dev, iommu->irqs_count, + riscv_iommu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + goto msi_fail; + } + + for (vec = 0; vec < iommu->irqs_count; vec++) + iommu->irqs[vec] = msi_get_virq(dev, vec); + + /* Enable message-signaled interrupts, fctl.WSI */ + if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) { + iommu->fctl ^= RISCV_IOMMU_FCTL_WSI; + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + } + + dev_info(dev, "using MSIs\n"); + break; + +msi_fail: + if (igs != RISCV_IOMMU_CAPABILITIES_IGS_BOTH) { + return dev_err_probe(dev, -ENODEV, + "unable to use wire-signaled interrupts\n"); + } + + fallthrough; + + case RISCV_IOMMU_CAPABILITIES_IGS_WSI: + for (vec = 0; vec < iommu->irqs_count; vec++) + iommu->irqs[vec] = platform_get_irq(pdev, vec); + + /* Enable wire-signaled interrupts, 
fctl.WSI */ + if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) { + iommu->fctl |= RISCV_IOMMU_FCTL_WSI; + riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl); + } + dev_info(dev, "using wire-signaled interrupts\n"); + break; + default: + return dev_err_probe(dev, -ENODEV, "invalid IGS\n"); } return riscv_iommu_init(iommu); @@ -71,7 +131,13 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev) static void riscv_iommu_platform_remove(struct platform_device *pdev) { - riscv_iommu_remove(dev_get_drvdata(&pdev->dev)); + struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev); + bool msi = !(iommu->fctl & RISCV_IOMMU_FCTL_WSI); + + riscv_iommu_remove(iommu); + + if (msi) + platform_device_msi_free_irqs_all(&pdev->dev); }; static const struct of_device_id riscv_iommu_of_match[] = { From f20a6e3eb2ef323aa0a6ac22f94293ce4f17d113 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:29 +0000 Subject: [PATCH 19/66] iommu/amd: Misc ACPI IVRS debug info clean up * Remove redundant AMD-Vi prefix. * Print IVHD device entry settings field using hex value. * Print root device of IVHD ACPI device entry using hex value. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-2-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 2 +- drivers/iommu/amd/init.c | 35 +++++++++++++---------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index fdb0357e0bb9..af87b1d094c1 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -468,7 +468,7 @@ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) 
\ do { \ if (amd_iommu_dump) \ - pr_info("AMD-Vi: " format, ## arg); \ + pr_info(format, ## arg); \ } while(0); /* global flag if IOMMUs cache non-present entries */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 0e0a531042ac..3a7b2b0472fa 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1239,7 +1239,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, entry->cmd_line = cmd_line; entry->root_devid = (entry->devid & (~0x7)); - pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n", + pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n", entry->cmd_line ? "cmd" : "ivrs", entry->hid, entry->uid, entry->root_devid); @@ -1331,15 +1331,14 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); + DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x\n", + DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1350,8 +1349,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_SELECT_RANGE_START: - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %04x:%02x:%02x.%x flags: %02x\n", + DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1364,8 +1362,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), 
PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1382,9 +1379,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS_RANGE: - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %04x:%02x:%02x.%x flags: %02x " - "devid_to: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1401,8 +1396,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " - "flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1414,8 +1408,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT_RANGE: - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), @@ -1428,7 +1421,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n", seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); @@ -1461,11 +1454,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", var, (int)handle, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); ret = add_special_device(type, handle, &devid, false); if (ret) @@ -1525,11 +1519,12 @@ static int __init 
init_iommu_from_acpi(struct amd_iommu *iommu, } devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n", hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), - PCI_FUNC(devid)); + PCI_FUNC(devid), + e->flags); flags = e->flags; From 82582f85ed22ba6cd27fea76b4248745f3b9fdf7 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:30 +0000 Subject: [PATCH 20/66] iommu/amd: Disable AMD IOMMU if CMPXCHG16B feature is not supported According to the AMD IOMMU spec, IOMMU hardware reads the entire DTE in a single 256-bit transaction. It is recommended to update DTE using 128-bit operation followed by an INVALIDATE_DEVTAB_ENTRY command when the IV=1b or V=1b before the change. According to the AMD BIOS and Kernel Developer's Guide (BKDG) dated back to family 10h Processor [1], which is the first introduction of AMD IOMMU, AMD processor always has CPUID Fn0000_0001_ECX[CMPXCHG16B]=1. Therefore, it is safe to assume cmpxchg128 is available with all AMD processor w/ IOMMU. In addition, the CMPXCHG16B feature has already been checked separately before enabling the GA, XT, and GAM modes. Consolidate the detection logic, and fail the IOMMU initialization if the feature is not supported. 
[1] https://www.amd.com/content/dam/amd/en/documents/archived-tech-docs/programmer-references/31116.pdf Reviewed-by: Jason Gunthorpe Suggested-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-3-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 3a7b2b0472fa..c1607b29ebf4 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1752,13 +1752,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling it. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + /* GAM requires GA mode. */ + if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1768,13 +1763,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - /* - * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. - * XT, GAM also requires GA mode. Therefore, we need to - * check cmpxchg16b support before enabling them. - */ - if (!boot_cpu_has(X86_FEATURE_CX16) || - ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { + /* XT and GAM require GA mode. */ + if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; } @@ -3028,6 +3018,11 @@ static int __init early_amd_iommu_init(void) return -EINVAL; } + if (!boot_cpu_has(X86_FEATURE_CX16)) { + pr_err("Failed to initialize. 
The CMPXCHG16B feature is required.\n"); + return -EINVAL; + } + /* * Validate checksum here so we don't need to do it when * we actually parse the table From 7bea695ada0e84c40685551159068996cea29ef8 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:31 +0000 Subject: [PATCH 21/66] iommu/amd: Introduce struct ivhd_dte_flags to store persistent DTE flags During early initialization, the driver parses IVRS IVHD block to get list of downstream devices along with their DTE flags (i.e. INITPass, EIntPass, NMIPass, SysMgt, Lint0Pass, Lint1Pass). This information is currently stored in the device DTE, and needs to be preserved when clearing and configuring each DTE, which makes it difficult to manage. Introduce struct ivhd_dte_flags to store IVHD DTE settings for a device or range of devices, which are stored in the amd_ivhd_dev_flags_list during initial IVHD parsing. Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-4-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 16 ++++ drivers/iommu/amd/init.c | 113 +++++++++++++++++++++------- 2 files changed, 100 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index af87b1d094c1..ae5f1e031722 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -220,6 +220,8 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40) + #define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 @@ -516,6 +518,9 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \ list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list) +#define for_each_ivhd_dte_flags(entry) \ + list_for_each_entry((entry), 
&amd_ivhd_dev_flags_list, list) + struct amd_iommu; struct iommu_domain; struct irq_domain; @@ -884,6 +889,17 @@ struct dev_table_entry { u64 data[4]; }; +/* + * Structure to store persistent DTE flags from IVHD + */ +struct ivhd_dte_flags { + struct list_head list; + u16 segid; + u16 devid_first; + u16 devid_last; + struct dev_table_entry dte; +}; + /* * One entry for unity mappings parsed out of the ACPI table. */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c1607b29ebf4..015c9b045685 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -174,8 +174,8 @@ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */ /* Number of IOMMUs present in the system */ static int amd_iommus_present; @@ -984,6 +984,14 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. 
*/ +static void set_dte_bit(struct dev_table_entry *dte, u8 bit) +{ + int i = (bit >> 6) & 0x03; + int _bit = bit & 0x3f; + + dte->data[i] |= (1UL << _bit); +} + static void __set_dev_entry_bit(struct dev_table_entry *dev_table, u16 devid, u8 bit) { @@ -1136,6 +1144,19 @@ static bool copy_device_table(void) return true; } +static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) +{ + struct ivhd_dte_flags *e; + + for_each_ivhd_dte_flags(e) { + if ((e->segid == segid) && + (e->devid_first == first) && + (e->devid_last == last)) + return true; + } + return false; +} + void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) { int sysmgt; @@ -1151,27 +1172,66 @@ void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information */ +static void __init +set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last, + u32 flags, u32 ext_flags) +{ + int i; + struct dev_table_entry dte = {}; + + /* Parse IVHD DTE setting flags and store information */ + if (flags) { + struct ivhd_dte_flags *d; + + if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) + return; + + d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); + if (!d) + return; + + pr_debug("%s: devid range %#x:%#x\n", __func__, first, last); + + if (flags & ACPI_DEVFLAG_INITPASS) + set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); + if (flags & ACPI_DEVFLAG_EXTINT) + set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); + if (flags & ACPI_DEVFLAG_NMI) + set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); + if (flags & ACPI_DEVFLAG_SYSMGT1) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); + if (flags & ACPI_DEVFLAG_SYSMGT2) + set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); + if (flags & ACPI_DEVFLAG_LINT0) + set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); + if (flags & ACPI_DEVFLAG_LINT1) + set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS); + + /* Apply erratum 63, which needs info in initial_dte */ + if 
(FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1) + dte.data[0] |= DTE_FLAG_IW; + + memcpy(&d->dte, &dte, sizeof(dte)); + d->segid = iommu->pci_seg->id; + d->devid_first = first; + d->devid_last = last; + list_add_tail(&d->list, &amd_ivhd_dev_flags_list); + } + + for (i = first; i <= last; i++) { + if (flags) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + + memcpy(&dev_table[i], &dte, sizeof(dte)); + } + amd_iommu_set_rlookup_table(iommu, i); + } +} + static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, u16 devid, u32 flags, u32 ext_flags) { - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); - - amd_iommu_apply_erratum_63(iommu, devid); - - amd_iommu_set_rlookup_table(iommu, devid); + set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags); } int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) @@ -1332,9 +1392,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALL: DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags); - - for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); + set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0); break; case IVHD_DEV_SELECT: @@ -1428,14 +1486,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if 
(alias) { + if (alias) pci_seg->alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); } + set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags); + set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); break; case IVHD_DEV_SPECIAL: { u8 handle, type; From 8b3f78733814b180089a400743b6f19d118aec62 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:32 +0000 Subject: [PATCH 22/66] iommu/amd: Introduce helper function to update 256-bit DTE The current implementation does not follow 128-bit write requirement to update DTE as specified in the AMD I/O Virtualization Technology (IOMMU) Specification. Therefore, modify the struct dev_table_entry to contain union of u128 data array, and introduce a helper function update_dte256() to update DTE using two 128-bit cmpxchg operations to update 256-bit DTE with the modified structure, and take into account the DTE[V, GV] bits when programming the DTE to ensure proper order of DTE programming and flushing. In addition, introduce a per-DTE spin_lock struct dev_data.dte_lock to provide synchronization when updating the DTE to prevent cmpxchg128 failure.
Suggested-by: Jason Gunthorpe Suggested-by: Uros Bizjak Reviewed-by: Jason Gunthorpe Reviewed-by: Uros Bizjak Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-5-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 10 ++- drivers/iommu/amd/iommu.c | 123 ++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ae5f1e031722..ea7922b06325 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -427,9 +427,13 @@ #define DTE_GCR3_SHIFT_C 43 #define DTE_GPT_LEVEL_SHIFT 54 +#define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54) #define GCR3_VALID 0x01ULL +/* DTE[128:179] | DTE[184:191] */ +#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52) + #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) #define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD) @@ -842,6 +846,7 @@ struct devid_map { struct iommu_dev_data { /*Protect against attach/detach races */ struct mutex mutex; + spinlock_t dte_lock; /* DTE lock for 256-bit access */ struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ @@ -886,7 +891,10 @@ extern struct list_head amd_iommu_list; * Structure defining one entry in the device table */ struct dev_table_entry { - u64 data[4]; + union { + u64 data[4]; + u128 data128[2]; + }; }; /* diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 16f40b8000d7..08cb740637dd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -83,12 +83,125 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data); +static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid); + 
/**************************************************************************** * * Helper functions * ****************************************************************************/ +static __always_inline void amd_iommu_atomic128_set(__int128 *ptr, __int128 val) +{ + /* + * Note: + * We use arch_cmpxchg128_local() because: + * - Need cmpxchg16b instruction mainly for 128-bit store to DTE + * (not necessary for cmpxchg since this function is already + * protected by a spin_lock for this DTE). + * - Neither need LOCK_PREFIX nor try loop because of the spin_lock. + */ + arch_cmpxchg128_local(ptr, *ptr, val); +} + +static void write_dte_upper128(struct dev_table_entry *ptr, struct dev_table_entry *new) +{ + struct dev_table_entry old; + + old.data128[1] = ptr->data128[1]; + /* + * Preserve DTE_DATA2_INTR_MASK. This needs to be + * done here since it requires to be inside + * spin_lock(&dev_data->dte_lock) context. + */ + new->data[2] &= ~DTE_DATA2_INTR_MASK; + new->data[2] |= old.data[2] & DTE_DATA2_INTR_MASK; + + amd_iommu_atomic128_set(&ptr->data128[1], new->data128[1]); +} + +static void write_dte_lower128(struct dev_table_entry *ptr, struct dev_table_entry *new) +{ + amd_iommu_atomic128_set(&ptr->data128[0], new->data128[0]); +} + +/* + * Note: + * IOMMU reads the entire Device Table entry in a single 256-bit transaction + * but the driver is programming DTE using 2 128-bit cmpxchg. So, the driver + * need to ensure the following: + * - DTE[V|GV] bit is being written last when setting. + * - DTE[V|GV] bit is being written first when clearing. + * + * This function is used only by code, which updates DMA translation part of the DTE. + * So, only consider control bits related to DMA when updating the entry. 
+ */ +static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data, + struct dev_table_entry *new) +{ + unsigned long flags; + struct dev_table_entry *dev_table = get_dev_table(iommu); + struct dev_table_entry *ptr = &dev_table[dev_data->devid]; + + spin_lock_irqsave(&dev_data->dte_lock, flags); + + if (!(ptr->data[0] & DTE_FLAG_V)) { + /* Existing DTE is not valid. */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!(new->data[0] & DTE_FLAG_V)) { + /* Existing DTE is valid. New DTE is not valid. */ + write_dte_lower128(ptr, new); + write_dte_upper128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!FIELD_GET(DTE_FLAG_GV, ptr->data[0])) { + /* + * Both DTEs are valid. + * Existing DTE has no guest page table. + */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (!FIELD_GET(DTE_FLAG_GV, new->data[0])) { + /* + * Both DTEs are valid. + * Existing DTE has guest page table, + * new DTE has no guest page table, + */ + write_dte_lower128(ptr, new); + write_dte_upper128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else if (FIELD_GET(DTE_GPT_LEVEL_MASK, ptr->data[2]) != + FIELD_GET(DTE_GPT_LEVEL_MASK, new->data[2])) { + /* + * Both DTEs are valid and have guest page table, + * but have different number of levels. So, we need + * to update both upper and lower 128-bit value, which + * require disabling and flushing. + */ + struct dev_table_entry clear = {}; + + /* First disable DTE */ + write_dte_lower128(ptr, &clear); + iommu_flush_dte_sync(iommu, dev_data->devid); + + /* Then update DTE */ + write_dte_upper128(ptr, new); + write_dte_lower128(ptr, new); + iommu_flush_dte_sync(iommu, dev_data->devid); + } else { + /* + * Both DTEs are valid and have guest page table, + * and same number of levels. We just need to only + * update the lower 128-bit.
So no need to disable DTE. + */ + write_dte_lower128(ptr, new); + } + + spin_unlock_irqrestore(&dev_data->dte_lock, flags); +} + static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { return (pdom && (pdom->pd_mode == PD_MODE_V2)); @@ -209,6 +322,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) return NULL; mutex_init(&dev_data->mutex); + spin_lock_init(&dev_data->dte_lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -1261,6 +1375,15 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } +static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid) +{ + int ret; + + ret = iommu_flush_dte(iommu, devid); + if (!ret) + iommu_completion_wait(iommu); +} + static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; From fd5dff9de4be29b8ebec63b7e916915d4c984027 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:33 +0000 Subject: [PATCH 23/66] iommu/amd: Modify set_dte_entry() to use 256-bit DTE helpers Also, the set_dte_entry() is used to program several DTE fields (e.g. stage1 table, stage2 table, domain id, and etc.), which is difficult to keep track with current implementation. Therefore, separate logic for clearing DTE (i.e. make_clear_dte) and another function for setting up the GCR3 Table Root Pointer, GIOV, GV, GLX, and GuestPagingMode into another function set_dte_gcr3_table(). 
Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-6-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 + drivers/iommu/amd/amd_iommu_types.h | 13 +-- drivers/iommu/amd/init.c | 30 ++++++- drivers/iommu/amd/iommu.c | 131 ++++++++++++++++------------ 4 files changed, 107 insertions(+), 69 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1bef5d55b2f9..66b413615d0c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -184,3 +184,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); #endif + +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ea7922b06325..0bbda60d3cdc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -409,8 +409,7 @@ #define DTE_FLAG_HAD (3ULL << 7) #define DTE_FLAG_GIOV BIT_ULL(54) #define DTE_FLAG_GV BIT_ULL(55) -#define DTE_GLX_SHIFT (56) -#define DTE_GLX_MASK (3) +#define DTE_GLX GENMASK_ULL(57, 56) #define DTE_FLAG_IR BIT_ULL(61) #define DTE_FLAG_IW BIT_ULL(62) @@ -418,13 +417,9 @@ #define DTE_FLAG_MASK (0x3ffULL << 32) #define DEV_DOMID_MASK 0xffffULL -#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) -#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) -#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL) - -#define DTE_GCR3_SHIFT_A 58 -#define DTE_GCR3_SHIFT_B 16 -#define DTE_GCR3_SHIFT_C 43 +#define DTE_GCR3_14_12 GENMASK_ULL(60, 58) +#define DTE_GCR3_30_15 GENMASK_ULL(31, 16) +#define DTE_GCR3_51_31 GENMASK_ULL(63, 43) #define DTE_GPT_LEVEL_SHIFT 54 #define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 015c9b045685..1e4b8040c374 100644 --- 
a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1089,11 +1089,9 @@ static bool __copy_device_table(struct amd_iommu *iommu) } /* If gcr3 table existed, mask it out */ if (old_devtb[devid].data[0] & DTE_FLAG_GV) { - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31); pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; - tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; - tmp |= DTE_FLAG_GV; + tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV); pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; } } @@ -1144,6 +1142,30 @@ static bool copy_device_table(void) return true; } +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid) +{ + struct ivhd_dte_flags *e; + unsigned int best_len = UINT_MAX; + struct dev_table_entry *dte = NULL; + + for_each_ivhd_dte_flags(e) { + /* + * Need to go through the whole list to find the smallest range, + * which contains the devid. + */ + if ((e->segid == segid) && + (e->devid_first <= devid) && (devid <= e->devid_last)) { + unsigned int len = e->devid_last - e->devid_first; + + if (len < best_len) { + dte = &(e->dte); + best_len = len; + } + } + } + return dte; +} + static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) { struct ivhd_dte_flags *e; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 08cb740637dd..0f4b5f4acd30 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1949,90 +1949,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) return ret; } +static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr, + struct dev_table_entry *new) +{ + /* All existing DTE must have V bit set */ + new->data128[0] = DTE_FLAG_V; + new->data128[1] = 0; +} + +/* + * Note: + * The old value for GCR3 table and GPT have been cleared from caller. 
+ */ +static void set_dte_gcr3_table(struct amd_iommu *iommu, + struct iommu_dev_data *dev_data, + struct dev_table_entry *target) +{ + struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + u64 gcr3; + + if (!gcr3_info->gcr3_tbl) + return; + + pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n", + __func__, dev_data->devid, gcr3_info->glx, + (unsigned long long)gcr3_info->gcr3_tbl); + + gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); + + target->data[0] |= DTE_FLAG_GV | + FIELD_PREP(DTE_GLX, gcr3_info->glx) | + FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12); + if (pdom_is_v2_pgtbl_mode(dev_data->domain)) + target->data[0] |= DTE_FLAG_GIOV; + + target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) | + FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31); + + /* Guest page table can only support 4 and 5 levels */ + if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) + target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL); + else + target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL); +} + static void set_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - u64 pte_root = 0; - u64 flags = 0; - u32 old_domid; - u16 devid = dev_data->devid; u16 domid; + u32 old_domid; + struct dev_table_entry *initial_dte; + struct dev_table_entry new = {}; struct protection_domain *domain = dev_data->domain; - struct dev_table_entry *dev_table = get_dev_table(iommu); struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; + struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid]; if (gcr3_info && gcr3_info->gcr3_tbl) domid = dev_data->gcr3_info.domid; else domid = domain->id; - if (domain->iop.mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(domain->iop.root); + make_clear_dte(dev_data, dte, &new); - pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) + if (domain->iop.mode != PAGE_MODE_NONE) + new.data[0] = iommu_virt_to_phys(domain->iop.root); + + new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << 
DEV_ENTRY_MODE_SHIFT; - pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; /* - * When SNP is enabled, Only set TV bit when IOMMU - * page translation is in use. + * When SNP is enabled, we can only support TV=1 with non-zero domain ID. + * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in + * do_iommu_domain_alloc(). */ - if (!amd_iommu_snp_en || (domid != 0)) - pte_root |= DTE_FLAG_TV; - - flags = dev_table[devid].data[1]; - - if (dev_data->ats_enabled) - flags |= DTE_FLAG_IOTLB; + WARN_ON(amd_iommu_snp_en && (domid == 0)); + new.data[0] |= DTE_FLAG_TV; if (dev_data->ppr) - pte_root |= 1ULL << DEV_ENTRY_PPR; + new.data[0] |= 1ULL << DEV_ENTRY_PPR; if (domain->dirty_tracking) - pte_root |= DTE_FLAG_HAD; + new.data[0] |= DTE_FLAG_HAD; - if (gcr3_info && gcr3_info->gcr3_tbl) { - u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl); - u64 glx = gcr3_info->glx; - u64 tmp; + if (dev_data->ats_enabled) + new.data[1] |= DTE_FLAG_IOTLB; - pte_root |= DTE_FLAG_GV; - pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; + old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK; + new.data[1] |= domid; - /* First mask out possible old values for GCR3 table */ - tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; - flags &= ~tmp; - - tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - flags &= ~tmp; - - /* Encode GCR3 table into DTE */ - tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; - pte_root |= tmp; - - tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; - flags |= tmp; - - tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; - flags |= tmp; - - if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) { - dev_table[devid].data[2] |= - ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); - } - - /* GIOV is supported with V2 page table mode only */ - if (pdom_is_v2_pgtbl_mode(domain)) - pte_root |= DTE_FLAG_GIOV; + /* + * Restore cached persistent DTE bits, which can be set by information + * in IVRS table. 
See set_dev_entry_from_acpi(). + */ + initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid); + if (initial_dte) { + new.data128[0] |= initial_dte->data128[0]; + new.data128[1] |= initial_dte->data128[1]; } - flags &= ~DEV_DOMID_MASK; - flags |= domid; + set_dte_gcr3_table(iommu, dev_data, &new); - old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; - dev_table[devid].data[1] = flags; - dev_table[devid].data[0] = pte_root; + update_dte256(iommu, dev_data, &new); /* * A kdump kernel might be replacing a domain ID that was copied from From a2ce608a1eb65c2af99c58b63eae557165a0da87 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:34 +0000 Subject: [PATCH 24/66] iommu/amd: Introduce helper function get_dte256() And use it in clone_alias() along with update_dte256(). Also use get_dte256() in dump_dte_entry(). Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-7-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 62 ++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0f4b5f4acd30..b7b5dce10813 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -85,6 +85,8 @@ static void set_dte_entry(struct amd_iommu *iommu, static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid); +static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid); + /**************************************************************************** * * Helper functions @@ -202,6 +204,21 @@ static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_da spin_unlock_irqrestore(&dev_data->dte_lock, flags); } +static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data, + struct dev_table_entry *dte) +{ + unsigned long flags; + struct dev_table_entry *ptr; + 
struct dev_table_entry *dev_table = get_dev_table(iommu); + + ptr = &dev_table[dev_data->devid]; + + spin_lock_irqsave(&dev_data->dte_lock, flags); + dte->data128[0] = ptr->data128[0]; + dte->data128[1] = ptr->data128[1]; + spin_unlock_irqrestore(&dev_data->dte_lock, flags); +} + static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) { return (pdom && (pdom->pd_mode == PD_MODE_V2)); @@ -350,9 +367,11 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { + struct dev_table_entry new; struct amd_iommu *iommu; - struct dev_table_entry *dev_table; + struct iommu_dev_data *dev_data, *alias_data; u16 devid = pci_dev_id(pdev); + int ret = 0; if (devid == alias) return 0; @@ -361,13 +380,27 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) if (!iommu) return 0; - amd_iommu_set_rlookup_table(iommu, alias); - dev_table = get_dev_table(iommu); - memcpy(dev_table[alias].data, - dev_table[devid].data, - sizeof(dev_table[alias].data)); + /* Copy the data from pdev */ + dev_data = dev_iommu_priv_get(&pdev->dev); + if (!dev_data) { + pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid); + ret = -EINVAL; + goto out; + } + get_dte256(iommu, dev_data, &new); - return 0; + /* Setup alias */ + alias_data = find_dev_data(iommu, alias); + if (!alias_data) { + pr_err("%s : Failed to get alias dev_data for 0x%x\n", __func__, alias); + ret = -EINVAL; + goto out; + } + update_dte256(iommu, alias_data, &new); + + amd_iommu_set_rlookup_table(iommu, alias); +out: + return ret; } static void clone_aliases(struct amd_iommu *iommu, struct device *dev) @@ -640,6 +673,12 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) return -ENOMEM; dev_data->dev = dev; + + /* + * The dev_iommu_priv_set() needs to be called before setup_aliases. + * Otherwise, subsequent call to dev_iommu_priv_get() will fail.
+ */ + dev_iommu_priv_set(dev, dev_data); setup_aliases(iommu, dev); /* @@ -653,8 +692,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) dev_data->flags = pdev_get_caps(to_pci_dev(dev)); } - dev_iommu_priv_set(dev, dev_data); - return 0; } @@ -685,10 +722,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) { int i; - struct dev_table_entry *dev_table = get_dev_table(iommu); + struct dev_table_entry dte; + struct iommu_dev_data *dev_data = find_dev_data(iommu, devid); + + get_dte256(iommu, dev_data, &dte); for (i = 0; i < 4; ++i) - pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]); + pr_err("DTE[%d]: %016llx\n", i, dte.data[i]); } static void dump_command(unsigned long phys_addr) From 66ea3f96ae2b02cf543f3373b67aa6b8c6794926 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:35 +0000 Subject: [PATCH 25/66] iommu/amd: Modify clear_dte_entry() to avoid in-place update By reusing the make_clear_dte() and update_dte256(). Also, there is no need to set TV bit for non-SNP system when clearing DTE for blocked domain, and no longer need to apply erratum 63 in clear_dte() since it is already stored in struct ivhd_dte_flags and apply in set_dte_entry(). 
Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-8-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b7b5dce10813..3ccad9fd0302 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2103,19 +2103,16 @@ static void set_dte_entry(struct amd_iommu *iommu, } } -static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) +/* + * Clear DMA-remap related flags to block all DMA (blocked domain) + */ +static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data) { - struct dev_table_entry *dev_table = get_dev_table(iommu); + struct dev_table_entry new = {}; + struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid]; - /* remove entry from the device table seen by the hardware */ - dev_table[devid].data[0] = DTE_FLAG_V; - - if (!amd_iommu_snp_en) - dev_table[devid].data[0] |= DTE_FLAG_TV; - - dev_table[devid].data[1] &= DTE_FLAG_MASK; - - amd_iommu_apply_erratum_63(iommu, devid); + make_clear_dte(dev_data, dte, &new); + update_dte256(iommu, dev_data, &new); } /* Update and flush DTE for the given device */ @@ -2126,7 +2123,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set) if (set) set_dte_entry(iommu, dev_data); else - clear_dte_entry(iommu, dev_data->devid); + clear_dte_entry(iommu, dev_data); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); From 457da57646686fcb38b3f61b153920ca08200078 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:36 +0000 Subject: [PATCH 26/66] iommu/amd: Lock DTE before updating the entry with WRITE_ONCE() When updating only within a 64-bit tuple of a DTE, just lock the DTE and use WRITE_ONCE() because it is writing to memory read back by HW.
Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-9-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/iommu.c | 43 +++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 66b413615d0c..11be5a62d666 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -186,3 +186,4 @@ struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); #endif struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); +struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3ccad9fd0302..3aa80f140cdb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -347,7 +347,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) return dev_data; } -static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) +struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; struct llist_node *node; @@ -2845,12 +2845,12 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { struct protection_domain *pdomain = to_pdomain(domain); - struct dev_table_entry *dev_table; + struct dev_table_entry *dte; struct iommu_dev_data *dev_data; bool domain_flush = false; struct amd_iommu *iommu; unsigned long flags; - u64 pte_root; + u64 new; spin_lock_irqsave(&pdomain->lock, flags); if (!(pdomain->dirty_tracking ^ enable)) { @@ -2859,16 +2859,15 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, } list_for_each_entry(dev_data, &pdomain->dev_list, list) { + spin_lock(&dev_data->dte_lock); iommu = get_amd_iommu_from_dev_data(dev_data); - - 
dev_table = get_dev_table(iommu); - pte_root = dev_table[dev_data->devid].data[0]; - - pte_root = (enable ? pte_root | DTE_FLAG_HAD : - pte_root & ~DTE_FLAG_HAD); + dte = &get_dev_table(iommu)[dev_data->devid]; + new = dte->data[0]; + new = (enable ? new | DTE_FLAG_HAD : new & ~DTE_FLAG_HAD); + dte->data[0] = new; + spin_unlock(&dev_data->dte_lock); /* Flush device DTE */ - dev_table[dev_data->devid].data[0] = pte_root; device_flush_dte(dev_data); domain_flush = true; } @@ -3135,17 +3134,23 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid) static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { - u64 dte; - struct dev_table_entry *dev_table = get_dev_table(iommu); + u64 new; + struct dev_table_entry *dte = &get_dev_table(iommu)[devid]; + struct iommu_dev_data *dev_data = search_dev_data(iommu, devid); - dte = dev_table[devid].data[2]; - dte &= ~DTE_IRQ_PHYS_ADDR_MASK; - dte |= iommu_virt_to_phys(table->table); - dte |= DTE_IRQ_REMAP_INTCTL; - dte |= DTE_INTTABLEN; - dte |= DTE_IRQ_REMAP_ENABLE; + if (dev_data) + spin_lock(&dev_data->dte_lock); - dev_table[devid].data[2] = dte; + new = READ_ONCE(dte->data[2]); + new &= ~DTE_IRQ_PHYS_ADDR_MASK; + new |= iommu_virt_to_phys(table->table); + new |= DTE_IRQ_REMAP_INTCTL; + new |= DTE_INTTABLEN; + new |= DTE_IRQ_REMAP_ENABLE; + WRITE_ONCE(dte->data[2], new); + + if (dev_data) + spin_unlock(&dev_data->dte_lock); } static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) From b0988acc94c021d8d428c9496649a1b4e6203011 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Nov 2024 05:49:37 +0000 Subject: [PATCH 27/66] iommu/amd: Remove amd_iommu_apply_erratum_63() Also replace __set_dev_entry_bit() with set_dte_bit() and remove unused helper functions. 
Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20241118054937.5203-10-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/init.c | 50 +++-------------------------------- 2 files changed, 3 insertions(+), 48 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 11be5a62d666..c4cd272b31f4 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -16,7 +16,6 @@ irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, u8 cntrl_intr, u8 cntrl_log, u32 status_run_mask, u32 status_overflow_mask); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 1e4b8040c374..41294807452d 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -992,38 +992,6 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit) dte->data[i] |= (1UL << _bit); } -static void __set_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) -{ - int i = (bit >> 6) & 0x03; - int _bit = bit & 0x3f; - - dev_table[devid].data[i] |= (1UL << _bit); -} - -static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry *dev_table = get_dev_table(iommu); - - return __set_dev_entry_bit(dev_table, devid, bit); -} - -static int __get_dev_entry_bit(struct dev_table_entry *dev_table, - u16 devid, u8 bit) -{ - int i = (bit >> 6) & 0x03; - int _bit = bit & 0x3f; - - return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; -} - -static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) -{ - struct dev_table_entry 
*dev_table = get_dev_table(iommu); - - return __get_dev_entry_bit(dev_table, devid, bit); -} - static bool __copy_device_table(struct amd_iommu *iommu) { u64 int_ctl, int_tab_len, entry = 0; @@ -1179,17 +1147,6 @@ static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last) return false; } -void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) -{ - int sysmgt; - - sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); - - if (sysmgt == 0x01) - set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); -} - /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information @@ -2637,9 +2594,9 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID); if (!amd_iommu_snp_en) - __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); + set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION); } } @@ -2667,8 +2624,7 @@ static void init_device_table(void) for_each_pci_segment(pci_seg) { for (devid = 0; devid <= pci_seg->last_bdf; ++devid) - __set_dev_entry_bit(pci_seg->dev_table, - devid, DEV_ENTRY_IRQ_TBL_EN); + set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN); } } From fb3de9f9b085d003a8a869ca6a4789d1bfbb3f22 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:22 -0800 Subject: [PATCH 28/66] iommu: Prevent pasid attach if no ops->remove_dev_pasid driver should implement both set_dev_pasid and remove_dev_pasid op, otherwise it is a problem how to detach pasid. In reality, it is impossible that an iommu driver implements set_dev_pasid() but no remove_dev_pasid() op. However, it is better to check it. Move the group check to be the first as dev_iommu_ops() may fail when there is no valid group. 
Also take the chance to remove the dev_has_iommu() check, as it duplicates the group check. Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241204122928.11987-2-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 599030e1e890..286b14d008b2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3368,16 +3368,19 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct group_device *device; + const struct iommu_ops *ops; int ret; - if (!domain->ops->set_dev_pasid) - return -EOPNOTSUPP; - if (!group) return -ENODEV; - if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner || - pasid == IOMMU_NO_PASID) + ops = dev_iommu_ops(dev); + + if (!domain->ops->set_dev_pasid || + !ops->remove_dev_pasid) + return -EOPNOTSUPP; + + if (ops != domain->owner || pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); From 1fbf73425f5169e2d183b2ca67bfe2f1019d11c0 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:23 -0800 Subject: [PATCH 29/66] iommu: Consolidate the ops->remove_dev_pasid usage into a helper Add a wrapper for ops->remove_dev_pasid; this consolidates the iommu_ops fetching and the callback invocation. It also prepares for the transition from detaching a pasid via the remove_dev_pasid op to detaching it by attaching the blocked_domain.
Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-3-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 286b14d008b2..2ff4fa60a86a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3312,6 +3312,14 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); +static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + + ops->remove_dev_pasid(dev, pasid, domain); +} + static int __iommu_set_group_pasid(struct iommu_domain *domain, struct iommu_group *group, ioasid_t pasid) { @@ -3330,11 +3338,9 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, err_revert: last_gdev = device; for_each_group_device(group, device) { - const struct iommu_ops *ops = dev_iommu_ops(device->dev); - if (device == last_gdev) break; - ops->remove_dev_pasid(device->dev, pasid, domain); + iommu_remove_dev_pasid(device->dev, pasid, domain); } return ret; } @@ -3344,12 +3350,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, struct iommu_domain *domain) { struct group_device *device; - const struct iommu_ops *ops; - for_each_group_device(group, device) { - ops = dev_iommu_ops(device->dev); - ops->remove_dev_pasid(device->dev, pasid, domain); - } + for_each_group_device(group, device) + iommu_remove_dev_pasid(device->dev, pasid, domain); } /* From b18301b9156a0d8a0094fcd16a1b98816539eab0 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:24 -0800 Subject: [PATCH 30/66] iommu: Detaching pasid by attaching to the blocked_domain The iommu drivers are on the way to detach pasid by attaching to the 
blocked domain. However, this cannot be done in one shot. During the transition, iommu core would select between the remove_dev_pasid op and the blocked domain. Suggested-by: Kevin Tian Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-4-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2ff4fa60a86a..2064dc8bb8b5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3316,8 +3316,18 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, struct iommu_domain *domain) { const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *blocked_domain = ops->blocked_domain; + int ret = 1; - ops->remove_dev_pasid(dev, pasid, domain); + if (blocked_domain && blocked_domain->ops->set_dev_pasid) { + ret = blocked_domain->ops->set_dev_pasid(blocked_domain, + dev, pasid, domain); + } else { + ops->remove_dev_pasid(dev, pasid, domain); + ret = 0; + } + + WARN_ON(ret); } static int __iommu_set_group_pasid(struct iommu_domain *domain, @@ -3380,7 +3390,9 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, ops = dev_iommu_ops(dev); if (!domain->ops->set_dev_pasid || - !ops->remove_dev_pasid) + (!ops->remove_dev_pasid && + (!ops->blocked_domain || + !ops->blocked_domain->ops->set_dev_pasid))) return -EOPNOTSUPP; if (ops != domain->owner || pasid == IOMMU_NO_PASID) From ef181762cb544efc8c88b79ea9224e21ca5da533 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Dec 2024 04:29:25 -0800 Subject: [PATCH 31/66] iommu/arm-smmu-v3: Make the blocked domain support PASID The blocked domain is used to park RID to be blocking DMA state. This can be extended to PASID as well. 
By this, the remove_dev_pasid() op of ARM SMMUv3 can be dropped. Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-5-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a5c7002ff75b..4de1a55747e7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3016,13 +3016,12 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, return ret; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old_domain) { + struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *smmu_domain; - - smmu_domain = to_smmu_domain(domain); mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); @@ -3043,6 +3042,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, sid_domain->type == IOMMU_DOMAIN_BLOCKED) sid_domain->ops->attach_dev(sid_domain, dev); } + return 0; } static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, @@ -3124,6 +3124,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, static const struct iommu_domain_ops arm_smmu_blocked_ops = { .attach_dev = arm_smmu_attach_dev_blocked, + .set_dev_pasid = arm_smmu_blocking_set_dev_pasid, }; static struct iommu_domain arm_smmu_blocked_domain = { @@ -3551,7 +3552,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = 
arm_smmu_get_resv_regions, - .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, From 4f0bdab175d6ea544c97766813db001ba28be3a7 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:26 -0800 Subject: [PATCH 32/66] iommu/vt-d: Make the blocked domain support PASID The blocked domain can be extended to park PASID of a device to be the DMA blocking state. By this the remove_dev_pasid() op is dropped. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-6-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 79e0da9eb626..aecfbcf1206c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3250,10 +3250,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain, return 0; } +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old); + static struct iommu_domain blocking_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocking_domain_attach_dev, + .set_dev_pasid = blocking_domain_set_dev_pasid, } }; @@ -4099,13 +4104,16 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, kfree(dev_pasid); } -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) { struct device_domain_info *info = dev_iommu_priv_get(dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); - domain_remove_dev_pasid(domain, dev, 
pasid); + domain_remove_dev_pasid(old, dev, pasid); + + return 0; } struct dev_pasid_info * @@ -4478,7 +4486,6 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, - .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, .page_response = intel_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { From 5f53638882391646e203e064c3ef3cc401745da8 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:27 -0800 Subject: [PATCH 33/66] iommu/amd: Make the blocked domain support PASID The blocked domain can be extended to park PASID of a device to be the DMA blocking state. By this the remove_dev_pasid() op is dropped. Remove PASID from old domain and device GCR3 table. No need to attach PASID to the blocked domain as clearing PASID from GCR3 table will make sure all DMAs for that PASID are blocked. Suggested-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-7-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 16f40b8000d7..d6590cdd6d15 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2475,10 +2475,19 @@ static int blocked_domain_attach_device(struct iommu_domain *domain, return 0; } +static int blocked_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) +{ + amd_iommu_remove_dev_pasid(dev, pasid, old); + return 0; +} + static struct iommu_domain blocked_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocked_domain_attach_device, + .set_dev_pasid = 
blocked_domain_set_dev_pasid, } }; @@ -2901,7 +2910,6 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .dev_enable_feat = amd_iommu_dev_enable_feature, .dev_disable_feat = amd_iommu_dev_disable_feature, - .remove_dev_pasid = amd_iommu_remove_dev_pasid, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, From 647b7aad19490a7b90c52c883bda7df299457491 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 4 Dec 2024 04:29:28 -0800 Subject: [PATCH 34/66] iommu: Remove the remove_dev_pasid op The iommu drivers that support PASID now support attaching a pasid to the blocked_domain; hence remove the remove_dev_pasid op from the iommu_ops. Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Vasant Hegde Reviewed-by: Lu Baolu Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20241204122928.11987-8-yi.l.liu@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 17 ++++------------- include/linux/iommu.h | 5 ----- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2064dc8bb8b5..28ffd836592b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3317,17 +3317,9 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *blocked_domain = ops->blocked_domain; - int ret = 1; - if (blocked_domain && blocked_domain->ops->set_dev_pasid) { - ret = blocked_domain->ops->set_dev_pasid(blocked_domain, - dev, pasid, domain); - } else { - ops->remove_dev_pasid(dev, pasid, domain); - ret = 0; - } - - WARN_ON(ret); + WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain, + dev, pasid, domain)); } static int __iommu_set_group_pasid(struct iommu_domain *domain, @@ -3390,9 +3382,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, ops = dev_iommu_ops(dev); if
(!domain->ops->set_dev_pasid || - (!ops->remove_dev_pasid && - (!ops->blocked_domain || - !ops->blocked_domain->ops->set_dev_pasid))) + !ops->blocked_domain || + !ops->blocked_domain->ops->set_dev_pasid) return -EOPNOTSUPP; if (ops != domain->owner || pasid == IOMMU_NO_PASID) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 318d27841130..38c65e92ecd0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -587,9 +587,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains - * @remove_dev_pasid: Remove any translation configurations of a specific - * pasid, so that any DMA transactions with this pasid - * will be blocked by the hardware. * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind * the @dev, as the set of virtualization resources shared/passed * to user space IOMMU instance. And associate it with a nesting @@ -647,8 +644,6 @@ struct iommu_ops { struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); - void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain); struct iommufd_viommu *(*viommu_alloc)( struct device *dev, struct iommu_domain *parent_domain, From 8735e47e3a29fa25439caaa2cf3b9a25f5357ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Sun, 15 Dec 2024 12:14:59 +0100 Subject: [PATCH 35/66] dt-bindings: iommu: qcom,iommu: Add MSM8917 IOMMU to SMMUv1 compatibles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add MSM8917 compatible string with "qcom,msm-iommu-v1" as fallback for the MSM8917 IOMMU which is compatible with Qualcomm's secure fw "SMMU v1" implementation. 
Acked-by: Krzysztof Kozlowski Signed-off-by: Barnabás Czémán Link: https://lore.kernel.org/r/20241215-msm8917-v9-4-bacaa26f3eef@mainlining.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/qcom,iommu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index f8cebc9e8cd9..5ae9a628261f 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -21,6 +21,7 @@ properties: - items: - enum: - qcom,msm8916-iommu + - qcom,msm8917-iommu - qcom,msm8953-iommu - const: qcom,msm-iommu-v1 - items: From b7b8a63055572f5baa78c1d9d048aad750b02ba5 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Sun, 15 Dec 2024 20:04:11 +0000 Subject: [PATCH 36/66] iommu/io-pgtable-arm: Fix cfg reading in arm_lpae_concat_mandatory() The newly introduced arm_lpae_concat_mandatory() function reads the ias/oas fields from the 'io_pgtable_cfg' copy embedded inside the 'arm_lpae_io_pgtable' structure. However, this copy is not set until later in alloc_io_pgtable_ops() after the alloc() function has been called. Use the address sizes passed in the 'io_pgtable_cfg' structure when deciding whether or not to concatenate the PGD. 
Fixes: 4dcac8407fe1 ("iommu/io-pgtable-arm: Fix stage-2 concatenation with 16K") Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241215200412.561400-1-smostafa@google.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index c1b62c7d81ba..7e53ee51270b 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -232,12 +232,13 @@ static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data) * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas) * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas) */ -static inline bool arm_lpae_concat_mandatory(struct arm_lpae_io_pgtable *data) +static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg, + struct arm_lpae_io_pgtable *data) { - unsigned int ias = data->iop.cfg.ias; - unsigned int oas = data->iop.cfg.oas; + unsigned int ias = cfg->ias; + unsigned int oas = cfg->oas; - /* Covers 1 and 2.d */ + /* Covers 1 and 2.d */ if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0)) return (oas == 48) || (ias == 48); @@ -1033,7 +1034,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) if (!data) return NULL; - if (arm_lpae_concat_mandatory(data)) { + if (arm_lpae_concat_mandatory(cfg, data)) { if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) > ARM_LPAE_S2_MAX_CONCAT_PAGES)) return NULL; From e94dc6ddda8dd3770879a132d577accd2cce25f9 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 18 Dec 2024 21:14:21 -0800 Subject: [PATCH 37/66] iommu/tegra241-cmdqv: Read SMMU IDR1.CMDQS instead of hardcoding The hardware limitation "max=19" actually comes from SMMU Command Queue. So, it'd be more natural for tegra241-cmdqv driver to read it out rather than hardcoding it itself. 
This is not an issue yet for a kernel on a baremetal system, but a guest kernel setting the queue base/size in form of IPA/gPA might result in a noncontiguous queue in the physical address space, if underlying physical pages backing up the guest RAM aren't contiguous entirely: e.g. 2MB-page backed guest RAM cannot guarantee a contiguous queue if it is 8MB (capped to VCMDQ_LOG2SIZE_MAX=19). This might lead to command errors when HW does linear-read from a noncontiguous queue memory. Adding this extra IDR1.CMDQS cap (in the guest kernel) allows VMM to set SMMU's IDR1.CMDQS=17 for the case mentioned above, so a guest-level queue will be capped to maximum 2MB, ensuring a contiguous queue memory. Fixes: a3799717b881 ("iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift") Reported-by: Ian Kalinowski Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20241219051421.1850267-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index c8ec74f089f3..dc7af970e9d0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -79,7 +79,6 @@ #define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define VCMDQ_LOG2SIZE_MAX 19 #define TEGRA241_VCMDQ_BASE 0x00000 #define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 @@ -505,12 +504,15 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; struct arm_smmu_queue *q = &cmdq->q; char name[16]; + u32 regval; int ret; snprintf(name, 16, "vcmdq%u", vcmdq->idx); - /* Queue size, capped to ensure natural alignment */ - q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); + /* 
Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, From 5a0751fc9eab152d20808623bf64c86ee83df976 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Tue, 31 Dec 2024 17:31:50 +0800 Subject: [PATCH 38/66] dt-bindings: iommu: rockchip: Add Rockchip RK3576 Just like RK3588, RK3576 is compatible with the existing rk3568 binding. Signed-off-by: Andy Yan Reviewed-by: Heiko Stuebner Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20241231093154.252595-1-andyshrk@163.com Signed-off-by: Joerg Roedel --- Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml index 621dde0e45d8..6ce41d11ff5e 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml @@ -25,6 +25,7 @@ properties: - rockchip,rk3568-iommu - items: - enum: + - rockchip,rk3576-iommu - rockchip,rk3588-iommu - const: rockchip,rk3568-iommu From 8d8d3752c0a4f7fb072352837cbdbf57c02df239 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Fri, 3 Jan 2025 17:32:19 +0800 Subject: [PATCH 39/66] iommu/riscv: Empty iommu queue before enabling it Changing cqen/fqen/pqen from 0 to 1 sets the cqh/fqt/pqt registers to 0. But the cqt/fqh/pqh registers are left unmodified. This commit resets the cqt/fqh/pqh registers to ensure the corresponding queues are empty before being enabled during initialization.
Signed-off-by: Xu Lu Link: https://lore.kernel.org/r/20250103093220.38106-2-luxu.kernel@bytedance.com Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 8a05def774bd..84806724f568 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -240,6 +240,12 @@ static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu, return rc; } + /* Empty queue before enabling it */ + if (queue->qid == RISCV_IOMMU_INTR_CQ) + riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0); + else + riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0); + /* * Enable queue with interrupts, clear any memory fault if any. * Wait for the hardware to acknowledge request and activate queue From 77a44196abfb39e183be554003e9ee76d95edab1 Mon Sep 17 00:00:00 2001 From: Xu Lu Date: Fri, 3 Jan 2025 17:32:20 +0800 Subject: [PATCH 40/66] iommu/riscv: Add shutdown function for iommu driver This commit supplies shutdown callback for iommu driver. The shutdown callback resets necessary registers so that newly booted kernel can pass riscv_iommu_init_check() after kexec. Also, the shutdown callback resets iommu mode to bare instead of off so that new kernel can still use PCIE devices even when CONFIG_RISCV_IOMMU is not enabled. 
Signed-off-by: Xu Lu Link: https://lore.kernel.org/r/20250103093220.38106-3-luxu.kernel@bytedance.com Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu-pci.c | 8 ++++++++ drivers/iommu/riscv/iommu-platform.c | 6 ++++++ drivers/iommu/riscv/iommu.c | 6 ++++-- drivers/iommu/riscv/iommu.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c index c7a89143014c..d82d2b00904c 100644 --- a/drivers/iommu/riscv/iommu-pci.c +++ b/drivers/iommu/riscv/iommu-pci.c @@ -101,6 +101,13 @@ static void riscv_iommu_pci_remove(struct pci_dev *pdev) riscv_iommu_remove(iommu); } +static void riscv_iommu_pci_shutdown(struct pci_dev *pdev) +{ + struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev); + + riscv_iommu_disable(iommu); +} + static const struct pci_device_id riscv_iommu_pci_tbl[] = { {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), 0}, {PCI_VDEVICE(RIVOS, PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA), 0}, @@ -112,6 +119,7 @@ static struct pci_driver riscv_iommu_pci_driver = { .id_table = riscv_iommu_pci_tbl, .probe = riscv_iommu_pci_probe, .remove = riscv_iommu_pci_remove, + .shutdown = riscv_iommu_pci_shutdown, .driver = { .suppress_bind_attrs = true, }, diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c index c722eebcdd9b..725e919b97ef 100644 --- a/drivers/iommu/riscv/iommu-platform.c +++ b/drivers/iommu/riscv/iommu-platform.c @@ -140,6 +140,11 @@ static void riscv_iommu_platform_remove(struct platform_device *pdev) platform_device_msi_free_irqs_all(&pdev->dev); }; +static void riscv_iommu_platform_shutdown(struct platform_device *pdev) +{ + riscv_iommu_disable(dev_get_drvdata(&pdev->dev)); +}; + static const struct of_device_id riscv_iommu_of_match[] = { {.compatible = "riscv,iommu",}, {}, @@ -148,6 +153,7 @@ static const struct of_device_id riscv_iommu_of_match[] = { static struct platform_driver riscv_iommu_platform_driver = { .probe = 
riscv_iommu_platform_probe, .remove = riscv_iommu_platform_remove, + .shutdown = riscv_iommu_platform_shutdown, .driver = { .name = "riscv,iommu", .of_match_table = riscv_iommu_of_match, diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 84806724f568..670b4302aca8 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -651,9 +651,11 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm * This is best effort IOMMU translation shutdown flow. * Disable IOMMU without waiting for hardware response. */ -static void riscv_iommu_disable(struct riscv_iommu_device *iommu) +void riscv_iommu_disable(struct riscv_iommu_device *iommu) { - riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, 0); + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, + FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, + RISCV_IOMMU_DDTP_IOMMU_MODE_BARE)); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0); riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0); diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h index b1c4664542b4..46df79dd5495 100644 --- a/drivers/iommu/riscv/iommu.h +++ b/drivers/iommu/riscv/iommu.h @@ -64,6 +64,7 @@ struct riscv_iommu_device { int riscv_iommu_init(struct riscv_iommu_device *iommu); void riscv_iommu_remove(struct riscv_iommu_device *iommu); +void riscv_iommu_disable(struct riscv_iommu_device *iommu); #define riscv_iommu_readl(iommu, addr) \ readl_relaxed((iommu)->reg + (addr)) From afc0cbc6e25b37dc9ba11d415ea6858902a7f04b Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Fri, 3 Jan 2025 19:30:59 +0800 Subject: [PATCH 41/66] iommu/msm: Use helper function devm_clk_get_prepared() Since commit 7ef9651e9792 ("clk: Provide new devm_clk helpers for prepared and enabled clocks"), devm_clk_get() and clk_prepare() can now be replaced by devm_clk_get_prepared() when driver prepares the clocks for the whole lifetime of the device. 
Moreover, it is no longer necessary to unprepare the clocks explicitly. Signed-off-by: Zhang Heng Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250103113059.463033-1-zhangheng@kylinos.cn Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 51 +++++++++------------------------------ 1 file changed, 11 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index ce40f0a419ea..2769e4544038 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -725,47 +725,32 @@ static int msm_iommu_probe(struct platform_device *pdev) iommu->dev = &pdev->dev; INIT_LIST_HEAD(&iommu->ctx_list); - iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); + iommu->pclk = devm_clk_get_prepared(iommu->dev, "smmu_pclk"); if (IS_ERR(iommu->pclk)) return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk), "could not get smmu_pclk\n"); - ret = clk_prepare(iommu->pclk); - if (ret) - return dev_err_probe(iommu->dev, ret, - "could not prepare smmu_pclk\n"); - - iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); - if (IS_ERR(iommu->clk)) { - clk_unprepare(iommu->pclk); + iommu->clk = devm_clk_get_prepared(iommu->dev, "iommu_clk"); + if (IS_ERR(iommu->clk)) return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk), "could not get iommu_clk\n"); - } - - ret = clk_prepare(iommu->clk); - if (ret) { - clk_unprepare(iommu->pclk); - return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n"); - } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); iommu->base = devm_ioremap_resource(iommu->dev, r); if (IS_ERR(iommu->base)) { ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n"); - goto fail; + return ret; } ioaddr = r->start; iommu->irq = platform_get_irq(pdev, 0); - if (iommu->irq < 0) { - ret = -ENODEV; - goto fail; - } + if (iommu->irq < 0) + return -ENODEV; ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val); if (ret) { dev_err(iommu->dev, "could not get ncb\n"); - goto 
fail; + return ret; } iommu->ncb = val; @@ -780,8 +765,7 @@ static int msm_iommu_probe(struct platform_device *pdev) if (!par) { pr_err("Invalid PAR value detected\n"); - ret = -ENODEV; - goto fail; + return -ENODEV; } ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, @@ -791,7 +775,7 @@ static int msm_iommu_probe(struct platform_device *pdev) iommu); if (ret) { pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); - goto fail; + return ret; } list_add(&iommu->dev_node, &qcom_iommu_devices); @@ -800,23 +784,19 @@ static int msm_iommu_probe(struct platform_device *pdev) "msm-smmu.%pa", &ioaddr); if (ret) { pr_err("Could not add msm-smmu at %pa to sysfs\n", &ioaddr); - goto fail; + return ret; } ret = iommu_device_register(&iommu->iommu, &msm_iommu_ops, &pdev->dev); if (ret) { pr_err("Could not register msm-smmu at %pa\n", &ioaddr); - goto fail; + return ret; } pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); return ret; -fail: - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); - return ret; } static const struct of_device_id msm_iommu_dt_match[] = { @@ -824,20 +804,11 @@ static const struct of_device_id msm_iommu_dt_match[] = { {} }; -static void msm_iommu_remove(struct platform_device *pdev) -{ - struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); - - clk_unprepare(iommu->clk); - clk_unprepare(iommu->pclk); -} - static struct platform_driver msm_iommu_driver = { .driver = { .name = "msm_iommu", .of_match_table = msm_iommu_dt_match, }, .probe = msm_iommu_probe, - .remove = msm_iommu_remove, }; builtin_platform_driver(msm_iommu_driver); From 5bb494d5cbb9a3403ba8b1c8bc145b42fc119078 Mon Sep 17 00:00:00 2001 From: Gao Shiyuan Date: Sat, 4 Jan 2025 00:58:08 +0800 Subject: [PATCH 42/66] iommu/amd: remove return value of amd_iommu_detect The return value of amd_iommu_detect() is not used, so remove it; this also makes it consistent with the other iommu detect functions.
Signed-off-by: Gao Shiyuan Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250103165808.80939-1-gaoshiyuan@baidu.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 10 ++++------ include/linux/amd-iommu.h | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 41294807452d..9db8fd1275be 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3421,25 +3421,23 @@ static bool amd_iommu_sme_check(void) * IOMMUs * ****************************************************************************/ -int __init amd_iommu_detect(void) +void __init amd_iommu_detect(void) { int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; + return; if (!amd_iommu_sme_check()) - return -ENODEV; + return; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return ret; + return; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; - - return 1; } /**************************************************************************** diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 2b90c48a6a87..062fbd4c9b77 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -31,11 +31,11 @@ struct amd_iommu_pi_data { struct task_struct; struct pci_dev; -extern int amd_iommu_detect(void); +extern void amd_iommu_detect(void); #else /* CONFIG_AMD_IOMMU */ -static inline int amd_iommu_detect(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } #endif /* CONFIG_AMD_IOMMU */ From 60f030f7418d3f1d94f2fb207fe3080e1844630b Mon Sep 17 00:00:00 2001 From: Kees Bakker Date: Tue, 7 Jan 2025 10:17:42 +0800 Subject: [PATCH 43/66] iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE There is a WARN_ON_ONCE to catch an unlikely situation when domain_remove_dev_pasid can't find the `pasid`. In case it nevertheless happens we must avoid using a NULL pointer. 
Signed-off-by: Kees Bakker Link: https://lore.kernel.org/r/20241218201048.E544818E57E@bout3.ijzerbout.nl Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 79e0da9eb626..8f75c11a3ec4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4090,13 +4090,14 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, break; } } - WARN_ON_ONCE(!dev_pasid); spin_unlock_irqrestore(&dmar_domain->lock, flags); cache_tag_unassign_domain(dmar_domain, dev, pasid); domain_detach_iommu(dmar_domain, iommu); - intel_iommu_debugfs_remove_dev_pasid(dev_pasid); - kfree(dev_pasid); + if (!WARN_ON_ONCE(!dev_pasid)) { + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); + kfree(dev_pasid); + } } static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, From de1dda7e0b60c52204c623f288021b2a22636126 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 7 Jan 2025 10:17:43 +0800 Subject: [PATCH 44/66] iommu/vt-d: Remove domain_alloc_paging() This is duplicated by intel_iommu_domain_alloc_paging_flags(), just remove it. 
Signed-off-by: Jason Gunthorpe Reviewed-by: Yi Liu Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/0-v1-b101d00c5ee5+17645-vtd_paging_flags_jgg@nvidia.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8f75c11a3ec4..113c66dcc32e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4446,21 +4446,6 @@ static struct iommu_domain identity_domain = { }, }; -static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; - struct dmar_domain *dmar_domain; - bool first_stage; - - first_stage = first_level_by_default(iommu); - dmar_domain = paging_domain_alloc(dev, first_stage); - if (IS_ERR(dmar_domain)) - return ERR_CAST(dmar_domain); - - return &dmar_domain->domain; -} - const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, @@ -4469,7 +4454,6 @@ const struct iommu_ops intel_iommu_ops = { .hw_info = intel_iommu_hw_info, .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, .domain_alloc_sva = intel_svm_domain_alloc, - .domain_alloc_paging = intel_iommu_domain_alloc_paging, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, .release_device = intel_iommu_release_device, From c2206299401b23bed9ef87f561d6f85f9351aa84 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 7 Jan 2025 10:17:44 +0800 Subject: [PATCH 45/66] iommu/vt-d: Remove iommu cap audit The capability audit code was introduced by commit "iommu/vt-d: Audit IOMMU Capabilities and add helper functions", aiming to verify the consistency of capabilities across all IOMMUs for supported features. 
Nowadays, all the kAPIs of the iommu subsystem have evolved to be device oriented, in preparation for supporting heterogeneous IOMMU architectures. There is no longer a need to require capability consistence among IOMMUs for any feature. Remove the iommu cap audit code to make the driver align with the design in the iommu core. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241216071828.22962-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Makefile | 2 +- drivers/iommu/intel/cap_audit.c | 217 ---------------------------- drivers/iommu/intel/cap_audit.h | 131 ----------------- drivers/iommu/intel/iommu.c | 9 -- drivers/iommu/intel/irq_remapping.c | 8 - 5 files changed, 1 insertion(+), 366 deletions(-) delete mode 100644 drivers/iommu/intel/cap_audit.c delete mode 100644 drivers/iommu/intel/cap_audit.h diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index d3bb0798092d..6c7528130cf9 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o -obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o +obj-$(CONFIG_DMAR_TABLE) += trace.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c deleted file mode 100644 index 9862dc20b35e..000000000000 --- a/drivers/iommu/intel/cap_audit.c +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * cap_audit.c - audit iommu capabilities for boot time and hot plug - * - * Copyright (C) 2021 Intel Corporation - * - * Author: Kyung Min Park - * Lu Baolu - */ - -#define pr_fmt(fmt) "DMAR: " fmt - -#include "iommu.h" -#include "cap_audit.h" - -static u64 intel_iommu_cap_sanity; -static u64 intel_iommu_ecap_sanity; - -static 
inline void check_irq_capabilities(struct intel_iommu *a, - struct intel_iommu *b) -{ - CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK); -} - -static inline void check_dmar_capabilities(struct intel_iommu *a, - struct intel_iommu *b) -{ - MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK); - MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK); - MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK); - - CHECK_FEATURE_MISMATCH(a, b, cap, fl5lp_support, CAP_FL5LP_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK); - CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, dit, 
ECAP_DIT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK); - CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK); -} - -static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type) -{ - bool mismatch = false; - u64 old_cap = intel_iommu_cap_sanity; - u64 old_ecap = intel_iommu_ecap_sanity; - - if (type == CAP_AUDIT_HOTPLUG_IRQR) { - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK); - goto out; - } - - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl5lp_support, CAP_FL5LP_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK); - 
CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK); - CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK); - - /* Abort hot plug if the hot plug iommu feature is smaller than global */ - MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, 
mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch); - MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch); - -out: - if (mismatch) { - intel_iommu_cap_sanity = old_cap; - intel_iommu_ecap_sanity = old_ecap; - return -EFAULT; - } - - return 0; -} - -static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type) -{ - struct dmar_drhd_unit *d; - struct intel_iommu *i; - int rc = 0; - - rcu_read_lock(); - if (list_empty(&dmar_drhd_units)) - goto out; - - for_each_active_iommu(i, d) { - if (!iommu) { - intel_iommu_ecap_sanity = i->ecap; - intel_iommu_cap_sanity = i->cap; - iommu = i; - continue; - } - - if (type == CAP_AUDIT_STATIC_DMAR) - check_dmar_capabilities(iommu, i); - else - check_irq_capabilities(iommu, i); - } - - /* - * If the system is sane to support scalable mode, either SL or FL - * should be sane. - */ - if (intel_cap_smts_sanity() && - !intel_cap_flts_sanity() && !intel_cap_slts_sanity()) - rc = -EOPNOTSUPP; - -out: - rcu_read_unlock(); - return rc; -} - -int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu) -{ - switch (type) { - case CAP_AUDIT_STATIC_DMAR: - case CAP_AUDIT_STATIC_IRQR: - return cap_audit_static(iommu, type); - case CAP_AUDIT_HOTPLUG_DMAR: - case CAP_AUDIT_HOTPLUG_IRQR: - return cap_audit_hotplug(iommu, type); - default: - break; - } - - return -EFAULT; -} - -bool intel_cap_smts_sanity(void) -{ - return ecap_smts(intel_iommu_ecap_sanity); -} - -bool intel_cap_pasid_sanity(void) -{ - return ecap_pasid(intel_iommu_ecap_sanity); -} - -bool intel_cap_nest_sanity(void) -{ - return ecap_nest(intel_iommu_ecap_sanity); -} - -bool intel_cap_flts_sanity(void) -{ - return ecap_flts(intel_iommu_ecap_sanity); -} - -bool intel_cap_slts_sanity(void) -{ - return ecap_slts(intel_iommu_ecap_sanity); -} diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h 
deleted file mode 100644 index d07b75938961..000000000000 --- a/drivers/iommu/intel/cap_audit.h +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * cap_audit.h - audit iommu capabilities header - * - * Copyright (C) 2021 Intel Corporation - * - * Author: Kyung Min Park - */ - -/* - * Capability Register Mask - */ -#define CAP_FL5LP_MASK BIT_ULL(60) -#define CAP_PI_MASK BIT_ULL(59) -#define CAP_FL1GP_MASK BIT_ULL(56) -#define CAP_RD_MASK BIT_ULL(55) -#define CAP_WD_MASK BIT_ULL(54) -#define CAP_MAMV_MASK GENMASK_ULL(53, 48) -#define CAP_NFR_MASK GENMASK_ULL(47, 40) -#define CAP_PSI_MASK BIT_ULL(39) -#define CAP_SLLPS_MASK GENMASK_ULL(37, 34) -#define CAP_FRO_MASK GENMASK_ULL(33, 24) -#define CAP_ZLR_MASK BIT_ULL(22) -#define CAP_MGAW_MASK GENMASK_ULL(21, 16) -#define CAP_SAGAW_MASK GENMASK_ULL(12, 8) -#define CAP_CM_MASK BIT_ULL(7) -#define CAP_PHMR_MASK BIT_ULL(6) -#define CAP_PLMR_MASK BIT_ULL(5) -#define CAP_RWBF_MASK BIT_ULL(4) -#define CAP_AFL_MASK BIT_ULL(3) -#define CAP_NDOMS_MASK GENMASK_ULL(2, 0) - -/* - * Extended Capability Register Mask - */ -#define ECAP_RPS_MASK BIT_ULL(49) -#define ECAP_SMPWC_MASK BIT_ULL(48) -#define ECAP_FLTS_MASK BIT_ULL(47) -#define ECAP_SLTS_MASK BIT_ULL(46) -#define ECAP_SLADS_MASK BIT_ULL(45) -#define ECAP_VCS_MASK BIT_ULL(44) -#define ECAP_SMTS_MASK BIT_ULL(43) -#define ECAP_PDS_MASK BIT_ULL(42) -#define ECAP_DIT_MASK BIT_ULL(41) -#define ECAP_PASID_MASK BIT_ULL(40) -#define ECAP_PSS_MASK GENMASK_ULL(39, 35) -#define ECAP_EAFS_MASK BIT_ULL(34) -#define ECAP_NWFS_MASK BIT_ULL(33) -#define ECAP_SRS_MASK BIT_ULL(31) -#define ECAP_ERS_MASK BIT_ULL(30) -#define ECAP_PRS_MASK BIT_ULL(29) -#define ECAP_NEST_MASK BIT_ULL(26) -#define ECAP_MTS_MASK BIT_ULL(25) -#define ECAP_MHMV_MASK GENMASK_ULL(23, 20) -#define ECAP_IRO_MASK GENMASK_ULL(17, 8) -#define ECAP_SC_MASK BIT_ULL(7) -#define ECAP_PT_MASK BIT_ULL(6) -#define ECAP_EIM_MASK BIT_ULL(4) -#define ECAP_DT_MASK BIT_ULL(2) -#define ECAP_QI_MASK BIT_ULL(1) 
-#define ECAP_C_MASK BIT_ULL(0) - -/* - * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each - * IOMMU gets audited. - */ -#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ -do { \ - if (cap##_##feature(a) != cap##_##feature(b)) { \ - intel_iommu_##cap##_sanity &= ~(MASK); \ - pr_info("IOMMU feature %s inconsistent", #feature); \ - } \ -} while (0) - -#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \ - DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK) - -#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \ -do { \ - if (cap##_##feature(intel_iommu_##cap##_sanity)) \ - DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \ - (b)->cap, cap, feature, MASK); \ -} while (0) - -#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \ -do { \ - u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \ - min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \ - intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \ - min_feature; \ -} while (0) - -#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \ -do { \ - if ((intel_iommu_##cap##_sanity & (MASK)) > \ - (cap##_##feature((iommu)->cap))) \ - mismatch = true; \ - else \ - (iommu)->cap = ((iommu)->cap & ~(MASK)) | \ - (intel_iommu_##cap##_sanity & (MASK)); \ -} while (0) - -enum cap_audit_type { - CAP_AUDIT_STATIC_DMAR, - CAP_AUDIT_STATIC_IRQR, - CAP_AUDIT_HOTPLUG_DMAR, - CAP_AUDIT_HOTPLUG_IRQR, -}; - -bool intel_cap_smts_sanity(void); -bool intel_cap_pasid_sanity(void); -bool intel_cap_nest_sanity(void); -bool intel_cap_flts_sanity(void); -bool intel_cap_slts_sanity(void); - -static inline bool scalable_mode_support(void) -{ - return (intel_iommu_sm && intel_cap_smts_sanity()); -} - -static inline bool pasid_mode_support(void) -{ - return scalable_mode_support() && intel_cap_pasid_sanity(); -} - -static inline bool nested_mode_support(void) -{ - return scalable_mode_support() && intel_cap_nest_sanity(); -} - -int 
intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 113c66dcc32e..a1e254417307 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -29,7 +29,6 @@ #include "../irq_remapping.h" #include "../iommu-pages.h" #include "pasid.h" -#include "cap_audit.h" #include "perfmon.h" #define ROOT_SIZE VTD_PAGE_SIZE @@ -2118,10 +2117,6 @@ static int __init init_dmars(void) struct intel_iommu *iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); - if (ret) - goto free_iommu; - for_each_iommu(iommu, drhd) { if (drhd->ignored) { iommu_disable_translation(iommu); @@ -2617,10 +2612,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) struct intel_iommu *iommu = dmaru->iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); - if (ret) - goto out; - /* * Disable translation if already enabled prior to OS handover. */ diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 466c1412dd45..4431543bcfe3 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -24,7 +24,6 @@ #include "iommu.h" #include "../irq_remapping.h" #include "../iommu-pages.h" -#include "cap_audit.h" enum irq_mode { IRQ_REMAPPING, @@ -727,9 +726,6 @@ static int __init intel_prepare_irq_remapping(void) if (dmar_table_init() < 0) return -ENODEV; - if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) - return -ENODEV; - if (!dmar_ir_support()) return -ENODEV; @@ -1534,10 +1530,6 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) int ret; int eim = x2apic_enabled(); - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); - if (ret) - return ret; - if (eim && !ecap_eim_support(iommu->ecap)) { pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", iommu->reg_phys, iommu->ecap); From cf08ca81d08a04b3b304e8fb4e052f323a09783d Mon Sep 17 00:00:00 2001 From: Lu Baolu 
Date: Tue, 7 Jan 2025 10:17:45 +0800 Subject: [PATCH 46/66] iommu/vt-d: Draining PRQ in sva unbind path when FPD bit set When a device uses a PASID for SVA (Shared Virtual Address), it's possible that the PASID entry is marked as non-present with the FPD bit set before the device flushes all ongoing DMA requests and removes the SVA domain. This can occur when an exception happens and the process terminates before the device driver stops DMA and calls the iommu driver to unbind the PASID. There's no need to drain the PRQ in the mm release path. Instead, the PRQ will be drained in the SVA unbind path. But in such a case, intel_pasid_tear_down_entry() only checks the presence of the pasid entry and returns directly. Add the code to clear the FPD bit and drain the PRQ. Fixes: c43e1ccdebf2 ("iommu/vt-d: Drain PRQs when domain removed from RID") Suggested-by: Kevin Tian Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241217024240.139615-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 22 +++++++++++++++++++++- drivers/iommu/intel/pasid.h | 6 ++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 5b7d85f1e143..fb59a7d35958 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -244,11 +244,31 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); - if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) { + if (WARN_ON(!pte)) { spin_unlock(&iommu->lock); return; } + if (!pasid_pte_is_present(pte)) { + if (!pasid_pte_is_fault_disabled(pte)) { + WARN_ON(READ_ONCE(pte->val[0]) != 0); + spin_unlock(&iommu->lock); + return; + } + + /* + * When a PASID is used for SVA by a device, it's possible + * that the pasid entry is non-present with the Fault + * Processing Disabled bit set.
Clear the pasid entry and + * drain the PRQ for the PASID before return. + */ + pasid_clear_entry(pte); + spin_unlock(&iommu->lock); + intel_iommu_drain_pasid_prq(dev, pasid); + + return; + } + did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); intel_pasid_clear_entry(dev, pasid, fault_ignore); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 082f4fe20216..668d8ece6b14 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -73,6 +73,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte) return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; } +/* Get FPD(Fault Processing Disable) bit of a PASID table entry */ +static inline bool pasid_pte_is_fault_disabled(struct pasid_entry *pte) +{ + return READ_ONCE(pte->val[0]) & PASID_PTE_FPD; +} + /* Get PGTT field of a PASID table entry */ static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) { From acf5d49aaf862333a7139adff52a6b153af2853a Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 7 Jan 2025 10:17:46 +0800 Subject: [PATCH 47/66] iommu/vt-d: Link cache tags of same iommu unit together Cache tag invalidation requests for a domain are accumulated until a different iommu unit is found when traversing the cache_tags linked list. But cache tags of the same iommu unit can be scattered across the linked list, which makes batched flush less efficient. E.g., one device backed by iommu0 is attached to a domain in between the attachments of two devices backed by iommu1. Group cache tags of the same iommu unit together in cache_tag_assign() to maximize the performance of batched flush. 
Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu Signed-off-by: Zhenzhong Duan Link: https://lore.kernel.org/r/20241219054358.8654-1-zhenzhong.duan@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 09694cca8752..fc35cba59145 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -47,6 +47,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct cache_tag *tag, *temp; + struct list_head *prev; unsigned long flags; tag = kzalloc(sizeof(*tag), GFP_KERNEL); @@ -65,6 +66,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, tag->dev = iommu->iommu.dev; spin_lock_irqsave(&domain->cache_lock, flags); + prev = &domain->cache_tags; list_for_each_entry(temp, &domain->cache_tags, node) { if (cache_tage_match(temp, did, iommu, dev, pasid, type)) { temp->users++; @@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did, trace_cache_tag_assign(temp); return 0; } + if (temp->iommu == iommu) + prev = &temp->node; } - list_add_tail(&tag->node, &domain->cache_tags); + /* + * Link cache tags of same iommu unit together, so corresponding + * flush ops can be batched for iommu unit. + */ + list_add(&tag->node, prev); + spin_unlock_irqrestore(&domain->cache_lock, flags); trace_cache_tag_assign(tag); From ef4144b1b47dba61ebf19b9567013afdba5225dd Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Thu, 12 Dec 2024 20:43:58 +0530 Subject: [PATCH 48/66] iommu/arm-smmu: Re-enable context caching in smmu reset operation Default MMU-500 reset operation disables context caching in prefetch buffer. It is however expected for context banks using the ACTLR register to retain their prefetch value during reset and runtime suspend. 
Add config 'ARM_SMMU_MMU_500_CPRE_ERRATA' to gate this errata workaround in default MMU-500 reset operation which defaults to 'Y' and provide option to disable workaround for context caching in prefetch buffer as and when needed. Suggested-by: Will Deacon Signed-off-by: Bibek Kumar Patro Link: https://lore.kernel.org/r/20241212151402.159102-2-quic_bibekkum@quicinc.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 3 ++- drivers/iommu/Kconfig | 12 ++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 5 +++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index b42fea07c5ce..f074f6219f5c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -198,7 +198,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | MMU-500 | #841119,826419 | N/A | +| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA| +| | | #562869,1047329 | | +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-600 | #1076982,1209401| N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 47c46e4b739e..ec1b5e32b972 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -367,6 +367,18 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT 'arm-smmu.disable_bypass' will continue to override this config. +config ARM_SMMU_MMU_500_CPRE_ERRATA + bool "Enable errata workaround for CPRE in SMMU reset path" + depends on ARM_SMMU + default y + help + Say Y here (by default) to apply workaround to disable + MMU-500's next-page prefetcher for sake of 4 known errata. 
+ + Say N here only when it is sure that any errata related to + prefetch enablement are not applicable on the platform. + Refer silicon-errata.rst for info on errata IDs. + config ARM_SMMU_QCOM def_tristate y depends on ARM_SMMU && ARCH_QCOM diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 99030e6b16e7..db9b9a8e139c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -110,7 +110,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm int arm_mmu500_reset(struct arm_smmu_device *smmu) { u32 reg, major; - int i; /* * On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before * writes to the context bank ACTLRs will stick. And we just hope that @@ -128,11 +127,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg); +#ifdef CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA /* * Disable MMU-500's not-particularly-beneficial next-page * prefetcher for the sake of at least 5 known errata. 
*/ - for (i = 0; i < smmu->num_context_banks; ++i) { + for (int i = 0; i < smmu->num_context_banks; ++i) { reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); @@ -140,6 +140,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) if (reg & ARM_MMU500_ACTLR_CPRE) dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n"); } +#endif return 0; } From 445d7a8ed90e1725d148f2ce0f87d3ef3dd5f7ff Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Thu, 12 Dec 2024 20:43:59 +0530 Subject: [PATCH 49/66] iommu/arm-smmu: Refactor qcom_smmu structure to include single pointer qcom_smmu_match_data is static and constant so refactor qcom_smmu to store single pointer to qcom_smmu_match_data instead of replicating multiple child members of the same and handle the further dereferences in the places that want them. Suggested-by: Robin Murphy Reviewed-by: Dmitry Baryshkov Reviewed-by: Rob Clark Signed-off-by: Bibek Kumar Patro Link: https://lore.kernel.org/r/20241212151402.159102-3-quic_bibekkum@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 548783f3f8e8..d03b2239baad 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -73,7 +73,7 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) if (__ratelimit(&rs)) { dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); - cfg = qsmmu->cfg; + cfg = qsmmu->data->cfg; if (!cfg) return; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 
601fb878d0ef..5f3b5dfdcf05 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -507,7 +507,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = data->cfg; + qsmmu->data = data; return &qsmmu->smmu; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 3c134d1a6277..b55cd3e3ae48 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -8,7 +8,7 @@ struct qcom_smmu { struct arm_smmu_device smmu; - const struct qcom_smmu_config *cfg; + const struct qcom_smmu_match_data *data; bool bypass_quirk; u8 bypass_cbndx; u32 stall_enabled; From 7f2ef1bfc758f0f206eac863ff8ee417d5bb1493 Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Thu, 12 Dec 2024 20:44:00 +0530 Subject: [PATCH 50/66] iommu/arm-smmu: Add support for PRR bit setup Add an adreno-smmu-priv interface for drm/msm to call into arm-smmu-qcom and initiate the "Partially Resident Region" (PRR) bit setup or reset sequence as per request. This will be used by GPU to setup the PRR bit and related configuration registers through adreno-smmu private interface instead of directly poking the smmu hardware. 
Suggested-by: Rob Clark Signed-off-by: Bibek Kumar Patro Link: https://lore.kernel.org/r/20241212151402.159102-4-quic_bibekkum@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 52 ++++++++++++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu.h | 2 + include/linux/adreno-smmu-priv.h | 7 +++ 3 files changed, 61 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 5f3b5dfdcf05..5a32d5dcbc86 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -16,6 +16,8 @@ #define QCOM_DUMMY_VAL -1 +#define GFX_ACTLR_PRR (1 << 5) + static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) { return container_of(smmu, struct qcom_smmu, smmu); @@ -99,6 +101,47 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg); } +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set) +{ + struct arm_smmu_domain *smmu_domain = (void *)cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + u32 reg = 0; + int ret; + + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } + + reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR); + reg &= ~GFX_ACTLR_PRR; + if (set) + reg |= FIELD_PREP(GFX_ACTLR_PRR, 1); + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg); + pm_runtime_put_autosuspend(smmu->dev); +} + +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr) +{ + struct arm_smmu_domain *smmu_domain = (void *)cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + int ret; + + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } + + 
writel_relaxed(lower_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR); + writel_relaxed(upper_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR); + pm_runtime_put_autosuspend(smmu->dev); +} + #define QCOM_ADRENO_SMMU_GPU_SID 0 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev) @@ -210,6 +253,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + const struct device_node *np = smmu_domain->smmu->dev->of_node; struct adreno_smmu_priv *priv; smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; @@ -239,6 +283,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, priv->get_fault_info = qcom_adreno_smmu_get_fault_info; priv->set_stall = qcom_adreno_smmu_set_stall; priv->resume_translation = qcom_adreno_smmu_resume_translation; + priv->set_prr_bit = NULL; + priv->set_prr_addr = NULL; + + if (of_device_is_compatible(np, "qcom,smmu-500") && + of_device_is_compatible(np, "qcom,adreno-smmu")) { + priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit; + priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr; + } return 0; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index e2aeb511ae90..2dbf3243b5ad 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 +#define ARM_SMMU_GFX_PRR_CFG_LADDR 0x6008 +#define ARM_SMMU_GFX_PRR_CFG_UADDR 0x600C #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_RESUME_TERMINATE BIT(0) diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h index c637e0997f6d..abec23c7744f 100644 --- a/include/linux/adreno-smmu-priv.h +++ b/include/linux/adreno-smmu-priv.h @@ -50,6 +50,11 @@ struct adreno_smmu_fault_info { * the GPU driver must 
call resume_translation() * @resume_translation: Resume translation after a fault * + * @set_prr_bit: [optional] Configure the GPU's Partially Resident + * Region (PRR) bit in the ACTLR register. + * @set_prr_addr: [optional] Configure the PRR_CFG_*ADDR register with + * the physical address of PRR page passed from GPU + * driver. * * The GPU driver (drm/msm) and adreno-smmu work together for controlling * the GPU's SMMU instance. This is by necessity, as the GPU is directly @@ -67,6 +72,8 @@ struct adreno_smmu_priv { void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info); void (*set_stall)(const void *cookie, bool enabled); void (*resume_translation)(const void *cookie, bool terminate); + void (*set_prr_bit)(const void *cookie, bool set); + void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr); }; #endif /* __ADRENO_SMMU_PRIV_H */ From 9fe18d825a5854f1ed6d6f91b45190d3a20f9f23 Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Thu, 12 Dec 2024 20:44:01 +0530 Subject: [PATCH 51/66] iommu/arm-smmu: Introduce ACTLR custom prefetcher settings Currently in Qualcomm SoCs the default prefetch is set to 1 which allows the TLB to fetch just the next page table. MMU-500 features ACTLR register which is implementation defined and is used for Qualcomm SoCs to have a custom prefetch setting enabling TLB to prefetch the next set of page tables accordingly allowing for faster translations. ACTLR value is unique for each SMR (Stream matching register) and stored in a pre-populated table. This value is set to the register during context bank initialisation. 
Reviewed-by: Dmitry Baryshkov Signed-off-by: Bibek Kumar Patro Link: https://lore.kernel.org/r/20241212151402.159102-5-quic_bibekkum@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 33 ++++++++++++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 1 + 2 files changed, 34 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 5a32d5dcbc86..b524d19e52f7 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -250,14 +250,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) return true; } +static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx, + const struct of_device_id *client_match) +{ + const struct of_device_id *match = + of_match_device(client_match, dev); + + if (!match) { + dev_dbg(dev, "no ACTLR settings present\n"); + return; + } + + arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data); +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { const struct device_node *np = smmu_domain->smmu->dev->of_node; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; struct adreno_smmu_priv *priv; smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + /* Only enable split pagetables for the GPU device (SID 0) */ if (!qcom_adreno_smmu_is_gpu_device(dev)) return 0; @@ -321,8 +344,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + struct 
arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; + smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + return 0; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index b55cd3e3ae48..8addd453f5f1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -28,6 +28,7 @@ struct qcom_smmu_match_data { const struct qcom_smmu_config *cfg; const struct arm_smmu_impl *impl; const struct arm_smmu_impl *adreno_impl; + const struct of_device_id * const client_match; }; irqreturn_t qcom_smmu_context_fault(int irq, void *dev); From 3e35c3e725de97f7870ef414cd5d6de7945d4fe7 Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Thu, 12 Dec 2024 20:44:02 +0530 Subject: [PATCH 52/66] iommu/arm-smmu: Add ACTLR data and support for qcom_smmu_500 Add ACTLR data table for qcom_smmu_500 including corresponding data entry and set prefetch value by way of a list of compatible strings. Reviewed-by: Dmitry Baryshkov Signed-off-by: Bibek Kumar Patro Link: https://lore.kernel.org/r/20241212151402.159102-6-quic_bibekkum@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index b524d19e52f7..59d02687280e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -16,8 +16,40 @@ #define QCOM_DUMMY_VAL -1 +/* + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch + * buffer). 
The remaining bits are implementation defined and vary across + * SoCs. + */ + +#define CPRE (1 << 1) +#define CMTLB (1 << 0) +#define PREFETCH_SHIFT 8 +#define PREFETCH_DEFAULT 0 +#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT) +#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT) +#define PREFETCH_DEEP (3 << PREFETCH_SHIFT) #define GFX_ACTLR_PRR (1 << 5) +static const struct of_device_id qcom_smmu_actlr_client_of_match[] = { + { .compatible = "qcom,adreno", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-gmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-smmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,fastrpc", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-mdss", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-venus", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sm8550-mdss", + .data = (const void *) (PREFETCH_DEFAULT | CMTLB) }, + { } +}; + static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) { return container_of(smmu, struct qcom_smmu, smmu); @@ -635,6 +667,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { .impl = &qcom_smmu_500_impl, .adreno_impl = &qcom_adreno_smmu_500_impl, .cfg = &qcom_smmu_impl0_cfg, + .client_match = qcom_smmu_actlr_client_of_match, }; /* From 821500d5c59737708b0959a6c71328ae5ba070ca Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 10 Dec 2024 08:51:19 -0800 Subject: [PATCH 53/66] iommu/io-pgtable-arm: Make pgtable walker more generic We can re-use this basic pgtable walk logic in a few places. 
Signed-off-by: Rob Clark Reviewed-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241210165127.600817-2-robdclark@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 67 ++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7e53ee51270b..e8e9f77a6170 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -741,33 +741,33 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, } struct io_pgtable_walk_data { - struct iommu_dirty_bitmap *dirty; + struct io_pgtable *iop; + void *data; + int (*visit)(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size); unsigned long flags; u64 addr; const u64 end; }; -static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, - int lvl); +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); -static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, int lvl) +static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) { struct io_pgtable *iop = &data->iop; arm_lpae_iopte pte = READ_ONCE(*ptep); - if (iopte_leaf(pte, lvl, iop->fmt)) { - size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + int ret = walk_data->visit(walk_data, lvl, ptep, size); + if (ret) + return ret; - if (iopte_writeable_dirty(pte)) { - iommu_dirty_bitmap_record(walk_data->dirty, - walk_data->addr, size); - if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) - iopte_set_writeable_clean(ptep); - } + if (iopte_leaf(pte, lvl, iop->fmt)) { walk_data->addr += size; return 0; } @@ -776,13 +776,13 @@ 
static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, return -EINVAL; ptep = iopte_deref(pte, data); - return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); + return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1); } -static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, - struct io_pgtable_walk_data *walk_data, - arm_lpae_iopte *ptep, - int lvl) +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) { u32 idx; int max_entries, ret; @@ -797,7 +797,7 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { - ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl); if (ret) return ret; } @@ -805,6 +805,23 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, return 0; } +static int visit_dirty(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iommu_dirty_bitmap *dirty = walk_data->data; + + if (!iopte_leaf(*ptep, lvl, walk_data->iop->fmt)) + return 0; + + if (iopte_writeable_dirty(*ptep)) { + iommu_dirty_bitmap_record(dirty, walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + + return 0; +} + static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, @@ -813,7 +830,9 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; struct io_pgtable_walk_data walk_data = { - .dirty = dirty, + .iop = &data->iop, + .data = dirty, + .visit = visit_dirty, .flags = flags, .addr = iova, .end = iova + size, @@ -828,7 
+847,7 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, if (data->iop.fmt != ARM_64_LPAE_S1) return -EINVAL; - return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); + return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl); } static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) From d9e589e6ad73e5e13f8ed5df9a3e4049001a95c1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 10 Dec 2024 08:51:20 -0800 Subject: [PATCH 54/66] iommu/io-pgtable-arm: Re-use the pgtable walk for iova_to_phys Re-use the generic pgtable walk path. Signed-off-by: Rob Clark Reviewed-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241210165127.600817-3-robdclark@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 74 +++++++++++++++++----------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e8e9f77a6170..dd2111731e19 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -704,42 +704,6 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov data->start_level, ptep); } -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, - unsigned long iova) -{ - struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); - arm_lpae_iopte pte, *ptep = data->pgd; - int lvl = data->start_level; - - do { - /* Valid IOPTE pointer? */ - if (!ptep) - return 0; - - /* Grab the IOPTE we're interested in */ - ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = READ_ONCE(*ptep); - - /* Valid entry? */ - if (!pte) - return 0; - - /* Leaf entry? 
*/ - if (iopte_leaf(pte, lvl, data->iop.fmt)) - goto found_translation; - - /* Take it to the next level */ - ptep = iopte_deref(pte, data); - } while (++lvl < ARM_LPAE_MAX_LEVELS); - - /* Ran out of page tables to walk */ - return 0; - -found_translation: - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); - return iopte_to_paddr(pte, data) | iova; -} - struct io_pgtable_walk_data { struct io_pgtable *iop; void *data; @@ -755,6 +719,41 @@ static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data, arm_lpae_iopte *ptep, int lvl); +struct iova_to_phys_data { + arm_lpae_iopte pte; + int lvl; +}; + +static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct iova_to_phys_data *data = walk_data->data; + data->pte = *ptep; + data->lvl = lvl; + return 0; +} + +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct iova_to_phys_data d; + struct io_pgtable_walk_data walk_data = { + .data = &d, + .visit = visit_iova_to_phys, + .addr = iova, + .end = iova + 1, + }; + int ret; + + ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); + if (ret) + return 0; + + iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1); + return iopte_to_paddr(d.pte, data) | iova; +} + static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, struct io_pgtable_walk_data *walk_data, arm_lpae_iopte *ptep, int lvl) @@ -772,8 +771,9 @@ static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, return 0; } - if (WARN_ON(!iopte_table(pte, lvl))) + if (!iopte_table(pte, lvl)) { return -EINVAL; + } ptep = iopte_deref(pte, data); return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1); From aff028a8192d056d346541c5fc7d88c0eb43412c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 10 Dec 2024 08:51:21 -0800 Subject: [PATCH 55/66] iommu/io-pgtable-arm: Add way to debug pgtable walk Add an 
io-pgtable method to walk the pgtable returning the raw PTEs that would be traversed for a given iova access. Signed-off-by: Rob Clark Reviewed-by: Mostafa Saleh Link: https://lore.kernel.org/r/20241210165127.600817-4-robdclark@gmail.com [will: Removed 'arm_lpae_io_pgtable_walk_data::level' per Mostafa] Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 23 +++++++++++++++++++++++ include/linux/io-pgtable.h | 11 +++++++++++ 2 files changed, 34 insertions(+) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index dd2111731e19..7632c80edea6 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -754,6 +754,28 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return iopte_to_paddr(d.pte, data) | iova; } +static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl, + arm_lpae_iopte *ptep, size_t size) +{ + struct arm_lpae_io_pgtable_walk_data *data = walk_data->data; + data->ptes[lvl] = *ptep; + return 0; +} + +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova, + void *wd) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_walk_data walk_data = { + .data = wd, + .visit = visit_pgtable_walk, + .addr = iova, + .end = iova + 1, + }; + + return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level); +} + static int io_pgtable_visit(struct arm_lpae_io_pgtable *data, struct io_pgtable_walk_data *walk_data, arm_lpae_iopte *ptep, int lvl) @@ -929,6 +951,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, + .pgtable_walk = arm_lpae_pgtable_walk, }; return data; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ce86b09ae80f..bba2a51c87d2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -180,12 +180,22 @@ 
struct io_pgtable_cfg { }; }; +/** + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk + * + * @ptes: The recorded PTE values from the walk + */ +struct arm_lpae_io_pgtable_walk_data { + u64 ptes[4]; +}; + /** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. + * @pgtable_walk: (optional) Perform a page table walk for a given iova. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -199,6 +209,7 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd); int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, From f2c77f6e41e68e1b24c165acbf6d4da6b3117e23 Mon Sep 17 00:00:00 2001 From: Pranjal Shrivastava Date: Tue, 7 Jan 2025 16:51:00 +0000 Subject: [PATCH 56/66] iommu/arm-smmu-v3: Use str_read_write helper w/ logs Adopt the `str_read_write` helper in event logging as suggested by the coccinelle tool. 
Signed-off-by: Pranjal Shrivastava Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/all/20250107130053.GC6991@willie-the-truck/ Link: https://lore.kernel.org/r/20250107165100.1093357-1-praan@google.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ea76f25c0661..6679466911b3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1907,7 +1907,7 @@ static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw, dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x", evt->privileged ? "priv" : "unpriv", evt->instruction ? "inst" : "data", - evt->read ? "read" : "write", + str_read_write(evt->read), evt->s2 ? "s2" : "s1", event_class_str[evt->class], evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "", evt->stall ? " stall" : "", evt->stag); From 1f3dc29d2445c89c85e451c02e41a8e0cd22423c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jan 2025 13:18:55 +0000 Subject: [PATCH 57/66] iommu/arm-smmu-v3: Add missing #include of linux/string_choices.h Commit f2c77f6e41e6 ("iommu/arm-smmu-v3: Use str_read_write helper w/ logs") introduced a call to str_read_write() in the SMMUv3 driver but without an explicit #include of <linux/string_choices.h>. This breaks the build for custom configurations where CONFIG_ACPI=n: drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:1909:4: error: call to undeclared function 'str_read_write'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 1909 | str_read_write(evt->read), | ^ Add the missing #include.
Link: https://lore.kernel.org/r/d07e82a4-2880-4ae3-961b-471bfa7ac6c4@samsung.com Reported-by: Marek Szyprowski Fixes: f2c77f6e41e6 ("iommu/arm-smmu-v3: Use str_read_write helper w/ logs") Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6679466911b3..44c32bb89dd3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include From 10c62c38b073ecea775b7e23fa7c7a3995a84ff3 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 2 Jan 2025 21:46:16 -0500 Subject: [PATCH 58/66] iommu/riscv: Fixup compile warning When __BITS_PER_LONG == 32, size_t is defined as unsigned int rather than unsigned long. Therefore, we should use size_t to avoid type-checking errors. Fixes: 488ffbf18171 ("iommu/riscv: Paging domain support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Tomasz Jeznach Reviewed-by: Charlie Jenkins Reviewed-by: Tomasz Jeznach Link: https://lore.kernel.org/r/20250103024616.3359159-1-guoren@kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/riscv/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 670b4302aca8..8f049d4a0e2c 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -1278,7 +1278,7 @@ static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, dma_addr_t iova) { struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - unsigned long pte_size; + size_t pte_size; unsigned long *ptr; ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); From 1a684b099fac9a37e6fe2f0e594adbb1eff5181a Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Fri, 10 Jan 2025 12:34:59 -0400 Subject: [PATCH 59/66] iommu/amd: Remove unused 
amd_iommu_domain_update() All the callers have been removed by the below commit, remove the implementation and prototypes. Fixes: 322d889ae7d3 ("iommu/amd: Remove amd_iommu_domain_update() from page table freeing") Reviewed-by: Vasant Hegde Signed-off-by: Alejandro Jimenez Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/iommu.c | 9 --------- 2 files changed, 10 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index c4cd272b31f4..cdbfa5f096dd 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -88,7 +88,6 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag); */ void amd_iommu_flush_all_caches(struct amd_iommu *iommu); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); -void amd_iommu_domain_update(struct protection_domain *domain); void amd_iommu_domain_flush_pages(struct protection_domain *domain, u64 address, size_t size); void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3aa80f140cdb..e652990440c6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1766,15 +1766,6 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) domain_flush_complete(domain); } -void amd_iommu_domain_update(struct protection_domain *domain) -{ - /* Update device table */ - amd_iommu_update_and_flush_device_table(domain); - - /* Flush domain TLB(s) and wait for completion */ - amd_iommu_domain_flush_all(domain); -} - int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag) { struct iommu_dev_data *dev_data; From f9b80f941e0e68c3347c5d22a17a0f636a064e2c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:00 -0400 Subject: [PATCH 
60/66] iommu/amd: Remove domain_alloc() IOMMU drivers should not be sensitive to the domain type, a paging domain should be created based only on the flags passed in, the same for all callers. AMD was using the domain_alloc() path to force VFIO into a v1 domain type, because v1 gives higher performance. However now that IOMMU_HWPT_ALLOC_PASID is present, and a NULL device is not possible, domain_alloc_paging_flags() will do the right thing for VFIO. When invoked from VFIO flags will be 0 and the amd_iommu_pgtable type of domain will be selected. This is v1 by default unless the kernel command line has overridden it to v2. If the admin is forcing v2 assume they know what they are doing so force it everywhere, including for VFIO. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index e652990440c6..8e9a0445e86f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2565,25 +2565,6 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, return &domain->domain; } -static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type) -{ - struct iommu_domain *domain; - int pgtable = amd_iommu_pgtable; - - /* - * Force IOMMU v1 page table when allocating - * domain for pass-through devices. 
- */ - if (type == IOMMU_DOMAIN_UNMANAGED) - pgtable = AMD_IOMMU_V1; - - domain = do_iommu_domain_alloc(type, NULL, 0, pgtable); - if (IS_ERR(domain)) - return NULL; - - return domain; -} - static struct iommu_domain * amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) @@ -3059,7 +3040,6 @@ const struct iommu_ops amd_iommu_ops = { .blocked_domain = &blocked_domain, .release_domain = &release_domain, .identity_domain = &identity_domain.domain, - .domain_alloc = amd_iommu_domain_alloc, .domain_alloc_paging_flags = amd_iommu_domain_alloc_paging_flags, .domain_alloc_sva = amd_iommu_domain_alloc_sva, .probe_device = amd_iommu_probe_device, From 02bcd1a8b991c6fc29271fa02250bea1b61fb742 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:01 -0400 Subject: [PATCH 61/66] iommu/amd: Remove dev == NULL checks This is no longer possible, amd_iommu_domain_alloc_paging_flags() is never called with dev = NULL from the core code. Similarly get_amd_iommu_from_dev() can never be NULL either. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8e9a0445e86f..b56b29b3a47d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2523,13 +2523,10 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, u32 flags, int pgtable) { bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); struct protection_domain *domain; - struct amd_iommu *iommu = NULL; int ret; - if (dev) - iommu = get_amd_iommu_from_dev(dev); - /* * Since DTE[Mode]=0 is prohibited on SNP-enabled system, * default to use IOMMU_DOMAIN_DMA[_FQ]. 
@@ -2537,8 +2534,7 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) return ERR_PTR(-EINVAL); - domain = protection_domain_alloc(type, - dev ? dev_to_node(dev) : NUMA_NO_NODE); + domain = protection_domain_alloc(type, dev_to_node(dev)); if (!domain) return ERR_PTR(-ENOMEM); @@ -2554,13 +2550,11 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, domain->domain.geometry.force_aperture = true; domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap; - if (iommu) { - domain->domain.type = type; - domain->domain.ops = iommu->iommu.ops->default_domain_ops; + domain->domain.type = type; + domain->domain.ops = iommu->iommu.ops->default_domain_ops; - if (dirty_tracking) - domain->domain.dirty_ops = &amd_dirty_ops; - } + if (dirty_tracking) + domain->domain.dirty_ops = &amd_dirty_ops; return &domain->domain; } @@ -2571,13 +2565,10 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, { unsigned int type = IOMMU_DOMAIN_UNMANAGED; - struct amd_iommu *iommu = NULL; + struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID; - if (dev) - iommu = get_amd_iommu_from_dev(dev); - if ((flags & ~supported_flags) || user_data) return ERR_PTR(-EOPNOTSUPP); @@ -2591,10 +2582,9 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, /* Allocate domain with v1 page table for dirty tracking */ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) { - if (iommu && amd_iommu_hd_support(iommu)) { - return do_iommu_domain_alloc(type, dev, - flags, AMD_IOMMU_V1); - } + if (amd_iommu_hd_support(iommu)) + return do_iommu_domain_alloc(type, dev, flags, + AMD_IOMMU_V1); return ERR_PTR(-EOPNOTSUPP); } From 55b237dd7f7ec2ee9c7986e0fc28c5867bf63282 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:02 -0400 Subject: [PATCH 62/66] iommu/amd: Remove type argument from 
do_iommu_domain_alloc() and related do_iommu_domain_alloc() is only called from amd_iommu_domain_alloc_paging_flags() so type is always IOMMU_DOMAIN_UNMANAGED. Remove type and all the dead conditionals checking it. IOMMU_DOMAIN_IDENTITY checks are similarly obsolete as the conversion to the global static identity domain removed those call paths. The caller of protection_domain_alloc() should set the type, fix the miss in the SVA code. Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/iommu.c | 35 ++++++++++------------------------- drivers/iommu/amd/pasid.c | 3 ++- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index cdbfa5f096dd..d6f4cf823599 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -46,7 +46,7 @@ extern unsigned long amd_iommu_pgsize_bitmap; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); -struct protection_domain *protection_domain_alloc(unsigned int type, int nid); +struct protection_domain *protection_domain_alloc(int nid); void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b56b29b3a47d..649dfd22904f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2455,7 +2455,7 @@ static void protection_domain_init(struct protection_domain *domain, int nid) domain->iop.pgtbl.cfg.amd.nid = nid; } -struct protection_domain *protection_domain_alloc(unsigned int type, int nid) +struct protection_domain *protection_domain_alloc(int nid) { struct protection_domain *domain; int domid; @@ -2476,15 +2476,10 @@ struct protection_domain *protection_domain_alloc(unsigned 
int type, int nid) return domain; } -static int pdom_setup_pgtable(struct protection_domain *domain, - unsigned int type, int pgtable) +static int pdom_setup_pgtable(struct protection_domain *domain, int pgtable) { struct io_pgtable_ops *pgtbl_ops; - /* No need to allocate io pgtable ops in passthrough mode */ - if (!(type & __IOMMU_DOMAIN_PAGING)) - return 0; - switch (pgtable) { case AMD_IOMMU_V1: domain->pd_mode = PD_MODE_V1; @@ -2518,27 +2513,19 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu) return iommu && (iommu->features & FEATURE_HDSUP); } -static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, - struct device *dev, - u32 flags, int pgtable) +static struct iommu_domain *do_iommu_domain_alloc(struct device *dev, u32 flags, + int pgtable) { bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); struct protection_domain *domain; int ret; - /* - * Since DTE[Mode]=0 is prohibited on SNP-enabled system, - * default to use IOMMU_DOMAIN_DMA[_FQ]. 
- */ - if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) - return ERR_PTR(-EINVAL); - - domain = protection_domain_alloc(type, dev_to_node(dev)); + domain = protection_domain_alloc(dev_to_node(dev)); if (!domain) return ERR_PTR(-ENOMEM); - ret = pdom_setup_pgtable(domain, type, pgtable); + ret = pdom_setup_pgtable(domain, pgtable); if (ret) { pdom_id_free(domain->id); kfree(domain); @@ -2550,7 +2537,7 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, domain->domain.geometry.force_aperture = true; domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap; - domain->domain.type = type; + domain->domain.type = IOMMU_DOMAIN_UNMANAGED; domain->domain.ops = iommu->iommu.ops->default_domain_ops; if (dirty_tracking) @@ -2564,7 +2551,6 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { - unsigned int type = IOMMU_DOMAIN_UNMANAGED; struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID; @@ -2577,20 +2563,19 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, if (!amd_iommu_pasid_supported()) return ERR_PTR(-EOPNOTSUPP); - return do_iommu_domain_alloc(type, dev, flags, AMD_IOMMU_V2); + return do_iommu_domain_alloc(dev, flags, AMD_IOMMU_V2); } /* Allocate domain with v1 page table for dirty tracking */ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) { if (amd_iommu_hd_support(iommu)) - return do_iommu_domain_alloc(type, dev, flags, - AMD_IOMMU_V1); + return do_iommu_domain_alloc(dev, flags, AMD_IOMMU_V1); return ERR_PTR(-EOPNOTSUPP); } /* If nothing specific is required use the kernel commandline default */ - return do_iommu_domain_alloc(type, dev, 0, amd_iommu_pgtable); + return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable); } void amd_iommu_domain_free(struct iommu_domain *dom) diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 8c73a30c2800..9101d07b11d3 
100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -185,12 +185,13 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct protection_domain *pdom; int ret; - pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev)); + pdom = protection_domain_alloc(dev_to_node(dev)); if (!pdom) return ERR_PTR(-ENOMEM); pdom->domain.ops = &amd_sva_domain_ops; pdom->mn.ops = &sva_mn; + pdom->domain.type = IOMMU_DOMAIN_SVA; ret = mmu_notifier_register(&pdom->mn, mm); if (ret) { From 13b4ec749163710e3d188d2fed7405308b1b1e73 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:03 -0400 Subject: [PATCH 63/66] iommu/amd: Change amd_iommu_pgtable to use enum protection_domain_mode Currently it uses enum io_pgtable_fmt which is from the io pagetable code and most of the enum values are invalid. protection_domain_mode is internal to the driver and has only the two valid values. Fix some signatures and variables to use the right type as well.
Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/init.c | 14 +++++++------- drivers/iommu/amd/iommu.c | 34 +++++++++++++++++----------------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index d6f4cf823599..0fb0c8392b53 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -40,7 +40,7 @@ void amd_iommu_disable(void); int amd_iommu_reenable(int mode); int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; -extern enum io_pgtable_fmt amd_iommu_pgtable; +extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; extern unsigned long amd_iommu_pgsize_bitmap; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9db8fd1275be..1d0a82ab9c1c 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -152,7 +152,7 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; -enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; +enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -2164,7 +2164,7 @@ static void print_iommu_info(void) if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) pr_info("X2APIC enabled\n"); } - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (amd_iommu_pgtable == PD_MODE_V2) { pr_info("V2 page table enabled (Paging mode : %d level)\n", amd_iommu_gpt_level); } @@ -3082,10 +3082,10 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; - if (amd_iommu_pgtable == AMD_IOMMU_V2) { + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page 
table for DMA-API. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } } @@ -3208,7 +3208,7 @@ static void iommu_snp_enable(void) goto disable_snp; } - if (amd_iommu_pgtable != AMD_IOMMU_V1) { + if (amd_iommu_pgtable != PD_MODE_V1) { pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); goto disable_snp; } @@ -3485,9 +3485,9 @@ static int __init parse_amd_iommu_options(char *str) } else if (strncmp(str, "force_isolation", 15) == 0) { amd_iommu_force_isolation = true; } else if (strncmp(str, "pgtbl_v1", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V1; + amd_iommu_pgtable = PD_MODE_V1; } else if (strncmp(str, "pgtbl_v2", 8) == 0) { - amd_iommu_pgtable = AMD_IOMMU_V2; + amd_iommu_pgtable = PD_MODE_V2; } else if (strncmp(str, "irtcachedis", 11) == 0) { amd_iommu_irtcachedis = true; } else if (strncmp(str, "nohugepages", 11) == 0) { diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 649dfd22904f..3a12ef96e7ea 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2476,32 +2476,30 @@ struct protection_domain *protection_domain_alloc(int nid) return domain; } -static int pdom_setup_pgtable(struct protection_domain *domain, int pgtable) +static int pdom_setup_pgtable(struct protection_domain *domain) { struct io_pgtable_ops *pgtbl_ops; + enum io_pgtable_fmt fmt; - switch (pgtable) { - case AMD_IOMMU_V1: - domain->pd_mode = PD_MODE_V1; + switch (domain->pd_mode) { + case PD_MODE_V1: + fmt = AMD_IOMMU_V1; break; - case AMD_IOMMU_V2: - domain->pd_mode = PD_MODE_V2; + case PD_MODE_V2: + fmt = AMD_IOMMU_V2; break; - default: - return -EINVAL; } - pgtbl_ops = - alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain); + pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain); if (!pgtbl_ops) return -ENOMEM; return 0; } -static inline u64 dma_max_address(int pgtable) +static inline u64 dma_max_address(enum protection_domain_mode pgtable) { - if 
(pgtable == AMD_IOMMU_V1) + if (pgtable == PD_MODE_V1) return ~0ULL; /* V2 with 4/5 level page table */ @@ -2513,8 +2511,9 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu) return iommu && (iommu->features & FEATURE_HDSUP); } -static struct iommu_domain *do_iommu_domain_alloc(struct device *dev, u32 flags, - int pgtable) +static struct iommu_domain * +do_iommu_domain_alloc(struct device *dev, u32 flags, + enum protection_domain_mode pgtable) { bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); @@ -2525,7 +2524,8 @@ static struct iommu_domain *do_iommu_domain_alloc(struct device *dev, u32 flags, if (!domain) return ERR_PTR(-ENOMEM); - ret = pdom_setup_pgtable(domain, pgtable); + domain->pd_mode = pgtable; + ret = pdom_setup_pgtable(domain); if (ret) { pdom_id_free(domain->id); kfree(domain); @@ -2563,13 +2563,13 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, if (!amd_iommu_pasid_supported()) return ERR_PTR(-EOPNOTSUPP); - return do_iommu_domain_alloc(dev, flags, AMD_IOMMU_V2); + return do_iommu_domain_alloc(dev, flags, PD_MODE_V2); } /* Allocate domain with v1 page table for dirty tracking */ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) { if (amd_iommu_hd_support(iommu)) - return do_iommu_domain_alloc(dev, flags, AMD_IOMMU_V1); + return do_iommu_domain_alloc(dev, flags, PD_MODE_V1); return ERR_PTR(-EOPNOTSUPP); } From 5a081f7f428cc68c2df8eb281bfbaedf65b749c1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:04 -0400 Subject: [PATCH 64/66] iommu/amd: Move the nid to pdom_setup_pgtable() The only thing that uses the nid is the io_pgtable code, and it should be set before calling alloc_io_pgtable_ops() to ensure that the top levels are allocated on the correct nid. Since dev is never NULL now we can just do this trivially and remove the other uses of nid. SVA and identity code paths never use it since they don't use io_pgtable. 
Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 +- drivers/iommu/amd/iommu.c | 22 +++++++++------------- drivers/iommu/amd/pasid.c | 2 +- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0fb0c8392b53..68debf5ee2d7 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -46,7 +46,7 @@ extern unsigned long amd_iommu_pgsize_bitmap; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); -struct protection_domain *protection_domain_alloc(int nid); +struct protection_domain *protection_domain_alloc(void); void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3a12ef96e7ea..0600f0682b91 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2177,7 +2177,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu, struct protection_domain *pdom) { struct pdom_iommu_info *pdom_iommu_info, *curr; - struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg; unsigned long flags; int ret = 0; @@ -2206,10 +2205,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu, goto out_unlock; } - /* Update NUMA Node ID */ - if (cfg->amd.nid == NUMA_NO_NODE) - cfg->amd.nid = dev_to_node(&iommu->dev->dev); - out_unlock: spin_unlock_irqrestore(&pdom->lock, flags); return ret; @@ -2446,16 +2441,15 @@ void protection_domain_free(struct protection_domain *domain) kfree(domain); } -static void protection_domain_init(struct protection_domain *domain, int nid) +static void protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); INIT_LIST_HEAD(&domain->dev_list); INIT_LIST_HEAD(&domain->dev_data_list); 
xa_init(&domain->iommu_array); - domain->iop.pgtbl.cfg.amd.nid = nid; } -struct protection_domain *protection_domain_alloc(int nid) +struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; int domid; @@ -2471,12 +2465,13 @@ struct protection_domain *protection_domain_alloc(int nid) } domain->id = domid; - protection_domain_init(domain, nid); + protection_domain_init(domain); return domain; } -static int pdom_setup_pgtable(struct protection_domain *domain) +static int pdom_setup_pgtable(struct protection_domain *domain, + struct device *dev) { struct io_pgtable_ops *pgtbl_ops; enum io_pgtable_fmt fmt; @@ -2490,6 +2485,7 @@ static int pdom_setup_pgtable(struct protection_domain *domain) break; } + domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev); pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain); if (!pgtbl_ops) return -ENOMEM; @@ -2520,12 +2516,12 @@ do_iommu_domain_alloc(struct device *dev, u32 flags, struct protection_domain *domain; int ret; - domain = protection_domain_alloc(dev_to_node(dev)); + domain = protection_domain_alloc(); if (!domain) return ERR_PTR(-ENOMEM); domain->pd_mode = pgtable; - ret = pdom_setup_pgtable(domain); + ret = pdom_setup_pgtable(domain, dev); if (ret) { pdom_id_free(domain->id); kfree(domain); @@ -2624,7 +2620,7 @@ void amd_iommu_init_identity_domain(void) identity_domain.id = pdom_id_alloc(); - protection_domain_init(&identity_domain, NUMA_NO_NODE); + protection_domain_init(&identity_domain); } /* Same as blocked domain except it supports only ops->attach_dev() */ diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 9101d07b11d3..11150cfd6718 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -185,7 +185,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct protection_domain *pdom; int ret; - pdom = protection_domain_alloc(dev_to_node(dev)); + pdom = protection_domain_alloc(); if (!pdom) return 
ERR_PTR(-ENOMEM); From 082f1bcae8d1b5f76e92e369091176b8d61120ec Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Jan 2025 12:35:05 -0400 Subject: [PATCH 65/66] iommu/amd: Fully decode all combinations of alloc_paging_flags Currently AMD does not support IOMMU_HWPT_ALLOC_PASID | IOMMU_HWPT_ALLOC_DIRTY_TRACKING. It should be rejected. Instead it creates a V1 domain without dirty tracking support. Use a switch to fully decode the flags. Fixes: ce2cd175469f ("iommu/amd: Enhance amd_iommu_domain_alloc_user()") Reviewed-by: Vasant Hegde Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v2-9776c53c2966+1c7-amd_paging_flags_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0600f0682b91..12c1245baaf9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2554,24 +2554,24 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, if ((flags & ~supported_flags) || user_data) return ERR_PTR(-EOPNOTSUPP); - /* Allocate domain with v2 page table if IOMMU supports PASID. */ - if (flags & IOMMU_HWPT_ALLOC_PASID) { + switch (flags & supported_flags) { + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING: + /* Allocate domain with v1 page table for dirty tracking */ + if (!amd_iommu_hd_support(iommu)) + break; + return do_iommu_domain_alloc(dev, flags, PD_MODE_V1); + case IOMMU_HWPT_ALLOC_PASID: + /* Allocate domain with v2 page table if IOMMU supports PASID.
*/ if (!amd_iommu_pasid_supported()) - return ERR_PTR(-EOPNOTSUPP); - + break; return do_iommu_domain_alloc(dev, flags, PD_MODE_V2); + case 0: + /* If nothing specific is required use the kernel commandline default */ + return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable); + default: + break; } - - /* Allocate domain with v1 page table for dirty tracking */ - if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) { - if (amd_iommu_hd_support(iommu)) - return do_iommu_domain_alloc(dev, flags, PD_MODE_V1); - - return ERR_PTR(-EOPNOTSUPP); - } - - /* If nothing specific is required use the kernel commandline default */ - return do_iommu_domain_alloc(dev, 0, amd_iommu_pgtable); + return ERR_PTR(-EOPNOTSUPP); } void amd_iommu_domain_free(struct iommu_domain *dom) From 54e7d90089b8100f120d7c2a5f3daee96eb6e203 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Jan 2025 20:26:42 +0100 Subject: [PATCH 66/66] iommu: Use str_enable_disable-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Heiko Stuebner Reviewed-by: Jason Gunthorpe Acked-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250114192642.912331-1-krzysztof.kozlowski@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 ++- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 ++- drivers/iommu/mtk_iommu.c | 9 +++++---- drivers/iommu/mtk_iommu_v1.c | 3 ++- drivers/iommu/rockchip-iommu.c | 3 ++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 4de1a55747e7..a5e9646efc8a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -4239,7 +4240,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) */ if (!!(reg & IDR0_COHACC) != coherent) dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", - coherent ? "true" : "false"); + str_true_false(coherent)); switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { case IDR0_STALL_MODEL_FORCE: diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 650664e0f6e3..4a42cfd75097 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -2117,7 +2118,7 @@ static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu) } dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt, - cnt == 1 ? 
"" : "s"); + str_plural(cnt)); iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index ab60901f8f92..034b0e670384 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -510,7 +511,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) bank->parent_dev, "fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n", int_state, fault_iova, fault_pa, regval, fault_larb, fault_port, - layer, write ? "write" : "read"); + layer, str_write_read(write)); } /* Interrupt clear */ @@ -602,7 +603,7 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, larb_mmu->bank[portid] = upper_32_bits(region->iova_base); dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n", - enable ? "enable" : "disable", dev_name(larb_mmu->dev), + str_enable_disable(enable), dev_name(larb_mmu->dev), portid_msk, regionid, upper_32_bits(region->iova_base)); if (enable) @@ -630,8 +631,8 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, } if (ret) dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n", - enable ? "enable" : "disable", - dev_name(data->dev), portid_msk, ret); + str_enable_disable(enable), dev_name(data->dev), + portid_msk, ret); } return ret; } diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index b6de1ca00cef..a565b9e40f4a 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -243,7 +244,7 @@ static void mtk_iommu_v1_config(struct mtk_iommu_v1_data *data, larb_mmu = &data->larb_imu[larbid]; dev_dbg(dev, "%s iommu port: %d\n", - enable ? 
"enable" : "disable", portid); + str_enable_disable(enable), portid); if (enable) larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4b369419b32c..323cc665c357 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "iommu-pages.h" @@ -611,7 +612,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id) dev_err(iommu->dev, "Page fault at %pad of type %s\n", &iova, - (flags == IOMMU_FAULT_WRITE) ? "write" : "read"); + str_write_read(flags == IOMMU_FAULT_WRITE)); log_iova(iommu, i, iova);