diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1d0f96813864..19e47c614ba7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2595,23 +2595,19 @@ static void arm_smmu_tlb_inv_context(void *cookie) arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } -static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, - unsigned long iova, size_t size, - size_t granule, - struct arm_smmu_domain *smmu_domain) +static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *cmd, + unsigned long iova, size_t size, + size_t granule, size_t pgsize) { - struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long end = iova + size, num_pages = 0, tg = 0; + unsigned long end = iova + size, num_pages = 0, tg = pgsize; size_t inv_range = granule; - struct arm_smmu_cmdq_batch cmds; if (!size) return; if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { - /* Get the leaf page size */ - tg = __ffs(smmu_domain->domain.pgsize_bitmap); - num_pages = size >> tg; /* Convert page size of 12,14,16 (log2) to 1,2,3 */ @@ -2631,8 +2627,6 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); - while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* @@ -2660,9 +2654,26 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, } cmd->tlbi.addr = iova; - arm_smmu_cmdq_batch_add(smmu, &cmds, cmd); + arm_smmu_cmdq_batch_add(smmu, cmds, cmd); iova += inv_range; } +} + +static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, + unsigned long iova, size_t size, + size_t granule, + struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cmdq_batch cmds; + size_t pgsize; + + /* Get the leaf page size */ + pgsize = 
__ffs(smmu_domain->domain.pgsize_bitmap); + + arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); + arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule, + pgsize); arm_smmu_cmdq_batch_submit(smmu, &cmds); } @@ -2718,6 +2729,194 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain); } +static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size, + size_t granule) +{ + size_t max_tlbi_ops; + + /* 0 size means invalidate all */ + if (!size || size == SIZE_MAX) + return true; + + if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) + return false; + + /* + * Borrowed from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, + * this is used as a threshold to replace "size_opcode" commands with a + * single "nsize_opcode" command, when SMMU doesn't implement the range + * invalidation feature, where there can be too many per-granule TLBIs, + * resulting in a soft lockup. + */ + max_tlbi_ops = 1 << (ilog2(granule) - 3); + return size >= max_tlbi_ops * granule; +} + +/* Used by non INV_TYPE_ATS* invalidations */ +static void arm_smmu_inv_to_cmdq_batch(struct arm_smmu_inv *inv, + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *cmd, + unsigned long iova, size_t size, + unsigned int granule) +{ + if (arm_smmu_inv_size_too_big(inv->smmu, size, granule)) { + cmd->opcode = inv->nsize_opcode; + arm_smmu_cmdq_batch_add(inv->smmu, cmds, cmd); + return; + } + + cmd->opcode = inv->size_opcode; + arm_smmu_cmdq_batch_add_range(inv->smmu, cmds, cmd, iova, size, granule, + inv->pgsize); +} + +static inline bool arm_smmu_invs_end_batch(struct arm_smmu_inv *cur, + struct arm_smmu_inv *next) +{ + /* Changing smmu means changing command queue */ + if (cur->smmu != next->smmu) + return true; + /* The batch for S2 TLBI must be done before nested S1 ASIDs */ + if (cur->type != INV_TYPE_S2_VMID_S1_CLEAR && + next->type == INV_TYPE_S2_VMID_S1_CLEAR) + return true; + /* 
ATS must be after a sync of the S1/S2 invalidations */ + if (!arm_smmu_inv_is_ats(cur) && arm_smmu_inv_is_ats(next)) + return true; + return false; +} + +static void __arm_smmu_domain_inv_range(struct arm_smmu_invs *invs, + unsigned long iova, size_t size, + unsigned int granule, bool leaf) +{ + struct arm_smmu_cmdq_batch cmds = {}; + struct arm_smmu_inv *cur; + struct arm_smmu_inv *end; + + cur = invs->inv; + end = cur + READ_ONCE(invs->num_invs); + /* Skip any leading entry marked as a trash */ + for (; cur != end; cur++) + if (READ_ONCE(cur->users)) + break; + while (cur != end) { + struct arm_smmu_device *smmu = cur->smmu; + struct arm_smmu_cmdq_ent cmd = { + /* + * Pick size_opcode to run arm_smmu_get_cmdq(). This can + * be changed to nsize_opcode, which would result in the + * same CMDQ pointer. + */ + .opcode = cur->size_opcode, + }; + struct arm_smmu_inv *next; + + if (!cmds.num) + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); + + switch (cur->type) { + case INV_TYPE_S1_ASID: + cmd.tlbi.asid = cur->id; + cmd.tlbi.leaf = leaf; + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size, + granule); + break; + case INV_TYPE_S2_VMID: + cmd.tlbi.vmid = cur->id; + cmd.tlbi.leaf = leaf; + arm_smmu_inv_to_cmdq_batch(cur, &cmds, &cmd, iova, size, + granule); + break; + case INV_TYPE_S2_VMID_S1_CLEAR: + /* CMDQ_OP_TLBI_S12_VMALL already flushed S1 entries */ + if (arm_smmu_inv_size_too_big(cur->smmu, size, granule)) + continue; + cmd.tlbi.vmid = cur->id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + break; + case INV_TYPE_ATS: + arm_smmu_atc_inv_to_cmd(cur->ssid, iova, size, &cmd); + cmd.atc.sid = cur->id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + break; + case INV_TYPE_ATS_FULL: + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + cmd.atc.sid = cur->id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + break; + default: + WARN_ON_ONCE(1); + continue; + } + + /* Skip any trash entry in-between */ + for (next = cur + 1; next != end; next++) + if 
(READ_ONCE(next->users)) + break; + + if (cmds.num && + (next == end || arm_smmu_invs_end_batch(cur, next))) { + arm_smmu_cmdq_batch_submit(smmu, &cmds); + cmds.num = 0; + } + cur = next; + } +} + +void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size, + unsigned int granule, bool leaf) +{ + struct arm_smmu_invs *invs; + + /* + * An invalidation request must follow some IOPTE change and then load + * an invalidation array. In the meantime, a domain attachment mutates + * the array and then stores an STE/CD asking SMMU HW to acquire those + * changed IOPTEs. + * + * When running alone, a domain attachment relies on the dma_wmb() in + * arm_smmu_write_entry() used by arm_smmu_install_ste_for_dev(). + * + * But in a race, these two can be interdependent, making it a special + * case requiring an additional smp_mb() for the write->read ordering. + * Pairing with the dma_wmb() in arm_smmu_install_ste_for_dev(), this + * makes sure that IOPTE update prior to this point is visible to SMMU + * hardware before we load the updated invalidation array. + * + * [CPU0] | [CPU1] + * change IOPTE on new domain: | + * arm_smmu_domain_inv_range() { | arm_smmu_install_new_domain_invs() + * smp_mb(); // ensures IOPTE | arm_smmu_install_ste_for_dev { + * // seen by SMMU | dma_wmb(); // ensures invs update + * // load the updated invs | // before updating STE + * invs = rcu_dereference(); | STE = TTB0; + * ... | ... + * } | } + */ + smp_mb(); + + rcu_read_lock(); + invs = rcu_dereference(smmu_domain->invs); + + /* + * Avoid locking unless ATS is being used. No ATC invalidation can be + * going on after a domain is detached. 
+ */ + if (invs->has_ats) { + unsigned long flags; + + read_lock_irqsave(&invs->rwlock, flags); + __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf); + read_unlock_irqrestore(&invs->rwlock, flags); + } else { + __arm_smmu_domain_inv_range(invs, iova, size, granule, leaf); + } + + rcu_read_unlock(); +} + static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, unsigned long iova, size_t granule, void *cookie) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 83d7e4952dff..534e9a5ddca3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -1087,6 +1087,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, unsigned long iova, size_t size); +void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size, + unsigned int granule, bool leaf); + +static inline void arm_smmu_domain_inv(struct arm_smmu_domain *smmu_domain) +{ + arm_smmu_domain_inv_range(smmu_domain, 0, 0, 0, false); +} + void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq); int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,