mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 01:53:29 +02:00
iommu/arm-smmu-v3: Perform per-domain invalidations using arm_smmu_invs
Replace the old invalidation functions with arm_smmu_domain_inv_range() in all the existing invalidation routines. And deprecate the old functions. The new arm_smmu_domain_inv_range() handles the CMDQ_MAX_TLBI_OPS as well, so drop it in the SVA function. Since arm_smmu_cmdq_batch_add_range() has only one caller now, and it must be given a valid size, add a WARN_ON_ONCE to catch any missed case. Also update the comments in arm_smmu_tlb_inv_context() to clarify things with the new invalidation functions. Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
parent
587bb3e56a
commit
4202fddd01
|
|
@ -122,15 +122,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
|
|||
}
|
||||
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
|
||||
|
||||
/*
|
||||
* Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
|
||||
* is used as a threshold to replace per-page TLBI commands to issue in the
|
||||
* command queue with an address-space TLBI command, when SMMU w/o a range
|
||||
* invalidation feature handles too many per-page TLBI commands, which will
|
||||
* otherwise result in a soft lockup.
|
||||
*/
|
||||
#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3))
|
||||
|
||||
static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
|
||||
struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
|
|
@ -146,21 +137,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
|
|||
* range. So do a simple translation here by calculating size correctly.
|
||||
*/
|
||||
size = end - start;
|
||||
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
|
||||
if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
|
||||
size = 0;
|
||||
} else {
|
||||
if (size == ULONG_MAX)
|
||||
size = 0;
|
||||
}
|
||||
|
||||
if (!size)
|
||||
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
|
||||
else
|
||||
arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid,
|
||||
PAGE_SIZE, false, smmu_domain);
|
||||
|
||||
arm_smmu_atc_inv_domain(smmu_domain, start, size);
|
||||
arm_smmu_domain_inv_range(smmu_domain, start, size, PAGE_SIZE, false);
|
||||
}
|
||||
|
||||
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
|
||||
|
|
@ -191,8 +169,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
|
|||
}
|
||||
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
|
||||
|
||||
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
|
||||
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
|
||||
arm_smmu_domain_inv(smmu_domain);
|
||||
}
|
||||
|
||||
static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
|
||||
|
|
@ -302,7 +279,7 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
|
|||
/*
|
||||
* Ensure the ASID is empty in the iommu cache before allowing reuse.
|
||||
*/
|
||||
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
|
||||
arm_smmu_domain_inv(smmu_domain);
|
||||
|
||||
/*
|
||||
* Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can
|
||||
|
|
|
|||
|
|
@ -1289,16 +1289,6 @@ struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs)
|
|||
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_invs_purge);
|
||||
|
||||
/* Context descriptor manipulation functions */
|
||||
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
|
||||
{
|
||||
struct arm_smmu_cmdq_ent cmd = {
|
||||
.opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
|
||||
CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
|
||||
.tlbi.asid = asid,
|
||||
};
|
||||
|
||||
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Based on the value of ent report which bits of the STE the HW will access. It
|
||||
|
|
@ -2509,90 +2499,27 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
|
|||
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
|
||||
}
|
||||
|
||||
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
|
||||
unsigned long iova, size_t size)
|
||||
{
|
||||
struct arm_smmu_master_domain *master_domain;
|
||||
int i;
|
||||
unsigned long flags;
|
||||
struct arm_smmu_cmdq_ent cmd = {
|
||||
.opcode = CMDQ_OP_ATC_INV,
|
||||
};
|
||||
struct arm_smmu_cmdq_batch cmds;
|
||||
|
||||
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Ensure that we've completed prior invalidation of the main TLBs
|
||||
* before we read 'nr_ats_masters' in case of a concurrent call to
|
||||
* arm_smmu_enable_ats():
|
||||
*
|
||||
* // unmap() // arm_smmu_enable_ats()
|
||||
* TLBI+SYNC atomic_inc(&nr_ats_masters);
|
||||
* smp_mb(); [...]
|
||||
* atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
|
||||
*
|
||||
* Ensures that we always see the incremented 'nr_ats_masters' count if
|
||||
* ATS was enabled at the PCI device before completion of the TLBI.
|
||||
*/
|
||||
smp_mb();
|
||||
if (!atomic_read(&smmu_domain->nr_ats_masters))
|
||||
return 0;
|
||||
|
||||
arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
|
||||
|
||||
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
|
||||
list_for_each_entry(master_domain, &smmu_domain->devices,
|
||||
devices_elm) {
|
||||
struct arm_smmu_master *master = master_domain->master;
|
||||
|
||||
if (!master->ats_enabled)
|
||||
continue;
|
||||
|
||||
if (master_domain->nested_ats_flush) {
|
||||
/*
|
||||
* If a S2 used as a nesting parent is changed we have
|
||||
* no option but to completely flush the ATC.
|
||||
*/
|
||||
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
|
||||
} else {
|
||||
arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
|
||||
&cmd);
|
||||
}
|
||||
|
||||
for (i = 0; i < master->num_streams; i++) {
|
||||
cmd.atc.sid = master->streams[i].id;
|
||||
arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
|
||||
|
||||
return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
|
||||
}
|
||||
|
||||
/* IO_PGTABLE API */
|
||||
static void arm_smmu_tlb_inv_context(void *cookie)
|
||||
{
|
||||
struct arm_smmu_domain *smmu_domain = cookie;
|
||||
struct arm_smmu_device *smmu = smmu_domain->smmu;
|
||||
struct arm_smmu_cmdq_ent cmd;
|
||||
|
||||
/*
|
||||
* NOTE: when io-pgtable is in non-strict mode, we may get here with
|
||||
* PTEs previously cleared by unmaps on the current CPU not yet visible
|
||||
* to the SMMU. We are relying on the dma_wmb() implicit during cmd
|
||||
* insertion to guarantee those are observed before the TLBI. Do be
|
||||
* careful, 007.
|
||||
* If the DMA API is running in non-strict mode then another CPU could
|
||||
* have changed the page table and not invoked any flush op. Instead the
|
||||
* other CPU will do an atomic_read() and this CPU will have done an
|
||||
* atomic_write(). That handshake is enough to acquire the page table
|
||||
* writes from the other CPU.
|
||||
*
|
||||
* All command execution has a dma_wmb() to release all the in-memory
|
||||
* structures written by this CPU, that barrier must also release the
|
||||
* writes acquired from all the other CPUs too.
|
||||
*
|
||||
* There are other barriers and atomics on this path, but the above is
|
||||
* the essential mechanism for ensuring that HW sees the page table
|
||||
* writes from another CPU before it executes the IOTLB invalidation.
|
||||
*/
|
||||
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
|
||||
arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
|
||||
} else {
|
||||
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
|
||||
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
|
||||
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
|
||||
}
|
||||
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
|
||||
arm_smmu_domain_inv(smmu_domain);
|
||||
}
|
||||
|
||||
static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
|
||||
|
|
@ -2604,7 +2531,7 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
|
|||
unsigned long end = iova + size, num_pages = 0, tg = pgsize;
|
||||
size_t inv_range = granule;
|
||||
|
||||
if (!size)
|
||||
if (WARN_ON_ONCE(!size))
|
||||
return;
|
||||
|
||||
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
|
||||
|
|
@ -2659,76 +2586,6 @@ static void arm_smmu_cmdq_batch_add_range(struct arm_smmu_device *smmu,
|
|||
}
|
||||
}
|
||||
|
||||
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
|
||||
unsigned long iova, size_t size,
|
||||
size_t granule,
|
||||
struct arm_smmu_domain *smmu_domain)
|
||||
{
|
||||
struct arm_smmu_device *smmu = smmu_domain->smmu;
|
||||
struct arm_smmu_cmdq_batch cmds;
|
||||
size_t pgsize;
|
||||
|
||||
/* Get the leaf page size */
|
||||
pgsize = __ffs(smmu_domain->domain.pgsize_bitmap);
|
||||
|
||||
arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
|
||||
arm_smmu_cmdq_batch_add_range(smmu, &cmds, cmd, iova, size, granule,
|
||||
pgsize);
|
||||
arm_smmu_cmdq_batch_submit(smmu, &cmds);
|
||||
}
|
||||
|
||||
static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
|
||||
size_t granule, bool leaf,
|
||||
struct arm_smmu_domain *smmu_domain)
|
||||
{
|
||||
struct arm_smmu_cmdq_ent cmd = {
|
||||
.tlbi = {
|
||||
.leaf = leaf,
|
||||
},
|
||||
};
|
||||
|
||||
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
|
||||
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
|
||||
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
|
||||
cmd.tlbi.asid = smmu_domain->cd.asid;
|
||||
} else {
|
||||
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
|
||||
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
|
||||
}
|
||||
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
|
||||
|
||||
if (smmu_domain->nest_parent) {
|
||||
/*
|
||||
* When the S2 domain changes all the nested S1 ASIDs have to be
|
||||
* flushed too.
|
||||
*/
|
||||
cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
|
||||
arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unfortunately, this can't be leaf-only since we may have
|
||||
* zapped an entire table.
|
||||
*/
|
||||
arm_smmu_atc_inv_domain(smmu_domain, iova, size);
|
||||
}
|
||||
|
||||
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
|
||||
size_t granule, bool leaf,
|
||||
struct arm_smmu_domain *smmu_domain)
|
||||
{
|
||||
struct arm_smmu_cmdq_ent cmd = {
|
||||
.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
|
||||
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
|
||||
.tlbi = {
|
||||
.asid = asid,
|
||||
.leaf = leaf,
|
||||
},
|
||||
};
|
||||
|
||||
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
|
||||
}
|
||||
|
||||
static bool arm_smmu_inv_size_too_big(struct arm_smmu_device *smmu, size_t size,
|
||||
size_t granule)
|
||||
{
|
||||
|
|
@ -2930,7 +2787,9 @@ static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
|
|||
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
|
||||
size_t granule, void *cookie)
|
||||
{
|
||||
arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
|
||||
struct arm_smmu_domain *smmu_domain = cookie;
|
||||
|
||||
arm_smmu_domain_inv_range(smmu_domain, iova, size, granule, false);
|
||||
}
|
||||
|
||||
static const struct iommu_flush_ops arm_smmu_flush_ops = {
|
||||
|
|
@ -4201,9 +4060,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
|
|||
if (!gather->pgsize)
|
||||
return;
|
||||
|
||||
arm_smmu_tlb_inv_range_domain(gather->start,
|
||||
gather->end - gather->start + 1,
|
||||
gather->pgsize, true, smmu_domain);
|
||||
arm_smmu_domain_inv_range(smmu_domain, gather->start,
|
||||
gather->end - gather->start + 1,
|
||||
gather->pgsize, true);
|
||||
}
|
||||
|
||||
static phys_addr_t
|
||||
|
|
|
|||
|
|
@ -1080,13 +1080,6 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
|
|||
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
|
||||
struct arm_smmu_cd *cd, struct iommu_domain *old);
|
||||
|
||||
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
|
||||
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
|
||||
size_t granule, bool leaf,
|
||||
struct arm_smmu_domain *smmu_domain);
|
||||
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
|
||||
unsigned long iova, size_t size);
|
||||
|
||||
void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
|
||||
unsigned long iova, size_t size,
|
||||
unsigned int granule, bool leaf);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user