Merge branches 'fixes', 'arm/smmu/updates', 'arm/smmu/bindings', 'riscv', 'intel/vt-d', 'amd/amd-vi' and 'core' into next

Will Deacon 2026-04-09 13:18:27 +01:00
43 changed files with 2167 additions and 799 deletions

View File

@ -207,8 +207,14 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | MMU-600         | #1076982,1209401| N/A                         |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | MMU-700         | #2268618,2812531| N/A                         |
| ARM            | MMU-700         | #2133013,       | N/A                         |
|                |                 | #2268618,       |                             |
|                |                 | #2812531,       |                             |
|                |                 | #3777127        |                             |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | MMU L1          | #3878312        | N/A                         |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | MMU S3          | #3995052        | N/A                         |
+----------------+-----------------+-----------------+-----------------------------+
| ARM            | GIC-700         | #2941627        | ARM64_ERRATUM_2941627       |
+----------------+-----------------+-----------------+-----------------------------+

View File

@ -35,6 +35,7 @@ properties:
- description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500"
items:
- enum:
- qcom,eliza-smmu-500
- qcom,glymur-smmu-500
- qcom,kaanapali-smmu-500
- qcom,milos-smmu-500
@ -92,6 +93,7 @@ properties:
items:
- enum:
- qcom,glymur-smmu-500
- qcom,hawi-smmu-500
- qcom,kaanapali-smmu-500
- qcom,milos-smmu-500
- qcom,qcm2290-smmu-500

View File

@ -26,22 +26,19 @@ static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf,
{
struct seq_file *m = filp->private_data;
struct amd_iommu *iommu = m->private;
int ret;
iommu->dbg_mmio_offset = -1;
int ret, dbg_mmio_offset = iommu->dbg_mmio_offset = -1;
if (cnt > OFS_IN_SZ)
return -EINVAL;
ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset);
ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_mmio_offset);
if (ret)
return ret;
if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64)) {
iommu->dbg_mmio_offset = -1;
return -EINVAL;
}
if (dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64))
return -EINVAL;
iommu->dbg_mmio_offset = dbg_mmio_offset;
return cnt;
}
@ -49,14 +46,16 @@ static int iommu_mmio_show(struct seq_file *m, void *unused)
{
struct amd_iommu *iommu = m->private;
u64 value;
int dbg_mmio_offset = iommu->dbg_mmio_offset;
if (iommu->dbg_mmio_offset < 0) {
if (dbg_mmio_offset < 0 || dbg_mmio_offset >
iommu->mmio_phys_end - sizeof(u64)) {
seq_puts(m, "Please provide mmio register's offset\n");
return 0;
}
value = readq(iommu->mmio_base + iommu->dbg_mmio_offset);
seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value);
value = readq(iommu->mmio_base + dbg_mmio_offset);
seq_printf(m, "Offset:0x%x Value:0x%016llx\n", dbg_mmio_offset, value);
return 0;
}
@ -67,23 +66,20 @@ static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf
{
struct seq_file *m = filp->private_data;
struct amd_iommu *iommu = m->private;
int ret;
iommu->dbg_cap_offset = -1;
int ret, dbg_cap_offset = iommu->dbg_cap_offset = -1;
if (cnt > OFS_IN_SZ)
return -EINVAL;
ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset);
ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_cap_offset);
if (ret)
return ret;
/* Capability register at offset 0x14 is the last IOMMU capability register. */
if (iommu->dbg_cap_offset > 0x14) {
iommu->dbg_cap_offset = -1;
if (dbg_cap_offset > 0x14)
return -EINVAL;
}
iommu->dbg_cap_offset = dbg_cap_offset;
return cnt;
}
@ -91,21 +87,21 @@ static int iommu_capability_show(struct seq_file *m, void *unused)
{
struct amd_iommu *iommu = m->private;
u32 value;
int err;
int err, dbg_cap_offset = iommu->dbg_cap_offset;
if (iommu->dbg_cap_offset < 0) {
if (dbg_cap_offset < 0 || dbg_cap_offset > 0x14) {
seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n");
return 0;
}
err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value);
err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + dbg_cap_offset, &value);
if (err) {
seq_printf(m, "Not able to read capability register at 0x%x\n",
iommu->dbg_cap_offset);
dbg_cap_offset);
return 0;
}
seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value);
seq_printf(m, "Offset:0x%x Value:0x%08x\n", dbg_cap_offset, value);
return 0;
}
@ -197,10 +193,11 @@ static ssize_t devid_write(struct file *filp, const char __user *ubuf,
static int devid_show(struct seq_file *m, void *unused)
{
u16 devid;
int sbdf_shadow = sbdf;
if (sbdf >= 0) {
devid = PCI_SBDF_TO_DEVID(sbdf);
seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf),
if (sbdf_shadow >= 0) {
devid = PCI_SBDF_TO_DEVID(sbdf_shadow);
seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf_shadow),
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
} else
seq_puts(m, "No or Invalid input provided\n");
@ -237,13 +234,14 @@ static int iommu_devtbl_show(struct seq_file *m, void *unused)
{
struct amd_iommu_pci_seg *pci_seg;
u16 seg, devid;
int sbdf_shadow = sbdf;
if (sbdf < 0) {
if (sbdf_shadow < 0) {
seq_puts(m, "Enter a valid device ID to 'devid' file\n");
return 0;
}
seg = PCI_SBDF_TO_SEGID(sbdf);
devid = PCI_SBDF_TO_DEVID(sbdf);
seg = PCI_SBDF_TO_SEGID(sbdf_shadow);
devid = PCI_SBDF_TO_DEVID(sbdf_shadow);
for_each_pci_segment(pci_seg) {
if (pci_seg->id != seg)
@ -336,19 +334,20 @@ static int iommu_irqtbl_show(struct seq_file *m, void *unused)
{
struct amd_iommu_pci_seg *pci_seg;
u16 devid, seg;
int sbdf_shadow = sbdf;
if (!irq_remapping_enabled) {
seq_puts(m, "Interrupt remapping is disabled\n");
return 0;
}
if (sbdf < 0) {
if (sbdf_shadow < 0) {
seq_puts(m, "Enter a valid device ID to 'devid' file\n");
return 0;
}
seg = PCI_SBDF_TO_SEGID(sbdf);
devid = PCI_SBDF_TO_DEVID(sbdf);
seg = PCI_SBDF_TO_SEGID(sbdf_shadow);
devid = PCI_SBDF_TO_DEVID(sbdf_shadow);
for_each_pci_segment(pci_seg) {
if (pci_seg->id != seg)

View File

@ -848,10 +848,11 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
size_t size)
{
int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
void *buf;
size = PAGE_ALIGN(size);
buf = iommu_alloc_pages_sz(gfp, size);
buf = iommu_alloc_pages_node_sz(nid, gfp, size);
if (!buf)
return NULL;
if (check_feature(FEATURE_SNP) &&
@ -954,14 +955,16 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
static int iommu_init_ga_log(struct amd_iommu *iommu)
{
int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
return 0;
iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE);
iommu->ga_log = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, GA_LOG_SIZE);
if (!iommu->ga_log)
goto err_out;
iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8);
iommu->ga_log_tail = iommu_alloc_pages_node_sz(nid, GFP_KERNEL, 8);
if (!iommu->ga_log_tail)
goto err_out;

View File

@ -403,11 +403,12 @@ struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
return NULL;
}
static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
static int clone_alias(struct pci_dev *pdev_origin, u16 alias, void *data)
{
struct dev_table_entry new;
struct amd_iommu *iommu;
struct iommu_dev_data *dev_data, *alias_data;
struct pci_dev *pdev = data;
u16 devid = pci_dev_id(pdev);
int ret = 0;
@ -454,9 +455,9 @@ static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
* part of the PCI DMA aliases if its bus differs
* from the original device.
*/
clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL);
clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], pdev);
pci_for_each_dma_alias(pdev, clone_alias, NULL);
pci_for_each_dma_alias(pdev, clone_alias, pdev);
}
static void setup_aliases(struct amd_iommu *iommu, struct device *dev)
@ -2991,13 +2992,17 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return amdr_ivrs_remap_support;
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return true;
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_DIRTY_TRACKING: {
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
return amd_iommu_hd_support(iommu);
}
case IOMMU_CAP_PCI_ATS_SUPPORTED: {
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
return amd_iommu_iotlb_sup &&
(dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP);
}
default:
break;
}
@ -3179,26 +3184,44 @@ const struct iommu_ops amd_iommu_ops = {
static struct irq_chip amd_ir_chip;
static DEFINE_SPINLOCK(iommu_table_lock);
static int iommu_flush_dev_irt(struct pci_dev *unused, u16 devid, void *data)
{
int ret;
struct iommu_cmd cmd;
struct amd_iommu *iommu = data;
build_inv_irt(&cmd, devid);
ret = __iommu_queue_command_sync(iommu, &cmd, true);
return ret;
}
static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
{
int ret;
u64 data;
unsigned long flags;
struct iommu_cmd cmd, cmd2;
struct iommu_cmd cmd;
struct pci_dev *pdev = NULL;
struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
if (iommu->irtcachedis_enabled)
return;
build_inv_irt(&cmd, devid);
if (dev_data && dev_data->dev && dev_is_pci(dev_data->dev))
pdev = to_pci_dev(dev_data->dev);
raw_spin_lock_irqsave(&iommu->lock, flags);
data = get_cmdsem_val(iommu);
build_completion_wait(&cmd2, iommu, data);
build_completion_wait(&cmd, iommu, data);
ret = __iommu_queue_command_sync(iommu, &cmd, true);
if (pdev)
ret = pci_for_each_dma_alias(pdev, iommu_flush_dev_irt, iommu);
else
ret = iommu_flush_dev_irt(NULL, devid, iommu);
if (ret)
goto out_err;
ret = __iommu_queue_command_sync(iommu, &cmd2, false);
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_err;
raw_spin_unlock_irqrestore(&iommu->lock, flags);

View File

@ -122,15 +122,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
/*
* Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this
* is used as a threshold to replace per-page TLBI commands to issue in the
* command queue with an address-space TLBI command, when SMMU w/o a range
* invalidation feature handles too many per-page TLBI commands, which will
* otherwise result in a soft lockup.
*/
#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3))
static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
@ -146,21 +137,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
* range. So do a simple translation here by calculating size correctly.
*/
size = end - start;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) {
if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE)
size = 0;
} else {
if (size == ULONG_MAX)
size = 0;
}
if (!size)
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
else
arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid,
PAGE_SIZE, false, smmu_domain);
arm_smmu_atc_inv_domain(smmu_domain, start, size);
arm_smmu_domain_inv_range(smmu_domain, start, size, PAGE_SIZE, false);
}
static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@ -191,13 +169,13 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
arm_smmu_domain_inv(smmu_domain);
}
static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
{
kfree(container_of(mn, struct arm_smmu_domain, mmu_notifier));
arm_smmu_domain_free(
container_of(mn, struct arm_smmu_domain, mmu_notifier));
}
static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
@ -301,7 +279,7 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
/*
* Ensure the ASID is empty in the iommu cache before allowing reuse.
*/
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid);
arm_smmu_domain_inv(smmu_domain);
/*
* Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can
@ -346,6 +324,7 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
* ARM_SMMU_FEAT_RANGE_INV is present
*/
smmu_domain->domain.pgsize_bitmap = PAGE_SIZE;
smmu_domain->stage = ARM_SMMU_DOMAIN_SVA;
smmu_domain->smmu = smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
@ -364,6 +343,6 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
err_asid:
xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
err_free:
kfree(smmu_domain);
arm_smmu_domain_free(smmu_domain);
return ERR_PTR(ret);
}

View File

@ -637,6 +637,140 @@ static void arm_smmu_v3_write_cd_test_sva_release(struct kunit *test)
NUM_EXPECTED_SYNCS(2));
}
static void arm_smmu_v3_invs_test_verify(struct kunit *test,
struct arm_smmu_invs *invs,
int num_invs, const int num_trashes,
const int *ids, const int *users,
const int *ssids)
{
KUNIT_EXPECT_EQ(test, invs->num_invs, num_invs);
KUNIT_EXPECT_EQ(test, invs->num_trashes, num_trashes);
while (num_invs--) {
KUNIT_EXPECT_EQ(test, invs->inv[num_invs].id, ids[num_invs]);
KUNIT_EXPECT_EQ(test, READ_ONCE(invs->inv[num_invs].users),
users[num_invs]);
KUNIT_EXPECT_EQ(test, invs->inv[num_invs].ssid, ssids[num_invs]);
}
}
static struct arm_smmu_invs invs1 = {
.num_invs = 3,
.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, },
{ .type = INV_TYPE_S2_VMID_S1_CLEAR, .id = 1, },
{ .type = INV_TYPE_ATS, .id = 3, }, },
};
static struct arm_smmu_invs invs2 = {
.num_invs = 3,
.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
{ .type = INV_TYPE_ATS, .id = 4, },
{ .type = INV_TYPE_ATS, .id = 5, }, },
};
static struct arm_smmu_invs invs3 = {
.num_invs = 3,
.inv = { { .type = INV_TYPE_S2_VMID, .id = 1, }, /* duplicated */
{ .type = INV_TYPE_ATS, .id = 5, }, /* recover a trash */
{ .type = INV_TYPE_ATS, .id = 6, }, },
};
static struct arm_smmu_invs invs4 = {
.num_invs = 3,
.inv = { { .type = INV_TYPE_ATS, .id = 10, .ssid = 1 },
{ .type = INV_TYPE_ATS, .id = 10, .ssid = 3 },
{ .type = INV_TYPE_ATS, .id = 12, .ssid = 1 }, },
};
static struct arm_smmu_invs invs5 = {
.num_invs = 3,
.inv = { { .type = INV_TYPE_ATS, .id = 10, .ssid = 2 },
{ .type = INV_TYPE_ATS, .id = 10, .ssid = 3 }, /* duplicate */
{ .type = INV_TYPE_ATS, .id = 12, .ssid = 2 }, },
};
static void arm_smmu_v3_invs_test(struct kunit *test)
{
const int results1[3][3] = { { 1, 1, 3, }, { 1, 1, 1, }, { 0, 0, 0, } };
const int results2[3][5] = { { 1, 1, 3, 4, 5, }, { 2, 1, 1, 1, 1, }, { 0, 0, 0, 0, 0, } };
const int results3[3][3] = { { 1, 1, 3, }, { 1, 1, 1, }, { 0, 0, 0, } };
const int results4[3][5] = { { 1, 1, 3, 5, 6, }, { 2, 1, 1, 1, 1, }, { 0, 0, 0, 0, 0, } };
const int results5[3][5] = { { 1, 1, 3, 5, 6, }, { 1, 0, 0, 1, 1, }, { 0, 0, 0, 0, 0, } };
const int results6[3][3] = { { 1, 5, 6, }, { 1, 1, 1, }, { 0, 0, 0, } };
const int results7[3][3] = { { 10, 10, 12, }, { 1, 1, 1, }, { 1, 3, 1, } };
const int results8[3][5] = { { 10, 10, 10, 12, 12, }, { 1, 1, 2, 1, 1, }, { 1, 2, 3, 1, 2, } };
const int results9[3][4] = { { 10, 10, 10, 12, }, { 1, 0, 1, 1, }, { 1, 2, 3, 1, } };
const int results10[3][3] = { { 10, 10, 12, }, { 1, 1, 1, }, { 1, 3, 1, } };
struct arm_smmu_invs *test_a, *test_b;
/* New array */
test_a = arm_smmu_invs_alloc(0);
KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
/* Test1: merge invs1 (new array) */
test_b = arm_smmu_invs_merge(test_a, &invs1);
kfree(test_a);
arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results1[0]), 0,
results1[0], results1[1], results1[2]);
/* Test2: merge invs2 (new array) */
test_a = arm_smmu_invs_merge(test_b, &invs2);
kfree(test_b);
arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results2[0]), 0,
results2[0], results2[1], results2[2]);
/* Test3: unref invs2 (same array) */
arm_smmu_invs_unref(test_a, &invs2);
arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results3[0]), 0,
results3[0], results3[1], results3[2]);
/* Test4: merge invs3 (new array) */
test_b = arm_smmu_invs_merge(test_a, &invs3);
kfree(test_a);
arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results4[0]), 0,
results4[0], results4[1], results4[2]);
/* Test5: unref invs1 (same array) */
arm_smmu_invs_unref(test_b, &invs1);
arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results5[0]), 2,
results5[0], results5[1], results5[2]);
/* Test6: purge test_b (new array) */
test_a = arm_smmu_invs_purge(test_b);
kfree(test_b);
arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results6[0]), 0,
results6[0], results6[1], results6[2]);
/* Test7: unref invs3 (same array) */
arm_smmu_invs_unref(test_a, &invs3);
KUNIT_EXPECT_EQ(test, test_a->num_invs, 0);
KUNIT_EXPECT_EQ(test, test_a->num_trashes, 0);
/* Test8: merge invs4 (new array) */
test_b = arm_smmu_invs_merge(test_a, &invs4);
kfree(test_a);
arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results7[0]), 0,
results7[0], results7[1], results7[2]);
/* Test9: merge invs5 (new array) */
test_a = arm_smmu_invs_merge(test_b, &invs5);
kfree(test_b);
arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results8[0]), 0,
results8[0], results8[1], results8[2]);
/* Test10: unref invs5 (same array) */
arm_smmu_invs_unref(test_a, &invs5);
arm_smmu_v3_invs_test_verify(test, test_a, ARRAY_SIZE(results9[0]), 1,
results9[0], results9[1], results9[2]);
/* Test11: purge test_a (new array) */
test_b = arm_smmu_invs_purge(test_a);
kfree(test_a);
arm_smmu_v3_invs_test_verify(test, test_b, ARRAY_SIZE(results10[0]), 0,
results10[0], results10[1], results10[2]);
kfree(test_b);
}
static struct kunit_case arm_smmu_v3_test_cases[] = {
KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_abort),
KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_bypass),
@ -662,6 +796,7 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
KUNIT_CASE(arm_smmu_v3_write_ste_test_nested_s1bypass_to_s1dssbypass),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
KUNIT_CASE(arm_smmu_v3_invs_test),
{},
};

File diff suppressed because it is too large

View File

@ -648,6 +648,93 @@ struct arm_smmu_cmdq_batch {
int num;
};
/*
* The order here also determines the sequence in which commands are sent to the
* command queue. E.g. TLBI must be done before ATC_INV.
*/
enum arm_smmu_inv_type {
INV_TYPE_S1_ASID,
INV_TYPE_S2_VMID,
INV_TYPE_S2_VMID_S1_CLEAR,
INV_TYPE_ATS,
INV_TYPE_ATS_FULL,
};
struct arm_smmu_inv {
struct arm_smmu_device *smmu;
u8 type;
u8 size_opcode;
u8 nsize_opcode;
u32 id; /* ASID or VMID or SID */
union {
size_t pgsize; /* ARM_SMMU_FEAT_RANGE_INV */
u32 ssid; /* INV_TYPE_ATS */
};
int users; /* users=0 to mark as a trash to be purged */
};
static inline bool arm_smmu_inv_is_ats(const struct arm_smmu_inv *inv)
{
return inv->type == INV_TYPE_ATS || inv->type == INV_TYPE_ATS_FULL;
}
/**
* struct arm_smmu_invs - Per-domain invalidation array
* @max_invs: maximum capacity of the flexible array
* @num_invs: number of invalidations in the flexible array. May be smaller than
@max_invs after a trailing trash entry is excluded, but must not be
* greater than @max_invs
* @num_trashes: number of trash entries in the array for arm_smmu_invs_purge().
* Must not be greater than @num_invs
* @rwlock: optional rwlock to fence ATS operations
* @has_ats: true if the array contains an INV_TYPE_ATS or INV_TYPE_ATS_FULL entry
* @rcu: rcu head for kfree_rcu()
* @inv: flexible invalidation array
*
* The arm_smmu_invs is an RCU data structure. During a ->attach_dev callback,
* arm_smmu_invs_merge(), arm_smmu_invs_unref() and arm_smmu_invs_purge() will
* be used to allocate a new copy of an old array for addition and deletion in
* the old domain's and new domain's invs arrays.
*
* arm_smmu_invs_unref() mutates a given array by internally reducing the
* users counts of the given entries. This exists to support a no-fail routine
* such as attaching to an IOMMU_DOMAIN_BLOCKED. It can be paired with a
* follow-up arm_smmu_invs_purge() call to generate a new clean array.
*
* A concurrent invalidation thread pushes every invalidation described in the
* array into the command queue for each invalidation event. It is designed
* this way to optimize the invalidation fast path by avoiding locks.
*
* A domain can be shared across SMMU instances. When an instance is removed,
* all the entries that belong to that SMMU instance are deleted. Then, a
* synchronize_rcu() must be called to sync the array, so that no concurrent
* invalidation thread still accessing the old array can issue commands to the
* command queue of the removed SMMU instance.
*/
struct arm_smmu_invs {
size_t max_invs;
size_t num_invs;
size_t num_trashes;
rwlock_t rwlock;
bool has_ats;
struct rcu_head rcu;
struct arm_smmu_inv inv[] __counted_by(max_invs);
};
static inline struct arm_smmu_invs *arm_smmu_invs_alloc(size_t num_invs)
{
struct arm_smmu_invs *new_invs;
new_invs = kzalloc(struct_size(new_invs, inv, num_invs), GFP_KERNEL);
if (!new_invs)
return NULL;
new_invs->max_invs = num_invs;
new_invs->num_invs = num_invs;
rwlock_init(&new_invs->rwlock);
return new_invs;
}
struct arm_smmu_evtq {
struct arm_smmu_queue q;
struct iopf_queue *iopf;
@ -841,6 +928,14 @@ struct arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
struct arm_smmu_stream *streams;
/*
* Scratch memory for a to_merge or to_unref array to build a per-domain
* invalidation array. It'll be pre-allocated with enough entries for all
* possible build scenarios. It can be used by only one caller at a time
* until arm_smmu_invs_merge/unref() finishes. Must be locked by the
* iommu_group mutex.
*/
struct arm_smmu_invs *build_invs;
struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */
/* Locked by the iommu core using the group mutex */
struct arm_smmu_ctx_desc_cfg cd_table;
@ -856,6 +951,7 @@ struct arm_smmu_master {
enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_S1 = 0,
ARM_SMMU_DOMAIN_S2,
ARM_SMMU_DOMAIN_SVA,
};
struct arm_smmu_domain {
@ -872,6 +968,8 @@ struct arm_smmu_domain {
struct iommu_domain domain;
struct arm_smmu_invs __rcu *invs;
/* List of struct arm_smmu_master_domain */
struct list_head devices;
spinlock_t devices_lock;
@ -924,6 +1022,12 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master, struct mm_struct *mm,
u16 asid);
struct arm_smmu_invs *arm_smmu_invs_merge(struct arm_smmu_invs *invs,
struct arm_smmu_invs *to_merge);
void arm_smmu_invs_unref(struct arm_smmu_invs *invs,
struct arm_smmu_invs *to_unref);
struct arm_smmu_invs *arm_smmu_invs_purge(struct arm_smmu_invs *invs);
#endif
struct arm_smmu_master_domain {
@ -955,6 +1059,13 @@ extern struct mutex arm_smmu_asid_lock;
struct arm_smmu_domain *arm_smmu_domain_alloc(void);
static inline void arm_smmu_domain_free(struct arm_smmu_domain *smmu_domain)
{
/* No concurrency with invalidation is possible at this point */
kfree(rcu_dereference_protected(smmu_domain->invs, true));
kfree(smmu_domain);
}
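/*
 * Illustrative sketch, not part of this commit: one way the invs helpers
 * declared above can compose on an attach path, following the lifecycle
 * described by the struct arm_smmu_invs kerneldoc. The real logic lives in
 * arm-smmu-v3.c (whose diff is suppressed above); the function name, the
 * NULL-on-failure assumption for arm_smmu_invs_merge() and the omitted
 * locking are simplifications, not the driver's actual code.
 */
static inline int arm_smmu_invs_attach_sketch(struct arm_smmu_master *master,
					      struct arm_smmu_domain *new_domain,
					      struct arm_smmu_domain *old_domain)
{
	struct arm_smmu_invs *cur, *next;

	/* Grow the new domain's array by the master's entries; may fail */
	cur = rcu_dereference_protected(new_domain->invs, true);
	next = arm_smmu_invs_merge(cur, master->build_invs);
	if (!next)
		return -ENOMEM;
	rcu_assign_pointer(new_domain->invs, next);
	kfree_rcu(cur, rcu);

	/* Drop the master's entries from the old domain; this cannot fail */
	cur = rcu_dereference_protected(old_domain->invs, true);
	arm_smmu_invs_unref(cur, master->build_invs);

	/* Optionally compact away the trash entries left behind by unref */
	next = arm_smmu_invs_purge(cur);
	if (next) {
		rcu_assign_pointer(old_domain->invs, next);
		kfree_rcu(cur, rcu);
	}
	return 0;
}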
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid);
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
u32 ssid);
@ -969,12 +1080,14 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
struct arm_smmu_cd *cd, struct iommu_domain *old);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain);
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size);
void arm_smmu_domain_inv_range(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size,
unsigned int granule, bool leaf);
static inline void arm_smmu_domain_inv(struct arm_smmu_domain *smmu_domain)
{
arm_smmu_domain_inv_range(smmu_domain, 0, 0, 0, false);
}
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq);
@ -991,6 +1104,21 @@ static inline bool arm_smmu_master_canwbs(struct arm_smmu_master *master)
IOMMU_FWSPEC_PCI_RC_CANWBS;
}
/**
* struct arm_smmu_inv_state - Per-domain invalidation array state
* @invs_ptr: points to the domain->invs (unwinding nesting/etc.) or is NULL if
* no change should be made
* @old_invs: the original invs array
* @new_invs: for a new domain, this is the new invs array to update domain->invs;
* for the old domain, this is the master->build_invs to pass in as the
* to_unref argument to an arm_smmu_invs_unref() call
*/
struct arm_smmu_inv_state {
struct arm_smmu_invs __rcu **invs_ptr;
struct arm_smmu_invs *old_invs;
struct arm_smmu_invs *new_invs;
};
struct arm_smmu_attach_state {
/* Inputs */
struct iommu_domain *old_domain;
@ -1000,6 +1128,8 @@ struct arm_smmu_attach_state {
ioasid_t ssid;
/* Resulting state */
struct arm_smmu_vmaster *vmaster;
struct arm_smmu_inv_state old_domain_invst;
struct arm_smmu_inv_state new_domain_invst;
bool ats_enabled;
};

View File

@ -479,6 +479,10 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
/* Reset VCMDQ */
tegra241_vcmdq_hw_deinit(vcmdq);
/* vintf->hyp_own is a HW state finalized in tegra241_vintf_hw_init() */
if (!vcmdq->vintf->hyp_own)
vcmdq->cmdq.supports_cmd = tegra241_guest_vcmdq_supports_cmd;
/* Configure and enable VCMDQ */
writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
@ -639,9 +643,6 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
q->q_base = q->base_dma & VCMDQ_ADDR;
q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
if (!vcmdq->vintf->hyp_own)
cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd;
return arm_smmu_cmdq_init(smmu, cmdq);
}

View File

@ -14,6 +14,7 @@
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/generic_pt/iommu.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
@ -648,6 +649,15 @@ static void iommu_dma_init_options(struct iommu_dma_options *options,
}
}
static bool iommu_domain_supports_fq(struct device *dev,
struct iommu_domain *domain)
{
/* iommupt always supports DMA-FQ */
if (iommupt_from_domain(domain))
return true;
return device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH);
}
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@ -706,7 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
(!iommu_domain_supports_fq(dev, domain) ||
iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
return iova_reserve_iommu_regions(dev, domain);

View File

@ -5,6 +5,7 @@ CONFIG_DEBUG_GENERIC_PT=y
CONFIG_IOMMU_PT=y
CONFIG_IOMMU_PT_AMDV1=y
CONFIG_IOMMU_PT_VTDSS=y
CONFIG_IOMMU_PT_RISCV64=y
CONFIG_IOMMU_PT_X86_64=y
CONFIG_IOMMU_PT_KUNIT_TEST=y

View File

@ -52,6 +52,16 @@ config IOMMU_PT_VTDSS
Selected automatically by an IOMMU driver that uses this format.
config IOMMU_PT_RISCV64
tristate "IOMMU page table for RISC-V 64 bit Sv57/Sv48/Sv39"
depends on !GENERIC_ATOMIC64 # for cmpxchg64
help
iommu_domain implementation for RISC-V 64 bit 3/4/5 level page table.
It supports 4K/2M/1G/512G/256T page sizes and can decode a sign
extended portion of the 64 bit IOVA space.
Selected automatically by an IOMMU driver that uses this format.
config IOMMU_PT_X86_64
tristate "IOMMU page table for x86 64-bit, 4/5 levels"
depends on !GENERIC_ATOMIC64 # for cmpxchg64
@ -66,6 +76,7 @@ config IOMMU_PT_KUNIT_TEST
tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
depends on KUNIT
depends on IOMMU_PT_AMDV1 || !IOMMU_PT_AMDV1
depends on IOMMU_PT_RISCV64 || !IOMMU_PT_RISCV64
depends on IOMMU_PT_X86_64 || !IOMMU_PT_X86_64
depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
default KUNIT_ALL_TESTS

View File

@ -5,6 +5,8 @@ iommu_pt_fmt-$(CONFIG_IOMMUFD_TEST) += mock
iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
iommu_pt_fmt-$(CONFIG_IOMMU_PT_RISCV64) += riscv64
iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86_64) += x86_64
IOMMU_PT_KUNIT_TEST :=

View File

@ -191,7 +191,7 @@ static inline enum pt_entry_type amdv1pt_load_entry_raw(struct pt_state *pts)
}
#define pt_load_entry_raw amdv1pt_load_entry_raw
static inline void
static __always_inline void
amdv1pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
unsigned int oasz_lg2,
const struct pt_write_attrs *attrs)

View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
*
*/
#ifndef __GENERIC_PT_FMT_DEFS_RISCV_H
#define __GENERIC_PT_FMT_DEFS_RISCV_H
#include <linux/generic_pt/common.h>
#include <linux/types.h>
#ifdef PT_RISCV_32BIT
typedef u32 pt_riscv_entry_t;
#define riscvpt_write_attrs riscv32pt_write_attrs
#else
typedef u64 pt_riscv_entry_t;
#define riscvpt_write_attrs riscv64pt_write_attrs
#endif
typedef pt_riscv_entry_t pt_vaddr_t;
typedef u64 pt_oaddr_t;
struct riscvpt_write_attrs {
pt_riscv_entry_t descriptor_bits;
gfp_t gfp;
};
#define pt_write_attrs riscvpt_write_attrs
#endif

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
*/
#define PT_FMT riscv
#define PT_FMT_VARIANT 64
#define PT_SUPPORTED_FEATURES \
(BIT(PT_FEAT_SIGN_EXTEND) | BIT(PT_FEAT_FLUSH_RANGE) | \
BIT(PT_FEAT_RISCV_SVNAPOT_64K))
#include "iommu_template.h"

View File

@ -0,0 +1,313 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
*
* RISC-V page table
*
* This is described in Sections:
* 12.3. Sv32: Page-Based 32-bit Virtual-Memory Systems
* 12.4. Sv39: Page-Based 39-bit Virtual-Memory System
* 12.5. Sv48: Page-Based 48-bit Virtual-Memory System
* 12.6. Sv57: Page-Based 57-bit Virtual-Memory System
* of the "The RISC-V Instruction Set Manual: Volume II"
*
* This includes the contiguous page extension from:
* Chapter 13. "Svnapot" Extension for NAPOT Translation Contiguity,
* Version 1.0
*
* The table format is sign extended and supports leaves at every level. The spec
* doesn't talk a lot about levels, but level here is the same as i=LEVELS-1 in
* the spec.
*/
#ifndef __GENERIC_PT_FMT_RISCV_H
#define __GENERIC_PT_FMT_RISCV_H
#include "defs_riscv.h"
#include "../pt_defs.h"
#include <linux/bitfield.h>
#include <linux/container_of.h>
#include <linux/log2.h>
#include <linux/sizes.h>
enum {
PT_ITEM_WORD_SIZE = sizeof(pt_riscv_entry_t),
#ifdef PT_RISCV_32BIT
PT_MAX_VA_ADDRESS_LG2 = 32,
PT_MAX_OUTPUT_ADDRESS_LG2 = 34,
PT_MAX_TOP_LEVEL = 1,
#else
PT_MAX_VA_ADDRESS_LG2 = 57,
PT_MAX_OUTPUT_ADDRESS_LG2 = 56,
PT_MAX_TOP_LEVEL = 4,
#endif
PT_GRANULE_LG2SZ = 12,
PT_TABLEMEM_LG2SZ = 12,
/* fsc.PPN is 44 bits wide, all PPNs are 4k aligned */
PT_TOP_PHYS_MASK = GENMASK_ULL(55, 12),
};
/* PTE bits */
enum {
RISCVPT_V = BIT(0),
RISCVPT_R = BIT(1),
RISCVPT_W = BIT(2),
RISCVPT_X = BIT(3),
RISCVPT_U = BIT(4),
RISCVPT_G = BIT(5),
RISCVPT_A = BIT(6),
RISCVPT_D = BIT(7),
RISCVPT_RSW = GENMASK(9, 8),
RISCVPT_PPN32 = GENMASK(31, 10),
RISCVPT_PPN64 = GENMASK_ULL(53, 10),
RISCVPT_PPN64_64K = GENMASK_ULL(53, 14),
RISCVPT_PBMT = GENMASK_ULL(62, 61),
RISCVPT_N = BIT_ULL(63),
/* Svnapot encodings for ppn[0] */
RISCVPT_PPN64_64K_SZ = BIT(13),
};
#ifdef PT_RISCV_32BIT
#define RISCVPT_PPN RISCVPT_PPN32
#define pt_riscv pt_riscv_32
#else
#define RISCVPT_PPN RISCVPT_PPN64
#define pt_riscv pt_riscv_64
#endif
#define common_to_riscvpt(common_ptr) \
container_of_const(common_ptr, struct pt_riscv, common)
#define to_riscvpt(pts) common_to_riscvpt((pts)->range->common)
static inline pt_oaddr_t riscvpt_table_pa(const struct pt_state *pts)
{
return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
}
#define pt_table_pa riscvpt_table_pa
static inline pt_oaddr_t riscvpt_entry_oa(const struct pt_state *pts)
{
if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) &&
pts->entry & RISCVPT_N) {
PT_WARN_ON(pts->level != 0);
return oalog2_mul(FIELD_GET(RISCVPT_PPN64_64K, pts->entry),
ilog2(SZ_64K));
}
return oalog2_mul(FIELD_GET(RISCVPT_PPN, pts->entry), PT_GRANULE_LG2SZ);
}
#define pt_entry_oa riscvpt_entry_oa
static inline bool riscvpt_can_have_leaf(const struct pt_state *pts)
{
return true;
}
#define pt_can_have_leaf riscvpt_can_have_leaf
/* Body in pt_fmt_defaults.h */
static inline unsigned int pt_table_item_lg2sz(const struct pt_state *pts);
static inline unsigned int
riscvpt_entry_num_contig_lg2(const struct pt_state *pts)
{
if (PT_SUPPORTED_FEATURE(PT_FEAT_RISCV_SVNAPOT_64K) &&
pts->entry & RISCVPT_N) {
PT_WARN_ON(!pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K));
PT_WARN_ON(pts->level);
return ilog2(16);
}
return ilog2(1);
}
#define pt_entry_num_contig_lg2 riscvpt_entry_num_contig_lg2
static inline unsigned int riscvpt_num_items_lg2(const struct pt_state *pts)
{
return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
}
#define pt_num_items_lg2 riscvpt_num_items_lg2
static inline unsigned short
riscvpt_contig_count_lg2(const struct pt_state *pts)
{
if (pts->level == 0 && pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K))
return ilog2(16);
return ilog2(1);
}
#define pt_contig_count_lg2 riscvpt_contig_count_lg2
static inline enum pt_entry_type riscvpt_load_entry_raw(struct pt_state *pts)
{
const pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
pt_riscv_entry_t entry;
pts->entry = entry = READ_ONCE(tablep[pts->index]);
if (!(entry & RISCVPT_V))
return PT_ENTRY_EMPTY;
if (pts->level == 0 ||
((entry & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) != 0))
return PT_ENTRY_OA;
return PT_ENTRY_TABLE;
}
#define pt_load_entry_raw riscvpt_load_entry_raw
static inline void
riscvpt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
unsigned int oasz_lg2,
const struct pt_write_attrs *attrs)
{
pt_riscv_entry_t *tablep = pt_cur_table(pts, pt_riscv_entry_t);
pt_riscv_entry_t entry;
if (!pt_check_install_leaf_args(pts, oa, oasz_lg2))
return;
entry = RISCVPT_V |
FIELD_PREP(RISCVPT_PPN, log2_div(oa, PT_GRANULE_LG2SZ)) |
attrs->descriptor_bits;
if (pts_feature(pts, PT_FEAT_RISCV_SVNAPOT_64K) && pts->level == 0 &&
oasz_lg2 != PT_GRANULE_LG2SZ) {
u64 *end;
entry |= RISCVPT_N | RISCVPT_PPN64_64K_SZ;
tablep += pts->index;
end = tablep + log2_div(SZ_64K, PT_GRANULE_LG2SZ);
for (; tablep != end; tablep++)
WRITE_ONCE(*tablep, entry);
} else {
/* FIXME does riscv need this to be cmpxchg? */
WRITE_ONCE(tablep[pts->index], entry);
}
pts->entry = entry;
}
#define pt_install_leaf_entry riscvpt_install_leaf_entry
static inline bool riscvpt_install_table(struct pt_state *pts,
pt_oaddr_t table_pa,
const struct pt_write_attrs *attrs)
{
pt_riscv_entry_t entry;
entry = RISCVPT_V |
FIELD_PREP(RISCVPT_PPN, log2_div(table_pa, PT_GRANULE_LG2SZ));
return pt_table_install64(pts, entry);
}
#define pt_install_table riscvpt_install_table
static inline void riscvpt_attr_from_entry(const struct pt_state *pts,
struct pt_write_attrs *attrs)
{
attrs->descriptor_bits =
pts->entry & (RISCVPT_R | RISCVPT_W | RISCVPT_X | RISCVPT_U |
RISCVPT_G | RISCVPT_A | RISCVPT_D);
}
#define pt_attr_from_entry riscvpt_attr_from_entry
/* --- iommu */
#include <linux/generic_pt/iommu.h>
#include <linux/iommu.h>
#define pt_iommu_table pt_iommu_riscv_64
/* The common struct is in the per-format common struct */
static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
{
return &container_of(iommu_table, struct pt_iommu_table, iommu)
->riscv_64pt.common;
}
static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
{
return &container_of(common, struct pt_iommu_table, riscv_64pt.common)
->iommu;
}
static inline int riscvpt_iommu_set_prot(struct pt_common *common,
struct pt_write_attrs *attrs,
unsigned int iommu_prot)
{
u64 pte;
pte = RISCVPT_A | RISCVPT_U;
if (iommu_prot & IOMMU_WRITE)
pte |= RISCVPT_W | RISCVPT_R | RISCVPT_D;
if (iommu_prot & IOMMU_READ)
pte |= RISCVPT_R;
if (!(iommu_prot & IOMMU_NOEXEC))
pte |= RISCVPT_X;
/* Caller must specify a supported combination of flags */
if (unlikely((pte & (RISCVPT_X | RISCVPT_W | RISCVPT_R)) == 0))
return -EOPNOTSUPP;
attrs->descriptor_bits = pte;
return 0;
}
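/*
 * Illustrative example, not part of this commit: with the mapping above,
 * IOMMU_READ | IOMMU_WRITE (and IOMMU_NOEXEC clear) yields
 * RISCVPT_A | RISCVPT_U | RISCVPT_R | RISCVPT_W | RISCVPT_D | RISCVPT_X,
 * while IOMMU_NOEXEC alone leaves R/W/X all clear and returns -EOPNOTSUPP.
 */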
#define pt_iommu_set_prot riscvpt_iommu_set_prot
static inline int
riscvpt_iommu_fmt_init(struct pt_iommu_riscv_64 *iommu_table,
const struct pt_iommu_riscv_64_cfg *cfg)
{
struct pt_riscv *table = &iommu_table->riscv_64pt;
switch (cfg->common.hw_max_vasz_lg2) {
case 39:
pt_top_set_level(&table->common, 2);
break;
case 48:
pt_top_set_level(&table->common, 3);
break;
case 57:
pt_top_set_level(&table->common, 4);
break;
default:
return -EINVAL;
}
table->common.max_oasz_lg2 =
min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
return 0;
}
#define pt_iommu_fmt_init riscvpt_iommu_fmt_init
static inline void
riscvpt_iommu_fmt_hw_info(struct pt_iommu_riscv_64 *table,
const struct pt_range *top_range,
struct pt_iommu_riscv_64_hw_info *info)
{
phys_addr_t top_phys = virt_to_phys(top_range->top_table);
info->ppn = oalog2_div(top_phys, PT_GRANULE_LG2SZ);
PT_WARN_ON(top_phys & ~PT_TOP_PHYS_MASK);
/*
* See "Table 3. Encodings of iosatp.MODE field" for DC.tc.SXL = 0:
* 8 = Sv39 = top level 2
* 9 = Sv48 = top level 3
* 10 = Sv57 = top level 4
*/
info->fsc_iosatp_mode = top_range->top_level + 6;
}
#define pt_iommu_fmt_hw_info riscvpt_iommu_fmt_hw_info
#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_riscv_64_cfg riscv_64_kunit_fmt_cfgs[] = {
[0] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
.common.hw_max_oasz_lg2 = 56,
.common.hw_max_vasz_lg2 = 39 },
[1] = { .common.features = 0,
.common.hw_max_oasz_lg2 = 56,
.common.hw_max_vasz_lg2 = 48 },
[2] = { .common.features = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
.common.hw_max_oasz_lg2 = 56,
.common.hw_max_vasz_lg2 = 57 },
};
#define kunit_fmt_cfgs riscv_64_kunit_fmt_cfgs
enum {
KUNIT_FMT_FEATURES = BIT(PT_FEAT_RISCV_SVNAPOT_64K),
};
#endif
#endif

View File

@ -51,16 +51,27 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
iommu_pages_stop_incoherent_list(free_list,
iommu_table->iommu_device);
if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
/*
* Note that the sync frees the gather's free list, so we must
* not have any pages on that list that are covered by iova/len
*/
/*
* If running in DMA-FQ mode then the unmap will be followed by an IOTLB
* flush-all, so we optimize by never flushing the IOTLB here.
*
* For NO_GAPS the user gets to pick whether flushing all or doing micro
* flushes is better for their workload by choosing DMA vs DMA-FQ
* operation. Drivers should also see shadow_on_flush.
*/
if (!iommu_iotlb_gather_queued(iotlb_gather)) {
if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
/*
* Note that the sync frees the gather's free list, so
* we must not have any pages on that list that are
* covered by iova/len
*/
}
iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
}
iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
iommu_pages_list_splice(free_list, &iotlb_gather->freelist);
}
@ -466,6 +477,7 @@ struct pt_iommu_map_args {
pt_oaddr_t oa;
unsigned int leaf_pgsize_lg2;
unsigned int leaf_level;
pt_vaddr_t num_leaves;
};
/*
@ -518,11 +530,15 @@ static int clear_contig(const struct pt_state *start_pts,
static int __map_range_leaf(struct pt_range *range, void *arg,
unsigned int level, struct pt_table_p *table)
{
struct pt_iommu *iommu_table = iommu_from_common(range->common);
struct pt_state pts = pt_init(range, level, table);
struct pt_iommu_map_args *map = arg;
unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;
unsigned int start_index;
pt_oaddr_t oa = map->oa;
unsigned int num_leaves;
unsigned int orig_end;
pt_vaddr_t last_va;
unsigned int step;
bool need_contig;
int ret = 0;
@ -536,6 +552,15 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
_pt_iter_first(&pts);
start_index = pts.index;
orig_end = pts.end_index;
if (pts.index + map->num_leaves < pts.end_index) {
/* Need to stop in the middle of the table to change sizes */
pts.end_index = pts.index + map->num_leaves;
num_leaves = 0;
} else {
num_leaves = map->num_leaves - (pts.end_index - pts.index);
}
do {
pts.type = pt_load_entry_raw(&pts);
if (pts.type != PT_ENTRY_EMPTY || need_contig) {
@ -561,7 +586,40 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
flush_writes_range(&pts, start_index, pts.index);
map->oa = oa;
return ret;
map->num_leaves = num_leaves;
if (ret || num_leaves)
return ret;
/* range->va is not valid if we reached the end of the table */
pts.index -= step;
pt_index_to_va(&pts);
pts.index += step;
last_va = range->va + log2_to_int(leaf_pgsize_lg2);
if (last_va - 1 == range->last_va) {
PT_WARN_ON(pts.index != orig_end);
return 0;
}
/*
* Reached a point where the page size changed, compute the new
* parameters.
*/
map->leaf_pgsize_lg2 = pt_compute_best_pgsize(
iommu_table->domain.pgsize_bitmap, last_va, range->last_va, oa);
map->leaf_level =
pt_pgsz_lg2_to_level(range->common, map->leaf_pgsize_lg2);
map->num_leaves = pt_pgsz_count(iommu_table->domain.pgsize_bitmap,
last_va, range->last_va, oa,
map->leaf_pgsize_lg2);
/* Didn't finish this table level, caller will repeat it */
if (pts.index != orig_end) {
if (pts.index != start_index)
pt_index_to_va(&pts);
return -EAGAIN;
}
return 0;
}
static int __map_range(struct pt_range *range, void *arg, unsigned int level,
@ -584,14 +642,9 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
if (pts.type != PT_ENTRY_EMPTY)
return -EADDRINUSE;
ret = pt_iommu_new_table(&pts, &map->attrs);
if (ret) {
/*
* Racing with another thread installing a table
*/
if (ret == -EAGAIN)
continue;
/* EAGAIN on a race will loop again */
if (ret)
return ret;
}
} else {
pts.table_lower = pt_table_ptr(&pts);
/*
@ -615,10 +668,12 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
* The already present table can possibly be shared with another
* concurrent map.
*/
if (map->leaf_level == level - 1)
ret = pt_descend(&pts, arg, __map_range_leaf);
else
ret = pt_descend(&pts, arg, __map_range);
do {
if (map->leaf_level == level - 1)
ret = pt_descend(&pts, arg, __map_range_leaf);
else
ret = pt_descend(&pts, arg, __map_range);
} while (ret == -EAGAIN);
if (ret)
return ret;
@ -626,6 +681,14 @@ static int __map_range(struct pt_range *range, void *arg, unsigned int level,
pt_index_to_va(&pts);
if (pts.index >= pts.end_index)
break;
/*
* This level is currently being walked by __map_range(), which is
* not correct if the target leaf level has been updated to this
* level. Have the caller invoke __map_range_leaf() instead.
*/
if (map->leaf_level == level)
return -EAGAIN;
} while (true);
return 0;
}
@ -797,12 +860,13 @@ static int check_map_range(struct pt_iommu *iommu_table, struct pt_range *range,
static int do_map(struct pt_range *range, struct pt_common *common,
bool single_page, struct pt_iommu_map_args *map)
{
int ret;
/*
* The __map_single_page() fast path does not support DMA_INCOHERENT
* flushing to keep its .text small.
*/
if (single_page && !pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
int ret;
ret = pt_walk_range(range, __map_single_page, map);
if (ret != -EAGAIN)
@ -810,50 +874,25 @@ static int do_map(struct pt_range *range, struct pt_common *common,
/* EAGAIN falls through to the full path */
}
if (map->leaf_level == range->top_level)
return pt_walk_range(range, __map_range_leaf, map);
return pt_walk_range(range, __map_range, map);
do {
if (map->leaf_level == range->top_level)
ret = pt_walk_range(range, __map_range_leaf, map);
else
ret = pt_walk_range(range, __map_range, map);
} while (ret == -EAGAIN);
return ret;
}
/**
* map_pages() - Install translation for an IOVA range
* @domain: Domain to manipulate
* @iova: IO virtual address to start
* @paddr: Physical/Output address to start
* @pgsize: Length of each page
* @pgcount: Length of the range in pgsize units starting from @iova
* @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
* @gfp: GFP flags for any memory allocations
* @mapped: Total bytes successfully mapped
*
* The range starting at IOVA will have paddr installed into it. The caller
* must specify a valid pgsize and pgcount to segment the range into compatible
* blocks.
*
* On error the caller will probably want to invoke unmap on the range from iova
* up to the amount indicated by @mapped to return the table back to an
* unchanged state.
*
* Context: The caller must hold a write range lock that includes the whole
* range.
*
* Returns: -ERRNO on failure, 0 on success. The number of bytes of VA that were
* mapped are added to @mapped, @mapped is not zerod first.
*/
int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
phys_addr_t paddr, dma_addr_t len, unsigned int prot,
gfp_t gfp, size_t *mapped)
{
struct pt_iommu *iommu_table =
container_of(domain, struct pt_iommu, domain);
pt_vaddr_t pgsize_bitmap = iommu_table->domain.pgsize_bitmap;
struct pt_common *common = common_from_iommu(iommu_table);
struct iommu_iotlb_gather iotlb_gather;
pt_vaddr_t len = pgsize * pgcount;
struct pt_iommu_map_args map = {
.iotlb_gather = &iotlb_gather,
.oa = paddr,
.leaf_pgsize_lg2 = vaffs(pgsize),
};
bool single_page = false;
struct pt_range range;
@ -881,13 +920,13 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
return ret;
/* Calculate target page size and level for the leaves */
if (pt_has_system_page_size(common) && pgsize == PAGE_SIZE &&
pgcount == 1) {
if (pt_has_system_page_size(common) && len == PAGE_SIZE) {
PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));
if (log2_mod(iova | paddr, PAGE_SHIFT))
return -ENXIO;
map.leaf_pgsize_lg2 = PAGE_SHIFT;
map.leaf_level = 0;
map.num_leaves = 1;
single_page = true;
} else {
map.leaf_pgsize_lg2 = pt_compute_best_pgsize(
@ -896,6 +935,9 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
return -ENXIO;
map.leaf_level =
pt_pgsz_lg2_to_level(common, map.leaf_pgsize_lg2);
map.num_leaves = pt_pgsz_count(pgsize_bitmap, range.va,
range.last_va, paddr,
map.leaf_pgsize_lg2);
}
ret = check_map_range(iommu_table, &range, &map);
@ -918,7 +960,6 @@ int DOMAIN_NS(map_pages)(struct iommu_domain *domain, unsigned long iova,
*mapped += map.oa - paddr;
return ret;
}
EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(map_pages), "GENERIC_PT_IOMMU");
struct pt_unmap_args {
struct iommu_pages_list free_list;
@ -1020,34 +1061,12 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
return ret;
}
/**
* unmap_pages() - Make a range of IOVA empty/not present
* @domain: Domain to manipulate
* @iova: IO virtual address to start
* @pgsize: Length of each page
* @pgcount: Length of the range in pgsize units starting from @iova
* @iotlb_gather: Gather struct that must be flushed on return
*
* unmap_pages() will remove a translation created by map_pages(). It cannot
* subdivide a mapping created by map_pages(), so it should be called with IOVA
* ranges that match those passed to map_pages(). The IOVA range can aggregate
* contiguous map_pages() calls so long as no individual range is split.
*
* Context: The caller must hold a write range lock that includes
* the whole range.
*
* Returns: Number of bytes of VA unmapped. iova + res will be the point
* unmapping stopped.
*/
size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount,
static size_t NS(unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
dma_addr_t len,
struct iommu_iotlb_gather *iotlb_gather)
{
struct pt_iommu *iommu_table =
container_of(domain, struct pt_iommu, domain);
struct pt_unmap_args unmap = { .free_list = IOMMU_PAGES_LIST_INIT(
unmap.free_list) };
pt_vaddr_t len = pgsize * pgcount;
struct pt_range range;
int ret;
@ -1057,12 +1076,11 @@ size_t DOMAIN_NS(unmap_pages)(struct iommu_domain *domain, unsigned long iova,
pt_walk_range(&range, __unmap_range, &unmap);
gather_range_pages(iotlb_gather, iommu_table, iova, len,
gather_range_pages(iotlb_gather, iommu_table, iova, unmap.unmapped,
&unmap.free_list);
return unmap.unmapped;
}
EXPORT_SYMBOL_NS_GPL(DOMAIN_NS(unmap_pages), "GENERIC_PT_IOMMU");
static void NS(get_info)(struct pt_iommu *iommu_table,
struct pt_iommu_info *info)
@ -1110,6 +1128,8 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
}
static const struct pt_iommu_ops NS(ops) = {
.map_range = NS(map_range),
.unmap_range = NS(unmap_range),
#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER) && defined(pt_entry_is_write_dirty) && \
IS_ENABLED(CONFIG_IOMMUFD_TEST) && defined(pt_entry_make_write_dirty)
.set_dirty = NS(set_dirty),
@ -1172,6 +1192,7 @@ static int pt_iommu_init_domain(struct pt_iommu *iommu_table,
domain->type = __IOMMU_DOMAIN_PAGING;
domain->pgsize_bitmap = info.pgsize_bitmap;
domain->is_iommupt = true;
if (pt_feature(common, PT_FEAT_DYNAMIC_TOP))
range = _pt_top_range(common,

View File

@ -312,6 +312,17 @@ static void test_best_pgsize(struct kunit *test)
}
}
static void test_pgsz_count(struct kunit *test)
{
KUNIT_EXPECT_EQ(test,
pt_pgsz_count(SZ_4K, 0, SZ_1G - 1, 0, ilog2(SZ_4K)),
SZ_1G / SZ_4K);
KUNIT_EXPECT_EQ(test,
pt_pgsz_count(SZ_2M | SZ_4K, SZ_4K, SZ_1G - 1, SZ_4K,
ilog2(SZ_4K)),
(SZ_2M - SZ_4K) / SZ_4K);
}
/*
* Check that pt_install_table() and pt_table_pa() match
*/
@ -770,6 +781,7 @@ static struct kunit_case generic_pt_test_cases[] = {
KUNIT_CASE_FMT(test_init),
KUNIT_CASE_FMT(test_bitops),
KUNIT_CASE_FMT(test_best_pgsize),
KUNIT_CASE_FMT(test_pgsz_count),
KUNIT_CASE_FMT(test_table_ptr),
KUNIT_CASE_FMT(test_max_va),
KUNIT_CASE_FMT(test_table_radix),

View File

@ -569,6 +569,28 @@ static inline unsigned int pt_compute_best_pgsize(pt_vaddr_t pgsz_bitmap,
return pgsz_lg2;
}
/*
* Return the number of pgsize_lg2 leaf entries that can be mapped starting at
* va (mapping to oa). This accounts for any requirement to reduce or increase
* the page size across the VA range.
*/
static inline pt_vaddr_t pt_pgsz_count(pt_vaddr_t pgsz_bitmap, pt_vaddr_t va,
pt_vaddr_t last_va, pt_oaddr_t oa,
unsigned int pgsize_lg2)
{
pt_vaddr_t len = last_va - va + 1;
pt_vaddr_t next_pgsizes = log2_set_mod(pgsz_bitmap, 0, pgsize_lg2 + 1);
if (next_pgsizes) {
unsigned int next_pgsize_lg2 = vaffs(next_pgsizes);
if (log2_mod(va ^ oa, next_pgsize_lg2) == 0)
len = min(len, log2_set_mod_max(va, next_pgsize_lg2) -
va + 1);
}
return log2_div(len, pgsize_lg2);
}
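/*
 * Illustrative examples, not part of this commit; they mirror the
 * test_pgsz_count() KUnit cases added elsewhere in this merge. With
 * pgsz_bitmap = SZ_4K, va = 0, last_va = SZ_1G - 1, oa = 0 and
 * pgsize_lg2 = ilog2(SZ_4K), no larger page size exists, so the whole
 * range is counted: SZ_1G / SZ_4K leaf entries. With
 * pgsz_bitmap = SZ_2M | SZ_4K, va = oa = SZ_4K and the same last_va,
 * the count stops at the first 2M boundary where the larger size becomes
 * usable: (SZ_2M - SZ_4K) / SZ_4K leaf entries of 4K each.
 */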
#define _PT_MAKE_CALL_LEVEL(fn) \
static __always_inline int fn(struct pt_range *range, void *arg, \
unsigned int level, \

View File

@ -255,7 +255,6 @@ void cache_tag_unassign_domain(struct dmar_domain *domain,
static unsigned long calculate_psi_aligned_address(unsigned long start,
unsigned long end,
unsigned long *_pages,
unsigned long *_mask)
{
unsigned long pages = aligned_nrpages(start, end - start + 1);
@ -281,10 +280,8 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
*/
shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
aligned_pages = 1UL << mask;
}
*_pages = aligned_pages;
*_mask = mask;
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
@ -330,19 +327,19 @@ static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid
qi_batch_increment_index(iommu, batch);
}
static void qi_batch_add_piotlb_all(struct intel_iommu *iommu, u16 did,
u32 pasid, struct qi_batch *batch)
{
qi_desc_piotlb_all(did, pasid, &batch->descs[batch->index]);
qi_batch_increment_index(iommu, batch);
}
static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
u64 addr, unsigned long npages, bool ih,
u64 addr, unsigned int size_order, bool ih,
struct qi_batch *batch)
{
/*
* npages == -1 means a PASID-selective invalidation, otherwise,
* a positive value for Page-selective-within-PASID invalidation.
* 0 is not a valid input.
*/
if (!npages)
return;
qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
qi_desc_piotlb(did, pasid, addr, size_order, ih,
&batch->descs[batch->index]);
qi_batch_increment_index(iommu, batch);
}
@ -371,15 +368,18 @@ static bool intel_domain_use_piotlb(struct dmar_domain *domain)
}
static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
unsigned long addr, unsigned long pages,
unsigned long mask, int ih)
unsigned long addr, unsigned long mask, int ih)
{
struct intel_iommu *iommu = tag->iommu;
u64 type = DMA_TLB_PSI_FLUSH;
if (intel_domain_use_piotlb(domain)) {
qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
pages, ih, domain->qi_batch);
if (mask >= MAX_AGAW_PFN_WIDTH)
qi_batch_add_piotlb_all(iommu, tag->domain_id,
tag->pasid, domain->qi_batch);
else
qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid,
addr, mask, ih, domain->qi_batch);
return;
}
@ -388,7 +388,7 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
* is too big.
*/
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap) || pages == -1) {
mask > cap_max_amask_val(iommu->cap)) {
addr = 0;
mask = 0;
ih = 0;
@ -437,16 +437,15 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih)
{
struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
unsigned long mask, addr;
struct cache_tag *tag;
unsigned long flags;
if (start == 0 && end == ULONG_MAX) {
addr = 0;
pages = -1;
mask = MAX_AGAW_PFN_WIDTH;
} else {
addr = calculate_psi_aligned_address(start, end, &pages, &mask);
addr = calculate_psi_aligned_address(start, end, &mask);
}
spin_lock_irqsave(&domain->cache_lock, flags);
@ -458,7 +457,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
cache_tag_flush_iotlb(domain, tag, addr, mask, ih);
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
@ -476,7 +475,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
break;
}
trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
trace_cache_tag_flush_range(tag, start, end, addr, mask);
}
qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
@ -506,11 +505,11 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end)
{
struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
unsigned long mask, addr;
struct cache_tag *tag;
unsigned long flags;
addr = calculate_psi_aligned_address(start, end, &pages, &mask);
addr = calculate_psi_aligned_address(start, end, &mask);
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
@ -526,9 +525,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
if (tag->type == CACHE_TAG_IOTLB ||
tag->type == CACHE_TAG_NESTING_IOTLB)
cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
cache_tag_flush_iotlb(domain, tag, addr, mask, 0);
trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
trace_cache_tag_flush_range_np(tag, start, end, addr, mask);
}
qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);

View File

@ -133,13 +133,13 @@ static int iommu_regset_show(struct seq_file *m, void *unused)
*/
raw_spin_lock_irqsave(&iommu->register_lock, flag);
for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) {
value = dmar_readl(iommu->reg + iommu_regs_32[i].offset);
value = readl(iommu->reg + iommu_regs_32[i].offset);
seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
iommu_regs_32[i].regs, iommu_regs_32[i].offset,
value);
}
for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) {
value = dmar_readq(iommu->reg + iommu_regs_64[i].offset);
value = readq(iommu->reg + iommu_regs_64[i].offset);
seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
iommu_regs_64[i].regs, iommu_regs_64[i].offset,
value);
@ -247,7 +247,7 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
tbl_wlk.ctx_entry = context;
m->private = &tbl_wlk;
if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) {
if (readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) {
pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
pasid_dir_size = get_pasid_dir_size(context);
pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size);
@ -285,7 +285,7 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
sts = readl(iommu->reg + DMAR_GSTS_REG);
if (!(sts & DMA_GSTS_TES)) {
seq_printf(m, "DMA Remapping is not enabled on %s\n",
iommu->name);
@ -364,13 +364,13 @@ static int domain_translation_struct_show(struct seq_file *m,
if (seg != iommu->segment)
continue;
sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
sts = readl(iommu->reg + DMAR_GSTS_REG);
if (!(sts & DMA_GSTS_TES)) {
seq_printf(m, "DMA Remapping is not enabled on %s\n",
iommu->name);
continue;
}
if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
if (readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
scalable = true;
else
scalable = false;
@ -538,8 +538,8 @@ static int invalidation_queue_show(struct seq_file *m, void *unused)
raw_spin_lock_irqsave(&qi->q_lock, flags);
seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n",
(u64)virt_to_phys(qi->desc),
dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift,
dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift);
readq(iommu->reg + DMAR_IQH_REG) >> shift,
readq(iommu->reg + DMAR_IQT_REG) >> shift);
invalidation_queue_entry_show(m, iommu);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
seq_putc(m, '\n');
@ -620,7 +620,7 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
iommu->name);
sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
sts = readl(iommu->reg + DMAR_GSTS_REG);
if (iommu->ir_table && (sts & DMA_GSTS_IRES)) {
irta = virt_to_phys(iommu->ir_table->base);
seq_printf(m, " IR table address:%llx\n", irta);

View File

@ -899,8 +899,8 @@ dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
return -EINVAL;
}
cap = dmar_readq(addr + DMAR_CAP_REG);
ecap = dmar_readq(addr + DMAR_ECAP_REG);
cap = readq(addr + DMAR_CAP_REG);
ecap = readq(addr + DMAR_ECAP_REG);
if (arg)
iounmap(addr);
@ -982,8 +982,8 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
goto release;
}
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
iommu->cap = readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = readq(iommu->reg + DMAR_ECAP_REG);
if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
err = -EINVAL;
@ -1017,8 +1017,8 @@ static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
int i;
for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) {
iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG +
i * DMA_ECMD_REG_STEP);
iommu->ecmdcap[i] = readq(iommu->reg + DMAR_ECCAP_REG +
i * DMA_ECMD_REG_STEP);
}
}
@ -1239,8 +1239,8 @@ static const char *qi_type_string(u8 type)
static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
{
unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
unsigned int head = readl(iommu->reg + DMAR_IQH_REG);
u64 iqe_err = readq(iommu->reg + DMAR_IQER_REG);
struct qi_desc *desc = iommu->qi->desc + head;
if (fault & DMA_FSTS_IQE)
@ -1321,7 +1321,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
* SID field is valid only when the ITE field is Set in FSTS_REG
* see Intel VT-d spec r4.1, section 11.4.9.9
*/
iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
iqe_err = readq(iommu->reg + DMAR_IQER_REG);
ite_sid = DMAR_IQER_REG_ITESID(iqe_err);
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
@ -1550,23 +1550,12 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
qi_submit_sync(iommu, &desc, 1, 0);
}
/* PASID-based IOTLB invalidation */
void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
unsigned long npages, bool ih)
/* PASID-selective IOTLB invalidation */
void qi_flush_piotlb_all(struct intel_iommu *iommu, u16 did, u32 pasid)
{
struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
struct qi_desc desc = {};
/*
* npages == -1 means a PASID-selective invalidation, otherwise,
* a positive value for Page-selective-within-PASID invalidation.
* 0 is not a valid input.
*/
if (WARN_ON(!npages)) {
pr_err("Invalid input npages = %ld\n", npages);
return;
}
qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_desc_piotlb_all(did, pasid, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@ -1661,7 +1650,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
/* write zero to the tail reg */
writel(0, iommu->reg + DMAR_IQT_REG);
dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
writeq(val, iommu->reg + DMAR_IQA_REG);
iommu->gcmd |= DMA_GCMD_QIE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@ -1980,8 +1969,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
source_id = dma_frcd_source_id(data);
pasid_present = dma_frcd_pasid_present(data);
guest_addr = dmar_readq(iommu->reg + reg +
fault_index * PRIMARY_FAULT_REG_LEN);
guest_addr = readq(iommu->reg + reg +
fault_index * PRIMARY_FAULT_REG_LEN);
guest_addr = dma_frcd_page_addr(guest_addr);
}

View File

@ -697,7 +697,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
addr |= DMA_RTADDR_SMT;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
writeq(addr, iommu->reg + DMAR_RTADDR_REG);
writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
@ -765,11 +765,11 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
val |= DMA_CCMD_ICC;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
writeq(val, iommu->reg + DMAR_CCMD_REG);
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
dmar_readq, (!(val & DMA_CCMD_ICC)), val);
readq, (!(val & DMA_CCMD_ICC)), val);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
@ -806,12 +806,12 @@ void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
raw_spin_lock_irqsave(&iommu->register_lock, flag);
/* Note: Only uses first TLB reg currently */
if (val_iva)
dmar_writeq(iommu->reg + tlb_offset, val_iva);
dmar_writeq(iommu->reg + tlb_offset + 8, val);
writeq(val_iva, iommu->reg + tlb_offset);
writeq(val, iommu->reg + tlb_offset + 8);
/* Make sure hardware complete it */
IOMMU_WAIT_OP(iommu, tlb_offset + 8,
dmar_readq, (!(val & DMA_TLB_IVT)), val);
readq, (!(val & DMA_TLB_IVT)), val);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
@ -1533,7 +1533,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
int bus, ret;
bool new_ext, ext;
rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
rtaddr_reg = readq(iommu->reg + DMAR_RTADDR_REG);
ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
new_ext = !!sm_supported(iommu);
@ -3212,7 +3212,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
@ -3220,6 +3219,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
return ecap_sc_support(info->iommu->ecap);
case IOMMU_CAP_DIRTY_TRACKING:
return ssads_supported(info->iommu);
case IOMMU_CAP_PCI_ATS_SUPPORTED:
return info->ats_supported;
default:
return false;
}
@ -3618,9 +3619,6 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
if (domain->dirty_ops)
return -EINVAL;
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
@ -3684,18 +3682,26 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length,
return vtd;
}
/*
* Set dirty tracking for the device list of a domain. The caller must
* hold the domain->lock when calling it.
*/
static int device_set_dirty_tracking(struct list_head *devices, bool enable)
/* Set dirty tracking for the devices to which the domain has been attached. */
static int domain_set_dirty_tracking(struct dmar_domain *domain, bool enable)
{
struct device_domain_info *info;
struct dev_pasid_info *dev_pasid;
int ret = 0;
list_for_each_entry(info, devices, link) {
lockdep_assert_held(&domain->lock);
list_for_each_entry(info, &domain->devices, link) {
ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
IOMMU_NO_PASID, enable);
if (ret)
return ret;
}
list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
info = dev_iommu_priv_get(dev_pasid->dev);
ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev,
dev_pasid->pasid, enable);
if (ret)
break;
}
@ -3713,7 +3719,7 @@ static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
spin_lock(&domain->s1_lock);
list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
spin_lock_irqsave(&s1_domain->lock, flags);
ret = device_set_dirty_tracking(&s1_domain->devices, enable);
ret = domain_set_dirty_tracking(s1_domain, enable);
spin_unlock_irqrestore(&s1_domain->lock, flags);
if (ret)
goto err_unwind;
@ -3724,8 +3730,7 @@ static int parent_domain_set_dirty_tracking(struct dmar_domain *domain,
err_unwind:
list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
spin_lock_irqsave(&s1_domain->lock, flags);
device_set_dirty_tracking(&s1_domain->devices,
domain->dirty_tracking);
domain_set_dirty_tracking(s1_domain, domain->dirty_tracking);
spin_unlock_irqrestore(&s1_domain->lock, flags);
}
spin_unlock(&domain->s1_lock);
@ -3742,7 +3747,7 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
if (dmar_domain->dirty_tracking == enable)
goto out_unlock;
ret = device_set_dirty_tracking(&dmar_domain->devices, enable);
ret = domain_set_dirty_tracking(dmar_domain, enable);
if (ret)
goto err_unwind;
@ -3759,8 +3764,7 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
return 0;
err_unwind:
device_set_dirty_tracking(&dmar_domain->devices,
dmar_domain->dirty_tracking);
domain_set_dirty_tracking(dmar_domain, dmar_domain->dirty_tracking);
spin_unlock(&dmar_domain->lock);
return ret;
}
@ -4185,7 +4189,7 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
raw_spin_lock_irqsave(&iommu->register_lock, flags);
res = dmar_readq(iommu->reg + DMAR_ECRSP_REG);
res = readq(iommu->reg + DMAR_ECRSP_REG);
if (res & DMA_ECMD_ECRSP_IP) {
ret = -EBUSY;
goto err;
@ -4198,10 +4202,10 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
* - It's not invoked in any critical path. The extra MMIO
* write doesn't bring any performance concerns.
*/
dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob);
dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT));
writeq(ob, iommu->reg + DMAR_ECEO_REG);
writeq(ecmd | (oa << DMA_ECMD_OA_SHIFT), iommu->reg + DMAR_ECMD_REG);
IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq,
IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, readq,
!(res & DMA_ECMD_ECRSP_IP), res);
if (res & DMA_ECMD_ECRSP_IP) {

View File

@ -148,11 +148,6 @@
#define OFFSET_STRIDE (9)
#define dmar_readq(a) readq(a)
#define dmar_writeq(a,v) writeq(v,a)
#define dmar_readl(a) readl(a)
#define dmar_writel(a, v) writel(v, a)
#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
#define DMAR_VER_MINOR(v) ((v) & 0x0f)
@ -1082,31 +1077,26 @@ static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr,
desc->qw3 = 0;
}
/* PASID-selective IOTLB invalidation */
static inline void qi_desc_piotlb_all(u16 did, u32 pasid, struct qi_desc *desc)
{
desc->qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
desc->qw1 = 0;
}
/* Page-selective-within-PASID IOTLB invalidation */
static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr,
unsigned long npages, bool ih,
unsigned int size_order, bool ih,
struct qi_desc *desc)
{
if (npages == -1) {
desc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
QI_EIOTLB_TYPE;
desc->qw1 = 0;
} else {
int mask = ilog2(__roundup_pow_of_two(npages));
unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
addr = ALIGN_DOWN(addr, align);
desc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
QI_EIOTLB_TYPE;
desc->qw1 = QI_EIOTLB_ADDR(addr) |
QI_EIOTLB_IH(ih) |
QI_EIOTLB_AM(mask);
}
/*
* calculate_psi_aligned_address() must be used to compute addr and size_order
*/
desc->qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
desc->qw1 = QI_EIOTLB_ADDR(addr) | QI_EIOTLB_IH(ih) |
QI_EIOTLB_AM(size_order);
}
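A rough caller sketch for the reworked helper (did, pasid, addr, size_order and ih are placeholders here; addr and size_order are assumed to come from a PSI-aligned calculation, as the comment above requires):

/* Illustrative caller: flush one PSI-aligned block for a PASID. */
struct qi_desc desc = {};

qi_desc_piotlb(did, pasid, addr, size_order, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);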
static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid,
@ -1168,8 +1158,7 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask);
void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
unsigned long npages, bool ih);
void qi_flush_piotlb_all(struct intel_iommu *iommu, u16 did, u32 pasid);
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr,

View File

@ -422,7 +422,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
u64 irta;
/* Check whether the old ir-table has the same size as ours */
irta = dmar_readq(iommu->reg + DMAR_IRTA_REG);
irta = readq(iommu->reg + DMAR_IRTA_REG);
if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK)
!= INTR_REMAP_TABLE_REG_SIZE)
return -EINVAL;
@ -465,8 +465,8 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
raw_spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg + DMAR_IRTA_REG,
(addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
writeq((addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE,
iommu->reg + DMAR_IRTA_REG);
/* Set interrupt-remapping table pointer */
writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG);

View File

@ -282,7 +282,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
qi_flush_piotlb_all(iommu, did, pasid);
else
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
@ -308,7 +308,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
if (cap_caching_mode(iommu->cap)) {
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
qi_flush_piotlb_all(iommu, did, pasid);
} else {
iommu_flush_write_buffer(iommu);
}
@ -342,7 +342,7 @@ static void intel_pasid_flush_present(struct intel_iommu *iommu,
* Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
*/
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
qi_flush_piotlb_all(iommu, did, pasid);
devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

View File

@ -99,20 +99,20 @@ IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE);
#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \
{ \
if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \
dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
IOMMU_PMU_CFG_SIZE + \
(ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\
writel(iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN, \
iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
IOMMU_PMU_CFG_SIZE + \
(ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET); \
} \
}
#define iommu_pmu_clear_filter(_filter, _idx) \
{ \
if (iommu_pmu->filter & _filter) { \
dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
IOMMU_PMU_CFG_SIZE + \
(ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \
0); \
writel(0, \
iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \
IOMMU_PMU_CFG_SIZE + \
(ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET); \
} \
}
@ -307,7 +307,7 @@ static void iommu_pmu_event_update(struct perf_event *event)
again:
prev_count = local64_read(&hwc->prev_count);
new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
new_count = readq(iommu_event_base(iommu_pmu, hwc->idx));
if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
goto again;
@ -340,7 +340,7 @@ static void iommu_pmu_start(struct perf_event *event, int flags)
hwc->state = 0;
/* Always reprogram the period */
count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
count = readq(iommu_event_base(iommu_pmu, hwc->idx));
local64_set((&hwc->prev_count), count);
/*
@ -411,7 +411,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
hwc->idx = idx;
/* config events */
dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);
writeq(hwc->config, iommu_config_base(iommu_pmu, idx));
iommu_pmu_set_filter(requester_id, event->attr.config1,
IOMMU_PMU_FILTER_REQUESTER_ID, idx,
@ -496,7 +496,7 @@ static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
* Two counters may overflow very close together. Always check
* whether there are more to handle.
*/
while ((status = dmar_readq(iommu_pmu->overflow))) {
while ((status = readq(iommu_pmu->overflow))) {
for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
/*
* Find the assigned event of the counter.
@ -510,7 +510,7 @@ static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
iommu_pmu_event_update(event);
}
dmar_writeq(iommu_pmu->overflow, status);
writeq(status, iommu_pmu->overflow);
}
}
@ -518,13 +518,13 @@ static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
struct intel_iommu *iommu = dev_id;
if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
if (!readl(iommu->reg + DMAR_PERFINTRSTS_REG))
return IRQ_NONE;
iommu_pmu_counter_overflow(iommu->pmu);
/* Clear the status bit */
dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);
writel(DMA_PERFINTRSTS_PIS, iommu->reg + DMAR_PERFINTRSTS_REG);
return IRQ_HANDLED;
}
@ -555,7 +555,7 @@ static int __iommu_pmu_register(struct intel_iommu *iommu)
static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
u32 off = dmar_readl(iommu->reg + offset);
u32 off = readl(iommu->reg + offset);
return iommu->reg + off;
}
@ -574,7 +574,7 @@ int alloc_iommu_pmu(struct intel_iommu *iommu)
if (!cap_ecmds(iommu->cap))
return -ENODEV;
perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
perfcap = readq(iommu->reg + DMAR_PERFCAP_REG);
/* The performance monitoring is not supported. */
if (!perfcap)
return -ENODEV;
@ -617,8 +617,8 @@ int alloc_iommu_pmu(struct intel_iommu *iommu)
for (i = 0; i < iommu_pmu->num_eg; i++) {
u64 pcap;
pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
i * IOMMU_PMU_CAP_REGS_STEP);
pcap = readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
i * IOMMU_PMU_CAP_REGS_STEP);
iommu_pmu->evcap[i] = pecap_es(pcap);
}
@ -651,9 +651,9 @@ int alloc_iommu_pmu(struct intel_iommu *iommu)
* Width.
*/
for (i = 0; i < iommu_pmu->num_cntr; i++) {
cap = dmar_readl(iommu_pmu->cfg_reg +
i * IOMMU_PMU_CFG_OFFSET +
IOMMU_PMU_CFG_CNTRCAP_OFFSET);
cap = readl(iommu_pmu->cfg_reg +
i * IOMMU_PMU_CFG_OFFSET +
IOMMU_PMU_CFG_CNTRCAP_OFFSET);
if (!iommu_cntrcap_pcc(cap))
continue;
@ -675,9 +675,9 @@ int alloc_iommu_pmu(struct intel_iommu *iommu)
/* Override with per-counter event capabilities */
for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
(j * IOMMU_PMU_OFF_REGS_STEP));
cap = readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
(j * IOMMU_PMU_OFF_REGS_STEP));
iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
/*
* Some events may only be supported by a specific counter.

View File

@ -81,8 +81,8 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
*/
prq_retry:
reinit_completion(&iommu->prq_complete);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
struct page_req_dsc *req;
@ -113,7 +113,7 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
qi_desc_dev_iotlb(sid, info->pfsid, info->ats_qdep, 0,
MAX_AGAW_PFN_WIDTH, &desc[2]);
} else {
qi_desc_piotlb(did, pasid, 0, -1, 0, &desc[1]);
qi_desc_piotlb_all(did, pasid, &desc[1]);
qi_desc_dev_iotlb_pasid(sid, info->pfsid, pasid, info->ats_qdep,
0, MAX_AGAW_PFN_WIDTH, &desc[2]);
}
@ -208,8 +208,8 @@ static irqreturn_t prq_event_thread(int irq, void *d)
*/
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
handled = (head != tail);
while (head != tail) {
req = &iommu->prq[head / sizeof(*req)];
@ -259,7 +259,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
writeq(tail, iommu->reg + DMAR_PQH_REG);
/*
* Clear the page request overflow bit and wake up all threads that
@ -268,8 +268,8 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
iommu->name);
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
@ -325,9 +325,9 @@ int intel_iommu_enable_prq(struct intel_iommu *iommu)
iommu->name);
goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
writeq(0ULL, iommu->reg + DMAR_PQH_REG);
writeq(0ULL, iommu->reg + DMAR_PQT_REG);
writeq(virt_to_phys(iommu->prq) | PRQ_ORDER, iommu->reg + DMAR_PQA_REG);
init_completion(&iommu->prq_complete);
@ -348,9 +348,9 @@ int intel_iommu_enable_prq(struct intel_iommu *iommu)
int intel_iommu_finish_prq(struct intel_iommu *iommu)
{
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
writeq(0ULL, iommu->reg + DMAR_PQH_REG);
writeq(0ULL, iommu->reg + DMAR_PQT_REG);
writeq(0ULL, iommu->reg + DMAR_PQA_REG);
if (iommu->pr_irq) {
free_irq(iommu->pr_irq, iommu);

View File

@ -132,8 +132,8 @@ DEFINE_EVENT(cache_tag_log, cache_tag_unassign,
DECLARE_EVENT_CLASS(cache_tag_flush,
TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
unsigned long addr, unsigned long pages, unsigned long mask),
TP_ARGS(tag, start, end, addr, pages, mask),
unsigned long addr, unsigned long mask),
TP_ARGS(tag, start, end, addr, mask),
TP_STRUCT__entry(
__string(iommu, tag->iommu->name)
__string(dev, dev_name(tag->dev))
@ -143,7 +143,6 @@ DECLARE_EVENT_CLASS(cache_tag_flush,
__field(unsigned long, start)
__field(unsigned long, end)
__field(unsigned long, addr)
__field(unsigned long, pages)
__field(unsigned long, mask)
),
TP_fast_assign(
@ -155,10 +154,9 @@ DECLARE_EVENT_CLASS(cache_tag_flush,
__entry->start = start;
__entry->end = end;
__entry->addr = addr;
__entry->pages = pages;
__entry->mask = mask;
),
TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx pages 0x%lx mask 0x%lx",
TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx mask 0x%lx",
__get_str(iommu), __get_str(dev), __entry->pasid,
__print_symbolic(__entry->type,
{ CACHE_TAG_IOTLB, "iotlb" },
@ -166,20 +164,20 @@ DECLARE_EVENT_CLASS(cache_tag_flush,
{ CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" },
{ CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }),
__entry->domain_id, __entry->start, __entry->end,
__entry->addr, __entry->pages, __entry->mask
__entry->addr, __entry->mask
)
);
DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range,
TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
unsigned long addr, unsigned long pages, unsigned long mask),
TP_ARGS(tag, start, end, addr, pages, mask)
unsigned long addr, unsigned long mask),
TP_ARGS(tag, start, end, addr, mask)
);
DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range_np,
TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
unsigned long addr, unsigned long pages, unsigned long mask),
TP_ARGS(tag, start, end, addr, pages, mask)
unsigned long addr, unsigned long mask),
TP_ARGS(tag, start, end, addr, mask)
);
#endif /* _TRACE_INTEL_IOMMU_H */

View File

@ -34,6 +34,7 @@
#include <linux/sched/mm.h>
#include <linux/msi.h>
#include <uapi/linux/iommufd.h>
#include <linux/generic_pt/iommu.h>
#include "dma-iommu.h"
#include "iommu-priv.h"
@ -2572,14 +2573,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
return pgsize;
}
int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
unsigned long iova, phys_addr_t paddr,
size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
unsigned long orig_iova = iova;
unsigned int min_pagesz;
size_t orig_size = size;
phys_addr_t orig_paddr = paddr;
int ret = 0;
might_sleep_if(gfpflags_allow_blocking(gfp));
@ -2636,12 +2637,9 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
/* unroll mapping in case something went wrong */
if (ret) {
iommu_unmap(domain, orig_iova, orig_size - size);
} else {
trace_map(orig_iova, orig_paddr, orig_size);
iommu_debug_map(domain, orig_paddr, orig_size);
return ret;
}
return ret;
return 0;
}
int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
@ -2653,6 +2651,32 @@ int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
return ops->iotlb_sync_map(domain, iova, size);
}
int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct pt_iommu *pt = iommupt_from_domain(domain);
int ret;
if (pt) {
size_t mapped = 0;
ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,
&mapped);
if (ret) {
iommu_unmap(domain, iova, mapped);
return ret;
}
return 0;
}
ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp);
if (!ret)
return ret;
trace_map(iova, paddr, size);
iommu_debug_map(domain, paddr, size);
return 0;
}
int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
@ -2670,13 +2694,12 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
}
EXPORT_SYMBOL_GPL(iommu_map);
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
static size_t
__iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,
size_t size, struct iommu_iotlb_gather *iotlb_gather)
{
const struct iommu_domain_ops *ops = domain->ops;
size_t unmapped_page, unmapped = 0;
unsigned long orig_iova = iova;
unsigned int min_pagesz;
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
@ -2717,13 +2740,34 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
/*
* If the driver itself isn't using the gather, make sure
* it looks non-empty so iotlb_sync will still be called.
*/
if (iotlb_gather->start >= iotlb_gather->end)
iommu_iotlb_gather_add_range(iotlb_gather, iova, size);
iova += unmapped_page;
unmapped += unmapped_page;
}
trace_unmap(orig_iova, size, unmapped);
iommu_debug_unmap_end(domain, orig_iova, size, unmapped);
return unmapped;
}
static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size,
struct iommu_iotlb_gather *iotlb_gather)
{
struct pt_iommu *pt = iommupt_from_domain(domain);
size_t unmapped;
if (pt)
unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather);
else
unmapped = __iommu_unmap_domain_pgtbl(domain, iova, size,
iotlb_gather);
trace_unmap(iova, size, unmapped);
iommu_debug_unmap_end(domain, iova, size, unmapped);
return unmapped;
}

View File

@ -1624,6 +1624,10 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
/* Report when ATS cannot be used for this device */
if (!device_iommu_capable(idev->dev, IOMMU_CAP_PCI_ATS_SUPPORTED))
cmd->out_capabilities |= IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED;
cmd->out_max_pasid_log2 = 0;
/*
* Currently, all iommu drivers enable PASID in the probe_device()

View File

@ -611,7 +611,8 @@ static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
static void iova_magazine_free(struct iova_magazine *mag)
{
kmem_cache_free(iova_magazine_cache, mag);
if (mag)
kmem_cache_free(iova_magazine_cache, mag);
}
static void

View File

@ -3,9 +3,13 @@
config RISCV_IOMMU
bool "RISC-V IOMMU Support"
depends on RISCV && 64BIT
default y
default RISCV
depends on GENERIC_MSI_IRQ
depends on (RISCV || COMPILE_TEST) && 64BIT
select IOMMU_API
select GENERIC_PT
select IOMMU_PT
select IOMMU_PT_RISCV64
help
Support for implementations of the RISC-V IOMMU architecture that
complements the RISC-V MMU capabilities, providing similar address

View File

@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <asm/page.h>
/*
* Chapter 5: Memory Mapped register interface
@ -718,7 +719,8 @@ static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
u64 addr)
{
cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, phys_to_pfn(addr));
cmd->dword1 =
FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, PHYS_PFN(addr));
cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
}

View File

@ -68,12 +68,7 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
iommu->irqs_count = platform_irq_count(pdev);
if (iommu->irqs_count <= 0)
return dev_err_probe(dev, -ENODEV,
"no IRQ resources provided\n");
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps);
switch (igs) {
@ -120,6 +115,16 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
fallthrough;
case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
ret = platform_irq_count(pdev);
if (ret <= 0)
return dev_err_probe(dev, -ENODEV,
"no IRQ resources provided\n");
iommu->irqs_count = ret;
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
for (vec = 0; vec < iommu->irqs_count; vec++)
iommu->irqs[vec] = platform_get_irq(pdev, vec);

View File

@ -21,6 +21,7 @@
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/generic_pt/iommu.h>
#include "../iommu-pages.h"
#include "iommu-bits.h"
@ -159,7 +160,7 @@ static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {
const size_t queue_size = entry_size << (logsz + 1);
queue->phys = pfn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
queue->phys = PFN_PHYS(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);
} else {
do {
@ -368,6 +369,8 @@ static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue,
unsigned int timeout_us)
{
unsigned int cons = atomic_read(&queue->head);
unsigned int flags = RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |
RISCV_IOMMU_CQCSR_CMD_ILL;
/* Already processed by the consumer */
if ((int)(cons - index) > 0)
@ -375,6 +378,7 @@ static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue,
/* Monitor consumer index */
return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons,
(riscv_iommu_readl(queue->iommu, queue->qcr) & flags) ||
(int)(cons - index) > 0, 0, timeout_us);
}
@ -435,7 +439,9 @@ static unsigned int riscv_iommu_queue_send(struct riscv_iommu_queue *queue,
* 6. Make sure the doorbell write to the device has finished before updating
* the shadow tail index in normal memory. 'fence o, w'
*/
#ifdef CONFIG_MMIOWB
mmiowb();
#endif
atomic_inc(&queue->tail);
/* 7. Complete submission and restore local interrupts */
@ -806,15 +812,15 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
/* This struct contains protection domain specific IOMMU driver data. */
struct riscv_iommu_domain {
struct iommu_domain domain;
union {
struct iommu_domain domain;
struct pt_iommu_riscv_64 riscvpt;
};
struct list_head bonds;
spinlock_t lock; /* protect bonds list updates. */
int pscid;
bool amo_enabled;
int numa_node;
unsigned int pgd_mode;
unsigned long *pgd_root;
};
PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain);
#define iommu_domain_to_riscv(iommu_domain) \
container_of(iommu_domain, struct riscv_iommu_domain, domain)
@ -928,8 +934,6 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
struct riscv_iommu_bond *bond;
struct riscv_iommu_device *iommu, *prev;
struct riscv_iommu_command cmd;
unsigned long len = end - start + 1;
unsigned long iova;
/*
* For each IOMMU linked with this protection domain (via bonds->dev),
@ -972,11 +976,14 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
riscv_iommu_cmd_inval_vma(&cmd);
riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
if (len && len < RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
for (iova = start; iova < end; iova += PAGE_SIZE) {
if (end - start < RISCV_IOMMU_IOTLB_INVAL_LIMIT - 1) {
unsigned long iova = start;
do {
riscv_iommu_cmd_inval_set_addr(&cmd, iova);
riscv_iommu_cmd_send(iommu, &cmd);
}
} while (!check_add_overflow(iova, PAGE_SIZE, &iova) &&
iova < end);
} else {
riscv_iommu_cmd_send(iommu, &cmd);
}
@ -996,7 +1003,67 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
}
#define RISCV_IOMMU_FSC_BARE 0
/*
* This function sends IOTINVAL commands as required by the RISC-V
* IOMMU specification (Sections 6.3.1 and 6.3.2 of the 1.0 spec)
* after modifying DDT or PDT entries.
*/
static void riscv_iommu_iodir_iotinval(struct riscv_iommu_device *iommu,
bool inval_pdt, unsigned long iohgatp,
struct riscv_iommu_dc *dc,
struct riscv_iommu_pc *pc)
{
struct riscv_iommu_command cmd;
riscv_iommu_cmd_inval_vma(&cmd);
if (FIELD_GET(RISCV_IOMMU_DC_IOHGATP_MODE, iohgatp) ==
RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
if (inval_pdt) {
/*
* IOTINVAL.VMA with GV=AV=0, and PSCV=1, and
* PSCID=PC.PSCID
*/
riscv_iommu_cmd_inval_set_pscid(&cmd,
FIELD_GET(RISCV_IOMMU_PC_TA_PSCID, pc->ta));
} else {
if (!FIELD_GET(RISCV_IOMMU_DC_TC_PDTV, dc->tc) &&
FIELD_GET(RISCV_IOMMU_DC_FSC_MODE, dc->fsc) !=
RISCV_IOMMU_DC_FSC_MODE_BARE) {
/*
* DC.tc.PDTV == 0 && DC.fsc.MODE != Bare
* IOTINVAL.VMA with GV=AV=0, and PSCV=1, and
* PSCID=DC.ta.PSCID
*/
riscv_iommu_cmd_inval_set_pscid(&cmd,
FIELD_GET(RISCV_IOMMU_DC_TA_PSCID, dc->ta));
}
/* else: IOTINVAL.VMA with GV=AV=PSCV=0 */
}
} else {
riscv_iommu_cmd_inval_set_gscid(&cmd,
FIELD_GET(RISCV_IOMMU_DC_IOHGATP_GSCID, iohgatp));
if (inval_pdt) {
/*
* IOTINVAL.VMA with GV=1, AV=0, and PSCV=1, and
* GSCID=DC.iohgatp.GSCID, PSCID=PC.PSCID
*/
riscv_iommu_cmd_inval_set_pscid(&cmd,
FIELD_GET(RISCV_IOMMU_PC_TA_PSCID, pc->ta));
}
/*
* else: IOTINVAL.VMA with GV=1, AV=PSCV=0, and
* GSCID=DC.iohgatp.GSCID
*
* IOTINVAL.GVMA with GV=1, AV=0, and
* GSCID=DC.iohgatp.GSCID
* TODO: The second-stage feature has not yet been merged; also issue
* IOTINVAL.GVMA once second-stage support is merged.
*/
}
riscv_iommu_cmd_send(iommu, &cmd);
}
/*
* Update IODIR for the device.
*
@ -1031,6 +1098,11 @@ static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
riscv_iommu_cmd_iodir_inval_ddt(&cmd);
riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);
riscv_iommu_cmd_send(iommu, &cmd);
/*
* For now, the SVA and PASID features have not yet been merged, so the
* default configuration is inval_pdt=false and pc=NULL.
*/
riscv_iommu_iodir_iotinval(iommu, false, dc->iohgatp, dc, NULL);
sync_required = true;
}
@ -1056,6 +1128,11 @@ static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
riscv_iommu_cmd_iodir_inval_ddt(&cmd);
riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);
riscv_iommu_cmd_send(iommu, &cmd);
/*
* For now, the SVA and PASID features have not yet been merged, so the
* default configuration is inval_pdt=false and pc=NULL.
*/
riscv_iommu_iodir_iotinval(iommu, false, dc->iohgatp, dc, NULL);
}
riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
@ -1077,158 +1154,9 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
}
#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))
#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)
#define _io_pte_none(pte) ((pte) == 0)
#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
unsigned long pte,
struct iommu_pages_list *freelist)
{
unsigned long *ptr;
int i;
if (!_io_pte_present(pte) || _io_pte_leaf(pte))
return;
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
/* Recursively free all sub page table pages */
for (i = 0; i < PTRS_PER_PTE; i++) {
pte = READ_ONCE(ptr[i]);
if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
riscv_iommu_pte_free(domain, pte, freelist);
}
if (freelist)
iommu_pages_list_add(freelist, ptr);
else
iommu_free_pages(ptr);
}
static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
unsigned long iova, size_t pgsize,
gfp_t gfp)
{
unsigned long *ptr = domain->pgd_root;
unsigned long pte, old;
int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
void *addr;
do {
const int shift = PAGE_SHIFT + PT_SHIFT * level;
ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
/*
* Note: returned entry might be a non-leaf if there was
* existing mapping with smaller granularity. Up to the caller
* to replace and invalidate.
*/
if (((size_t)1 << shift) == pgsize)
return ptr;
pte_retry:
pte = READ_ONCE(*ptr);
/*
* This is very likely incorrect as we should not be adding
* new mapping with smaller granularity on top
* of existing 2M/1G mapping. Fail.
*/
if (_io_pte_present(pte) && _io_pte_leaf(pte))
return NULL;
/*
* Non-leaf entry is missing, allocate and try to add to the
* page table. This might race with other mappings, retry.
*/
if (_io_pte_none(pte)) {
addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
SZ_4K);
if (!addr)
return NULL;
old = pte;
pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
if (cmpxchg_relaxed(ptr, old, pte) != old) {
iommu_free_pages(addr);
goto pte_retry;
}
}
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
} while (level-- > 0);
return NULL;
}
static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
unsigned long iova, size_t *pte_pgsize)
{
unsigned long *ptr = domain->pgd_root;
unsigned long pte;
int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
do {
const int shift = PAGE_SHIFT + PT_SHIFT * level;
ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
pte = READ_ONCE(*ptr);
if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
*pte_pgsize = (size_t)1 << shift;
return ptr;
}
if (_io_pte_none(pte))
return NULL;
ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
} while (level-- > 0);
return NULL;
}
static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
unsigned long iova, phys_addr_t phys,
size_t pgsize, size_t pgcount, int prot,
gfp_t gfp, size_t *mapped)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
size_t size = 0;
unsigned long *ptr;
unsigned long pte, old, pte_prot;
int rc = 0;
struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
if (!(prot & IOMMU_WRITE))
pte_prot = _PAGE_BASE | _PAGE_READ;
else if (domain->amo_enabled)
pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE;
else
pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
while (pgcount) {
ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
if (!ptr) {
rc = -ENOMEM;
break;
}
old = READ_ONCE(*ptr);
pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
if (cmpxchg_relaxed(ptr, old, pte) != old)
continue;
riscv_iommu_pte_free(domain, old, &freelist);
size += pgsize;
iova += pgsize;
phys += pgsize;
--pgcount;
}
*mapped = size;
if (!iommu_pages_list_empty(&freelist)) {
if (iommu_pages_list_empty(&gather->freelist)) {
riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
} else {
/*
* In 1.0 spec version, the smallest scope we can use to
* invalidate all levels of page table (i.e. leaf and non-leaf)
@ -1237,71 +1165,20 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
* capability.NL (non-leaf) IOTINVAL command.
*/
riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
iommu_put_pages_list(&freelist);
iommu_put_pages_list(&gather->freelist);
}
return rc;
}
static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
unsigned long iova, size_t pgsize,
size_t pgcount,
struct iommu_iotlb_gather *gather)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
size_t size = pgcount << __ffs(pgsize);
unsigned long *ptr, old;
size_t unmapped = 0;
size_t pte_size;
while (unmapped < size) {
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
if (!ptr)
return unmapped;
/* partial unmap is not allowed, fail. */
if (iova & (pte_size - 1))
return unmapped;
old = READ_ONCE(*ptr);
if (cmpxchg_relaxed(ptr, old, 0) != old)
continue;
iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
pte_size);
iova += pte_size;
unmapped += pte_size;
}
return unmapped;
}
static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
dma_addr_t iova)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
size_t pte_size;
unsigned long *ptr;
ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
if (!ptr)
return 0;
return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
}
static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
{
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
const unsigned long pfn = virt_to_pfn(domain->pgd_root);
WARN_ON(!list_empty(&domain->bonds));
if ((int)domain->pscid > 0)
ida_free(&riscv_iommu_pscids, domain->pscid);
riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
pt_iommu_deinit(&domain->riscvpt.iommu);
kfree(domain);
}
@ -1327,13 +1204,16 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
struct riscv_iommu_device *iommu = dev_to_iommu(dev);
struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
struct pt_iommu_riscv_64_hw_info pt_info;
u64 fsc, ta;
if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info);
if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode))
return -ENODEV;
fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) |
FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn);
ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
RISCV_IOMMU_PC_TA_V;
@ -1348,37 +1228,32 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
}
static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
IOMMU_PT_DOMAIN_OPS(riscv_64),
.attach_dev = riscv_iommu_attach_paging_domain,
.free = riscv_iommu_free_paging_domain,
.map_pages = riscv_iommu_map_pages,
.unmap_pages = riscv_iommu_unmap_pages,
.iova_to_phys = riscv_iommu_iova_to_phys,
.iotlb_sync = riscv_iommu_iotlb_sync,
.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
};
static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
{
struct pt_iommu_riscv_64_cfg cfg = {};
struct riscv_iommu_domain *domain;
struct riscv_iommu_device *iommu;
unsigned int pgd_mode;
dma_addr_t va_mask;
int va_bits;
int ret;
iommu = dev_to_iommu(dev);
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
va_bits = 57;
cfg.common.hw_max_vasz_lg2 = 57;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
va_bits = 48;
cfg.common.hw_max_vasz_lg2 = 48;
} else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
va_bits = 39;
cfg.common.hw_max_vasz_lg2 = 39;
} else {
dev_err(dev, "cannot find supported page table mode\n");
return ERR_PTR(-ENODEV);
}
cfg.common.hw_max_oasz_lg2 = 56;
domain = kzalloc_obj(*domain);
if (!domain)
@ -1386,43 +1261,28 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
INIT_LIST_HEAD_RCU(&domain->bonds);
spin_lock_init(&domain->lock);
domain->numa_node = dev_to_node(iommu->dev);
domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD);
domain->pgd_mode = pgd_mode;
domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
GFP_KERNEL_ACCOUNT, SZ_4K);
if (!domain->pgd_root) {
kfree(domain);
return ERR_PTR(-ENOMEM);
}
/*
* 6.4 IOMMU capabilities [..] IOMMU implementations must support the
* Svnapot standard extension for NAPOT Translation Contiguity.
*/
cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
BIT(PT_FEAT_FLUSH_RANGE) |
BIT(PT_FEAT_RISCV_SVNAPOT_64K);
domain->riscvpt.iommu.nid = dev_to_node(iommu->dev);
domain->domain.ops = &riscv_iommu_paging_domain_ops;
domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
if (domain->pscid < 0) {
iommu_free_pages(domain->pgd_root);
kfree(domain);
riscv_iommu_free_paging_domain(&domain->domain);
return ERR_PTR(-ENOMEM);
}
/*
* Note: RISC-V Privilege spec mandates that virtual addresses
* need to be sign-extended, so if (VA_BITS - 1) is set, all
* bits >= VA_BITS need to also be set or else we'll get a
* page fault. However the code that creates the mappings
* above us (e.g. iommu_dma_alloc_iova()) won't do that for us
* for now, so we'll end up with invalid virtual addresses
* to map. As a workaround until we get this sorted out
* limit the available virtual addresses to VA_BITS - 1.
*/
va_mask = DMA_BIT_MASK(va_bits - 1);
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = va_mask;
domain->domain.geometry.force_aperture = true;
domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
domain->domain.ops = &riscv_iommu_paging_domain_ops;
ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL);
if (ret) {
riscv_iommu_free_paging_domain(&domain->domain);
return ERR_PTR(ret);
}
return &domain->domain;
}
@ -1512,8 +1372,6 @@ static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
* the device directory. Do not mark the context valid yet.
*/
tc = 0;
if (iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD)
tc |= RISCV_IOMMU_DC_TC_SADE;
for (i = 0; i < fwspec->num_ids; i++) {
dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);
if (!dc) {
@ -1680,3 +1538,5 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
riscv_iommu_queue_disable(&iommu->cmdq);
return rc;
}
MODULE_IMPORT_NS("GENERIC_PT_IOMMU");

View File

@ -175,6 +175,22 @@ enum {
PT_FEAT_VTDSS_FORCE_WRITEABLE,
};
struct pt_riscv_32 {
struct pt_common common;
};
struct pt_riscv_64 {
struct pt_common common;
};
enum {
/*
* Support the 64k contiguous page size following the Svnapot extension.
*/
PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START,
};
struct pt_x86_64 {
struct pt_common common;
};

View File

@ -66,6 +66,13 @@ struct pt_iommu {
struct device *iommu_device;
};
static inline struct pt_iommu *iommupt_from_domain(struct iommu_domain *domain)
{
if (!IS_ENABLED(CONFIG_IOMMU_PT) || !domain->is_iommupt)
return NULL;
return container_of(domain, struct pt_iommu, domain);
}
/**
* struct pt_iommu_info - Details about the IOMMU page table
*
@ -80,6 +87,56 @@ struct pt_iommu_info {
};
struct pt_iommu_ops {
/**
* @map_range: Install translation for an IOVA range
* @iommu_table: Table to manipulate
* @iova: IO virtual address to start
* @paddr: Physical/Output address to start
* @len: Length of the range starting from @iova
* @prot: A bitmap of IOMMU_READ/WRITE/CACHE/NOEXEC/MMIO
* @gfp: GFP flags for any memory allocations
*
* The range starting at IOVA will have paddr installed into it. The
* range is automatically segmented into optimally sized table entries,
* and can have any valid alignment.
*
* On error the caller will probably want to invoke unmap on the range
* from iova up to the amount indicated by @mapped to return the table
* back to an unchanged state.
*
* Context: The caller must hold a write range lock that includes
* the whole range.
*
* Returns: -ERRNO on failure, 0 on success. The number of bytes of VA
* that were mapped are added to @mapped; @mapped is not zeroed first.
*/
int (*map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
phys_addr_t paddr, dma_addr_t len, unsigned int prot,
gfp_t gfp, size_t *mapped);
/**
* @unmap_range: Make a range of IOVA empty/not present
* @iommu_table: Table to manipulate
* @iova: IO virtual address to start
* @len: Length of the range starting from @iova
* @iotlb_gather: Gather struct that must be flushed on return
*
* unmap_range() will remove a translation created by map_range(). It
* cannot subdivide a mapping created by map_range(), so it should be
* called with IOVA ranges that match those passed to map_pages. The
* IOVA range can aggregate contiguous map_range() calls so long as no
* individual range is split.
*
* Context: The caller must hold a write range lock that includes
* the whole range.
*
* Returns: Number of bytes of VA unmapped. iova + res will be the
* point at which unmapping stopped.
*/
size_t (*unmap_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
dma_addr_t len,
struct iommu_iotlb_gather *iotlb_gather);
/**
* @set_dirty: Make the iova write dirty
* @iommu_table: Table to manipulate
@ -194,14 +251,6 @@ struct pt_iommu_cfg {
#define IOMMU_PROTOTYPES(fmt) \
phys_addr_t pt_iommu_##fmt##_iova_to_phys(struct iommu_domain *domain, \
dma_addr_t iova); \
int pt_iommu_##fmt##_map_pages(struct iommu_domain *domain, \
unsigned long iova, phys_addr_t paddr, \
size_t pgsize, size_t pgcount, \
int prot, gfp_t gfp, size_t *mapped); \
size_t pt_iommu_##fmt##_unmap_pages( \
struct iommu_domain *domain, unsigned long iova, \
size_t pgsize, size_t pgcount, \
struct iommu_iotlb_gather *iotlb_gather); \
int pt_iommu_##fmt##_read_and_clear_dirty( \
struct iommu_domain *domain, unsigned long iova, size_t size, \
unsigned long flags, struct iommu_dirty_bitmap *dirty); \
@ -222,9 +271,7 @@ struct pt_iommu_cfg {
* iommu_pt
*/
#define IOMMU_PT_DOMAIN_OPS(fmt) \
.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys, \
.map_pages = &pt_iommu_##fmt##_map_pages, \
.unmap_pages = &pt_iommu_##fmt##_unmap_pages
.iova_to_phys = &pt_iommu_##fmt##_iova_to_phys
#define IOMMU_PT_DIRTY_OPS(fmt) \
.read_and_clear_dirty = &pt_iommu_##fmt##_read_and_clear_dirty
@ -275,6 +322,17 @@ struct pt_iommu_vtdss_hw_info {
IOMMU_FORMAT(vtdss, vtdss_pt);
struct pt_iommu_riscv_64_cfg {
struct pt_iommu_cfg common;
};
struct pt_iommu_riscv_64_hw_info {
u64 ppn;
u8 fsc_iosatp_mode;
};
IOMMU_FORMAT(riscv_64, riscv_64pt);
struct pt_iommu_x86_64_cfg {
struct pt_iommu_cfg common;
/* 4 is a 57 bit 5 level table */

View File

@ -223,6 +223,7 @@ enum iommu_domain_cookie_type {
struct iommu_domain {
unsigned type;
enum iommu_domain_cookie_type cookie_type;
bool is_iommupt;
const struct iommu_domain_ops *ops;
const struct iommu_dirty_ops *dirty_ops;
const struct iommu_ops *owner; /* Whose domain_alloc we came from */
@ -271,6 +272,8 @@ enum iommu_cap {
*/
IOMMU_CAP_DEFERRED_FLUSH,
IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */
/* ATS is supported and may be enabled for this device */
IOMMU_CAP_PCI_ATS_SUPPORTED,
};
/* These are the possible reserved region types */
@ -980,7 +983,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain)
static inline void iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather)
{
if (domain->ops->iotlb_sync)
if (domain->ops->iotlb_sync &&
likely(iotlb_gather->start < iotlb_gather->end))
domain->ops->iotlb_sync(domain, iotlb_gather);
iommu_iotlb_gather_init(iotlb_gather);
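For context on the new start < end test: iommu_iotlb_gather_init() leaves the gather with start = ULONG_MAX and end = 0, so the comparison only holds once someone records a range — either the driver's unmap path or, after the __iommu_unmap() change above, the core itself. A minimal sketch of that life cycle (iova and size are placeholders):

struct iommu_iotlb_gather gather;

iommu_iotlb_gather_init(&gather);			/* start = ULONG_MAX, end = 0: "empty" */
iommu_iotlb_gather_add_range(&gather, iova, size);	/* start <= iova, end >= iova + size - 1 */
iommu_iotlb_sync(domain, &gather);			/* calls ->iotlb_sync only when start < end */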

View File

@ -695,11 +695,15 @@ enum iommu_hw_info_type {
* @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
* when the struct
* iommu_hw_info::out_max_pasid_log2 is zero.
* @IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED: ATS is not supported or cannot be used
* on this device (absence implies ATS
* may be enabled)
*/
enum iommufd_hw_capabilities {
IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED = 1 << 3,
};
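A rough userspace sketch of consuming the new bit (assuming the usual IOMMU_GET_HW_INFO flow; iommufd is an open /dev/iommu file descriptor and dev_id a previously bound device ID):

#include <err.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

void report_ats(int iommufd, __u32 dev_id)
{
	struct iommu_hw_info info = {
		.size = sizeof(info),
		.dev_id = dev_id,
	};

	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &info))
		err(1, "IOMMU_GET_HW_INFO");

	if (info.out_capabilities & IOMMU_HW_CAP_PCI_ATS_NOT_SUPPORTED)
		printf("ATS cannot be used for this device\n");
}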
/**
@ -1052,6 +1056,11 @@ struct iommu_fault_alloc {
enum iommu_viommu_type {
IOMMU_VIOMMU_TYPE_DEFAULT = 0,
IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
/*
* TEGRA241_CMDQV requirements (otherwise, VCMDQs will not work)
* - The kernel will allocate a VINTF (HYP_OWN=0) to back this VIOMMU, so the
* VMM must wire the HYP_OWN bit to 0 in the guest VINTF_CONFIG register
*/
IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
};

View File

@ -2275,6 +2275,33 @@ TEST_F(iommufd_dirty_tracking, set_dirty_tracking)
test_ioctl_destroy(hwpt_id);
}
TEST_F(iommufd_dirty_tracking, pasid_set_dirty_tracking)
{
uint32_t stddev_id, ioas_id, hwpt_id, pasid = 100;
uint32_t dev_flags = MOCK_FLAGS_DEVICE_PASID;
/* Regular case */
test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
IOMMU_HWPT_ALLOC_PASID | IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
&hwpt_id);
test_cmd_mock_domain_flags(hwpt_id, dev_flags, &stddev_id, NULL, NULL);
ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, stddev_id, pasid, hwpt_id));
test_cmd_set_dirty_tracking(hwpt_id, true);
test_cmd_set_dirty_tracking(hwpt_id, false);
ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, stddev_id, pasid));
test_ioctl_destroy(stddev_id);
/* IOMMU device does not support dirty tracking */
dev_flags |= MOCK_FLAGS_DEVICE_NO_DIRTY;
test_ioctl_ioas_alloc(&ioas_id);
test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, NULL, NULL);
EXPECT_ERRNO(EINVAL, _test_cmd_pasid_attach(self->fd, stddev_id, pasid, hwpt_id));
test_ioctl_destroy(stddev_id);
test_ioctl_destroy(hwpt_id);
}
TEST_F(iommufd_dirty_tracking, device_dirty_capability)
{
uint32_t caps = 0;