diff --git a/drivers/iommu/riscv/Kconfig b/drivers/iommu/riscv/Kconfig index c071816f59a6..a329ec634cf1 100644 --- a/drivers/iommu/riscv/Kconfig +++ b/drivers/iommu/riscv/Kconfig @@ -6,6 +6,9 @@ config RISCV_IOMMU depends on RISCV && 64BIT default y select IOMMU_API + select GENERIC_PT + select IOMMU_PT + select IOMMU_PT_RISCV64 help Support for implementations of the RISC-V IOMMU architecture that complements the RISC-V MMU capabilities, providing similar address diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 0b2903372470..6ceca9bed917 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "../iommu-pages.h" #include "iommu-bits.h" @@ -806,14 +807,15 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu, /* This struct contains protection domain specific IOMMU driver data. */ struct riscv_iommu_domain { - struct iommu_domain domain; + union { + struct iommu_domain domain; + struct pt_iommu_riscv_64 riscvpt; + }; struct list_head bonds; spinlock_t lock; /* protect bonds list updates. */ int pscid; - int numa_node; - unsigned int pgd_mode; - unsigned long *pgd_root; }; +PT_IOMMU_CHECK_DOMAIN(struct riscv_iommu_domain, riscvpt.iommu, domain); #define iommu_domain_to_riscv(iommu_domain) \ container_of(iommu_domain, struct riscv_iommu_domain, domain) @@ -1076,156 +1078,9 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain, { struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - riscv_iommu_iotlb_inval(domain, gather->start, gather->end); -} - -#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t))) - -#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) -#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF) -#define _io_pte_none(pte) ((pte) == 0) -#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot)) - -static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain, - unsigned long pte, - struct iommu_pages_list *freelist) -{ - unsigned long *ptr; - int i; - - if (!_io_pte_present(pte) || _io_pte_leaf(pte)) - return; - - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); - - /* Recursively free all sub page table pages */ - for (i = 0; i < PTRS_PER_PTE; i++) { - pte = READ_ONCE(ptr[i]); - if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte) - riscv_iommu_pte_free(domain, pte, freelist); - } - - if (freelist) - iommu_pages_list_add(freelist, ptr); - else - iommu_free_pages(ptr); -} - -static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain, - unsigned long iova, size_t pgsize, - gfp_t gfp) -{ - unsigned long *ptr = domain->pgd_root; - unsigned long pte, old; - int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; - void *addr; - - do { - const int shift = PAGE_SHIFT + PT_SHIFT * level; - - ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); - /* - * Note: returned entry might be a non-leaf if there was - * existing mapping with smaller granularity. Up to the caller - * to replace and invalidate. - */ - if (((size_t)1 << shift) == pgsize) - return ptr; -pte_retry: - pte = READ_ONCE(*ptr); - /* - * This is very likely incorrect as we should not be adding - * new mapping with smaller granularity on top - * of existing 2M/1G mapping. Fail. - */ - if (_io_pte_present(pte) && _io_pte_leaf(pte)) - return NULL; - /* - * Non-leaf entry is missing, allocate and try to add to the - * page table. This might race with other mappings, retry. - */ - if (_io_pte_none(pte)) { - addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp, - SZ_4K); - if (!addr) - return NULL; - old = pte; - pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE); - if (cmpxchg_relaxed(ptr, old, pte) != old) { - iommu_free_pages(addr); - goto pte_retry; - } - } - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); - } while (level-- > 0); - - return NULL; -} - -static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain, - unsigned long iova, size_t *pte_pgsize) -{ - unsigned long *ptr = domain->pgd_root; - unsigned long pte; - int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2; - - do { - const int shift = PAGE_SHIFT + PT_SHIFT * level; - - ptr += ((iova >> shift) & (PTRS_PER_PTE - 1)); - pte = READ_ONCE(*ptr); - if (_io_pte_present(pte) && _io_pte_leaf(pte)) { - *pte_pgsize = (size_t)1 << shift; - return ptr; - } - if (_io_pte_none(pte)) - return NULL; - ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte)); - } while (level-- > 0); - - return NULL; -} - -static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, - unsigned long iova, phys_addr_t phys, - size_t pgsize, size_t pgcount, int prot, - gfp_t gfp, size_t *mapped) -{ - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - size_t size = 0; - unsigned long *ptr; - unsigned long pte, old, pte_prot; - int rc = 0; - struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); - - if (!(prot & IOMMU_WRITE)) - pte_prot = _PAGE_BASE | _PAGE_READ; - else - pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY; - - while (pgcount) { - ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp); - if (!ptr) { - rc = -ENOMEM; - break; - } - - old = READ_ONCE(*ptr); - pte = _io_pte_entry(phys_to_pfn(phys), pte_prot); - if (cmpxchg_relaxed(ptr, old, pte) != old) - continue; - - riscv_iommu_pte_free(domain, old, &freelist); - - size += pgsize; - iova += pgsize; - phys += pgsize; - --pgcount; - } - - *mapped = size; - - if (!iommu_pages_list_empty(&freelist)) { + if (iommu_pages_list_empty(&gather->freelist)) { + riscv_iommu_iotlb_inval(domain, gather->start, gather->end); + } else { /* * In 1.0 spec version, the smallest scope we can use to * invalidate all levels of page table (i.e. leaf and non-leaf) @@ -1234,71 +1089,20 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, * capability.NL (non-leaf) IOTINVAL command. */ riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX); - iommu_put_pages_list(&freelist); + iommu_put_pages_list(&gather->freelist); } - - return rc; -} - -static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain, - unsigned long iova, size_t pgsize, - size_t pgcount, - struct iommu_iotlb_gather *gather) -{ - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - size_t size = pgcount << __ffs(pgsize); - unsigned long *ptr, old; - size_t unmapped = 0; - size_t pte_size; - - while (unmapped < size) { - ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); - if (!ptr) - return unmapped; - - /* partial unmap is not allowed, fail. */ - if (iova & (pte_size - 1)) - return unmapped; - - old = READ_ONCE(*ptr); - if (cmpxchg_relaxed(ptr, old, 0) != old) - continue; - - iommu_iotlb_gather_add_page(&domain->domain, gather, iova, - pte_size); - - iova += pte_size; - unmapped += pte_size; - } - - return unmapped; -} - -static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain, - dma_addr_t iova) -{ - struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - size_t pte_size; - unsigned long *ptr; - - ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size); - if (!ptr) - return 0; - - return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1)); } static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain) { struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); - const unsigned long pfn = virt_to_pfn(domain->pgd_root); WARN_ON(!list_empty(&domain->bonds)); if ((int)domain->pscid > 0) ida_free(&riscv_iommu_pscids, domain->pscid); - riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL); + pt_iommu_deinit(&domain->riscvpt.iommu); kfree(domain); } @@ -1324,13 +1128,16 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain, struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain); struct riscv_iommu_device *iommu = dev_to_iommu(dev); struct riscv_iommu_info *info = dev_iommu_priv_get(dev); + struct pt_iommu_riscv_64_hw_info pt_info; u64 fsc, ta; - if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode)) + pt_iommu_riscv_64_hw_info(&domain->riscvpt, &pt_info); + + if (!riscv_iommu_pt_supported(iommu, pt_info.fsc_iosatp_mode)) return -ENODEV; - fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) | - FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root)); + fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, pt_info.fsc_iosatp_mode) | + FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, pt_info.ppn); ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) | RISCV_IOMMU_PC_TA_V; @@ -1345,37 +1152,32 @@ static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain, } static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = { + IOMMU_PT_DOMAIN_OPS(riscv_64), .attach_dev = riscv_iommu_attach_paging_domain, .free = riscv_iommu_free_paging_domain, - .map_pages = riscv_iommu_map_pages, - .unmap_pages = riscv_iommu_unmap_pages, - .iova_to_phys = riscv_iommu_iova_to_phys, .iotlb_sync = riscv_iommu_iotlb_sync, .flush_iotlb_all = riscv_iommu_iotlb_flush_all, }; static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) { + struct pt_iommu_riscv_64_cfg cfg = {}; struct riscv_iommu_domain *domain; struct riscv_iommu_device *iommu; - unsigned int pgd_mode; - dma_addr_t va_mask; - int va_bits; + int ret; iommu = dev_to_iommu(dev); if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) { - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57; - va_bits = 57; + cfg.common.hw_max_vasz_lg2 = 57; } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) { - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48; - va_bits = 48; + cfg.common.hw_max_vasz_lg2 = 48; } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) { - pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39; - va_bits = 39; + cfg.common.hw_max_vasz_lg2 = 39; } else { dev_err(dev, "cannot find supported page table mode\n"); return ERR_PTR(-ENODEV); } + cfg.common.hw_max_oasz_lg2 = 56; domain = kzalloc_obj(*domain); if (!domain) @@ -1383,42 +1185,23 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) INIT_LIST_HEAD_RCU(&domain->bonds); spin_lock_init(&domain->lock); - domain->numa_node = dev_to_node(iommu->dev); - domain->pgd_mode = pgd_mode; - domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node, - GFP_KERNEL_ACCOUNT, SZ_4K); - if (!domain->pgd_root) { - kfree(domain); - return ERR_PTR(-ENOMEM); - } + cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) | + BIT(PT_FEAT_FLUSH_RANGE); + domain->riscvpt.iommu.nid = dev_to_node(iommu->dev); + domain->domain.ops = &riscv_iommu_paging_domain_ops; domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1, RISCV_IOMMU_MAX_PSCID, GFP_KERNEL); if (domain->pscid < 0) { - iommu_free_pages(domain->pgd_root); - kfree(domain); + riscv_iommu_free_paging_domain(&domain->domain); return ERR_PTR(-ENOMEM); } - /* - * Note: RISC-V Privilege spec mandates that virtual addresses - * need to be sign-extended, so if (VA_BITS - 1) is set, all - * bits >= VA_BITS need to also be set or else we'll get a - * page fault. However the code that creates the mappings - * above us (e.g. iommu_dma_alloc_iova()) won't do that for us - * for now, so we'll end up with invalid virtual addresses - * to map. As a workaround until we get this sorted out - * limit the available virtual addresses to VA_BITS - 1. - */ - va_mask = DMA_BIT_MASK(va_bits - 1); - - domain->domain.geometry.aperture_start = 0; - domain->domain.geometry.aperture_end = va_mask; - domain->domain.geometry.force_aperture = true; - domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G); - - domain->domain.ops = &riscv_iommu_paging_domain_ops; - + ret = pt_iommu_riscv_64_init(&domain->riscvpt, &cfg, GFP_KERNEL); + if (ret) { + riscv_iommu_free_paging_domain(&domain->domain); + return ERR_PTR(ret); + } return &domain->domain; } @@ -1674,3 +1457,5 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu) riscv_iommu_queue_disable(&iommu->cmdq); return rc; } + +MODULE_IMPORT_NS("GENERIC_PT_IOMMU");