From 3f29f6537f54d74e64bac0a390fb2e26da25800d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:46 +0200 Subject: [PATCH 01/54] s390/uv: Don't call folio_wait_writeback() without a folio reference folio_wait_writeback() requires that no spinlocks are held and that a folio reference is held, as documented. After we dropped the PTL, the folio could get freed concurrently. So grab a temporary reference. Fixes: 214d9bbcd3a6 ("s390/mm: provide memory management functions for protected KVM guests") Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-2-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/uv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 265fea37e030..016993e9eb72 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -318,6 +318,13 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) rc = make_folio_secure(folio, uvcb); folio_unlock(folio); } + + /* + * Once we drop the PTL, the folio may get unmapped and + * freed immediately. We need a temporary reference. + */ + if (rc == -EAGAIN) + folio_get(folio); } unlock: pte_unmap_unlock(ptep, ptelock); @@ -330,6 +337,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) * completion, this is just a useless check, but it is safe. */ folio_wait_writeback(folio); + folio_put(folio); } else if (rc == -EBUSY) { /* * If we have tried a local drain and the folio refcount From 68ad4743beaa7187eb026f1f9bec8848ecf5ed43 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:47 +0200 Subject: [PATCH 02/54] s390/uv: gmap_make_secure() cleanups for further changes Let's factor out handling of LRU cache draining and convert the if-else chain to a switch-case. 
Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-3-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/uv.c | 66 ++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 016993e9eb72..25fe28d189df 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -266,6 +266,36 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str return atomic_read(&mm->context.protected_count) > 1; } +/* + * Drain LRU caches: the local one on first invocation and the ones of all + * CPUs on successive invocations. Returns "true" on the first invocation. + */ +static bool drain_lru(bool *drain_lru_called) +{ + /* + * If we have tried a local drain and the folio refcount + * still does not match our expected safe value, try with a + * system wide drain. This is needed if the pagevecs holding + * the page are on a different CPU. + */ + if (*drain_lru_called) { + lru_add_drain_all(); + /* We give up here, don't retry immediately. */ + return false; + } + /* + * We are here if the folio refcount does not match the + * expected safe value. The main culprits are usually + * pagevecs. With lru_add_drain() we drain the pagevecs + * on the local CPU so that hopefully the refcount will + * reach the expected safe value. + */ + lru_add_drain(); + *drain_lru_called = true; + /* The caller should try again immediately */ + return true; +} + /* * Requests the Ultravisor to make a page accessible to a guest. * If it's brought in the first time, it will be cleared. 
If @@ -275,7 +305,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) { struct vm_area_struct *vma; - bool local_drain = false; + bool drain_lru_called = false; spinlock_t *ptelock; unsigned long uaddr; struct folio *folio; @@ -331,37 +361,21 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) out: mmap_read_unlock(gmap->mm); - if (rc == -EAGAIN) { + switch (rc) { + case -EAGAIN: /* * If we are here because the UVC returned busy or partial * completion, this is just a useless check, but it is safe. */ folio_wait_writeback(folio); folio_put(folio); - } else if (rc == -EBUSY) { - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (local_drain) { - lru_add_drain_all(); - /* We give up here, and let the caller try again */ - return -EAGAIN; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - local_drain = true; - /* And now we try again immediately after draining */ - goto again; - } else if (rc == -ENXIO) { + return -EAGAIN; + case -EBUSY: + /* Additional folio references. 
*/ + if (drain_lru(&drain_lru_called)) + goto again; + return -EAGAIN; + case -ENXIO: if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) return -EFAULT; return -EAGAIN; From eef88fe45ac9783e0dd108b033c1e933c38cab34 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:48 +0200 Subject: [PATCH 03/54] s390/uv: Split large folios in gmap_make_secure() While s390x makes sure to never have PMD-mapped THP in processes that use KVM -- by remapping them using PTEs in thp_split_walk_pmd_entry()->split_huge_pmd() -- there is still the possibility of having PTE-mapped THPs (large folios) mapped into guest memory. This would happen if user space allocates memory before calling KVM_CREATE_VM (which would call s390_enable_sie()). With upstream QEMU, this currently doesn't happen, because guest memory is set up and conditionally preallocated after KVM_CREATE_VM. Could it happen with shmem/file-backed memory when another process allocated memory in the pagecache? Likely, although currently not a common setup. Trying to split any PTE-mapped large folios sounds like the right and future-proof thing to do here. So let's call split_folio() and handle the return values accordingly. 
Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-4-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/uv.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 25fe28d189df..3c6d86e3e828 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -338,11 +338,10 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) goto out; if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { folio = page_folio(pte_page(*ptep)); - rc = -EINVAL; - if (folio_test_large(folio)) - goto unlock; rc = -EAGAIN; - if (folio_trylock(folio)) { + if (folio_test_large(folio)) { + rc = -E2BIG; + } else if (folio_trylock(folio)) { if (should_export_before_import(uvcb, gmap->mm)) uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); rc = make_folio_secure(folio, uvcb); @@ -353,15 +352,35 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) * Once we drop the PTL, the folio may get unmapped and * freed immediately. We need a temporary reference. */ - if (rc == -EAGAIN) + if (rc == -EAGAIN || rc == -E2BIG) folio_get(folio); } -unlock: pte_unmap_unlock(ptep, ptelock); out: mmap_read_unlock(gmap->mm); switch (rc) { + case -E2BIG: + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + folio_put(folio); + + switch (rc) { + case 0: + /* Splitting succeeded, try again immediately. */ + goto again; + case -EAGAIN: + /* Additional folio references. */ + if (drain_lru(&drain_lru_called)) + goto again; + return -EAGAIN; + case -EBUSY: + /* Unexpected race. 
*/ + return -EAGAIN; + } + WARN_ON_ONCE(1); + return -ENXIO; case -EAGAIN: /* * If we are here because the UVC returned busy or partial From 036c0e104bd2cf45304a3795fd8ec985ea01bfe8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:49 +0200 Subject: [PATCH 04/54] s390/uv: Convert PG_arch_1 users to only work on small folios Now that make_folio_secure() may only set PG_arch_1 for small folios, let's convert relevant remaining UV code to only work on (small) folios and simply reject large folios early. This way, we'll never end up touching PG_arch_1 on tail pages of a large folio in UV code. The folio_get()/folio_put() for functions that are documented to already hold a folio reference look weird; likely they are required to make concurrent gmap_make_secure() back off because the caller might only hold an implicit reference due to the page mapping. So leave that alone for now. Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-5-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/page.h | 2 ++ arch/s390/kernel/uv.c | 41 ++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 224ff9d433ea..ecbf4b626f46 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -247,7 +247,9 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit) #define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) +#define phys_to_folio(phys) page_folio(phys_to_page(phys)) #define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define folio_to_phys(page) pfn_to_phys(folio_pfn(folio)) static inline void *pfn_to_virt(unsigned long pfn) { diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 3c6d86e3e828..914dcec27329 100644 --- 
a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -135,14 +135,18 @@ static int uv_destroy_page(unsigned long paddr) */ int uv_destroy_owned_page(unsigned long paddr) { - struct page *page = phys_to_page(paddr); + struct folio *folio = phys_to_folio(paddr); int rc; - get_page(page); + /* See gmap_make_secure(): large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) + return 0; + + folio_get(folio); rc = uv_destroy_page(paddr); if (!rc) - clear_bit(PG_arch_1, &page->flags); - put_page(page); + clear_bit(PG_arch_1, &folio->flags); + folio_put(folio); return rc; } @@ -170,14 +174,18 @@ int uv_convert_from_secure(unsigned long paddr) */ int uv_convert_owned_from_secure(unsigned long paddr) { - struct page *page = phys_to_page(paddr); + struct folio *folio = phys_to_folio(paddr); int rc; - get_page(page); + /* See gmap_make_secure(): large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) + return 0; + + folio_get(folio); rc = uv_convert_from_secure(paddr); if (!rc) - clear_bit(PG_arch_1, &page->flags); - put_page(page); + clear_bit(PG_arch_1, &folio->flags); + folio_put(folio); return rc; } @@ -479,33 +487,34 @@ EXPORT_SYMBOL_GPL(gmap_destroy_page); */ int arch_make_page_accessible(struct page *page) { + struct folio *folio = page_folio(page); int rc = 0; - /* Hugepage cannot be protected, so nothing to do */ - if (PageHuge(page)) + /* See gmap_make_secure(): large folios cannot be secure */ + if (unlikely(folio_test_large(folio))) return 0; /* * PG_arch_1 is used in 3 places: * 1. for kernel page tables during early boot * 2. for storage keys of huge pages and KVM - * 3. As an indication that this page might be secure. This can + * 3. As an indication that this small folio might be secure. This can * overindicate, e.g. we set the bit before calling * convert_to_secure. * As secure pages are never huge, all 3 variants can co-exists. 
*/ - if (!test_bit(PG_arch_1, &page->flags)) + if (!test_bit(PG_arch_1, &folio->flags)) return 0; - rc = uv_pin_shared(page_to_phys(page)); + rc = uv_pin_shared(folio_to_phys(folio)); if (!rc) { - clear_bit(PG_arch_1, &page->flags); + clear_bit(PG_arch_1, &folio->flags); return 0; } - rc = uv_convert_from_secure(page_to_phys(page)); + rc = uv_convert_from_secure(folio_to_phys(folio)); if (!rc) { - clear_bit(PG_arch_1, &page->flags); + clear_bit(PG_arch_1, &folio->flags); return 0; } From 80cf817949264eff32642aa90da00f03e84e3c0f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:50 +0200 Subject: [PATCH 05/54] s390/uv: Update PG_arch_1 comment We removed the usage of PG_arch_1 for page tables in commit a51324c430db ("s390/cmma: rework no-dat handling"). Let's update the comment in UV to reflect that. Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-6-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/uv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 914dcec27329..ecfc08902215 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -495,13 +495,12 @@ int arch_make_page_accessible(struct page *page) return 0; /* - * PG_arch_1 is used in 3 places: - * 1. for kernel page tables during early boot - * 2. for storage keys of huge pages and KVM - * 3. As an indication that this small folio might be secure. This can + * PG_arch_1 is used in 2 places: + * 1. for storage keys of hugetlb folios and KVM + * 2. As an indication that this small folio might be secure. This can * overindicate, e.g. we set the bit before calling * convert_to_secure. - * As secure pages are never huge, all 3 variants can co-exists. + * As secure pages are never large folios, both variants can co-exists. 
*/ if (!test_bit(PG_arch_1, &folio->flags)) return 0; From e58623fbc178d6c074074c0107103c5d3c8041b1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:51 +0200 Subject: [PATCH 06/54] s390/uv: Make uv_convert_from_secure() a static function It's not used outside of uv.c, so let's make it a static function. Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-7-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uv.h | 6 ------ arch/s390/kernel/uv.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0e7bd3873907..d2205ff97007 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -484,7 +484,6 @@ int uv_pin_shared(unsigned long paddr); int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_owned_page(unsigned long paddr); -int uv_convert_from_secure(unsigned long paddr); int uv_convert_owned_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); @@ -503,11 +502,6 @@ static inline int uv_destroy_owned_page(unsigned long paddr) return 0; } -static inline int uv_convert_from_secure(unsigned long paddr) -{ - return 0; -} - static inline int uv_convert_owned_from_secure(unsigned long paddr) { return 0; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index ecfc08902215..3d3250b406a6 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -156,7 +156,7 @@ int uv_destroy_owned_page(unsigned long paddr) * * @paddr: Absolute host address of page to be exported */ -int uv_convert_from_secure(unsigned long paddr) +static int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, From 
7063150650d828ad3ca77b06531b37bb1aed82b6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:52 +0200 Subject: [PATCH 07/54] s390/uv: Convert uv_destroy_owned_page() to uv_destroy_(folio|pte)() Let's have the following variants for destroying pages: (1) uv_destroy(): Like uv_pin_shared() and uv_convert_from_secure(), "low level" helper that operates on paddr and doesn't mess with folios. (2) uv_destroy_folio(): Consumes a folio to which we hold a reference. (3) uv_destroy_pte(): Consumes a PTE that holds a reference through the mapping. Unfortunately we need uv_destroy_pte(), because pfn_folio() and friends are not available in pgtable.h. Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-8-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/include/asm/uv.h | 10 ++++++++-- arch/s390/kernel/uv.c | 24 +++++++++++++++++------- arch/s390/mm/gmap.c | 6 ++++-- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 70b6ee557eb2..e9eaa4281d94 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1217,7 +1217,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * The notifier should have destroyed all protected vCPUs at this * point, so the destroy should be successful. 
*/ - if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK)) + if (full && !uv_destroy_pte(res)) return res; /* * If something went wrong and the page could not be destroyed, or diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index d2205ff97007..a1bef30066ef 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -483,7 +483,8 @@ static inline int is_prot_virt_host(void) int uv_pin_shared(unsigned long paddr); int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); -int uv_destroy_owned_page(unsigned long paddr); +int uv_destroy_folio(struct folio *folio); +int uv_destroy_pte(pte_t pte); int uv_convert_owned_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); @@ -497,7 +498,12 @@ static inline int uv_pin_shared(unsigned long paddr) return 0; } -static inline int uv_destroy_owned_page(unsigned long paddr) +static inline int uv_destroy_folio(struct folio *folio) +{ + return 0; +} + +static inline int uv_destroy_pte(pte_t pte) { return 0; } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 3d3250b406a6..61c1ce51c883 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(uv_pin_shared); * * @paddr: Absolute host address of page to be destroyed */ -static int uv_destroy_page(unsigned long paddr) +static int uv_destroy(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_DESTR_SEC_STOR, @@ -131,11 +131,10 @@ static int uv_destroy_page(unsigned long paddr) } /* - * The caller must already hold a reference to the page + * The caller must already hold a reference to the folio */ -int uv_destroy_owned_page(unsigned long paddr) +int uv_destroy_folio(struct folio *folio) { - struct folio *folio = phys_to_folio(paddr); int rc; /* See gmap_make_secure(): large folios cannot be secure */ @@ -143,13 +142,22 @@ int 
uv_destroy_owned_page(unsigned long paddr) return 0; folio_get(folio); - rc = uv_destroy_page(paddr); + rc = uv_destroy(folio_to_phys(folio)); if (!rc) clear_bit(PG_arch_1, &folio->flags); folio_put(folio); return rc; } +/* + * The present PTE still indirectly holds a folio reference through the mapping. + */ +int uv_destroy_pte(pte_t pte) +{ + VM_WARN_ON(!pte_present(pte)); + return uv_destroy_folio(pfn_folio(pte_pfn(pte))); +} + /* * Requests the Ultravisor to encrypt a guest page and make it * accessible to the host for paging (export). @@ -437,6 +445,7 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) { struct vm_area_struct *vma; unsigned long uaddr; + struct folio *folio; struct page *page; int rc; @@ -460,7 +469,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET); if (IS_ERR_OR_NULL(page)) goto out; - rc = uv_destroy_owned_page(page_to_phys(page)); + folio = page_folio(page); + rc = uv_destroy_folio(folio); /* * Fault handlers can race; it is possible that two CPUs will fault * on the same secure page. 
One CPU can destroy the page, reboot, @@ -472,7 +482,7 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) */ if (rc) rc = uv_convert_owned_from_secure(page_to_phys(page)); - put_page(page); + folio_put(folio); out: mmap_read_unlock(gmap->mm); return rc; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 474a25ca5c48..7537e7b4be39 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2841,13 +2841,15 @@ static const struct mm_walk_ops gather_pages_ops = { */ void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns) { + struct folio *folio; unsigned long i; for (i = 0; i < count; i++) { + folio = pfn_folio(pfns[i]); /* we always have an extra reference */ - uv_destroy_owned_page(pfn_to_phys(pfns[i])); + uv_destroy_folio(folio); /* get rid of the extra reference */ - put_page(pfn_to_page(pfns[i])); + folio_put(folio); cond_resched(); } } From 7d17143469879409692dc6279794e71be4bb1196 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:53 +0200 Subject: [PATCH 08/54] s390/uv: Convert uv_convert_owned_from_secure() to uv_convert_from_secure_(folio|pte)() Let's do the same as we did for uv_destroy_(folio|pte)() and have the following variants: (1) uv_convert_from_secure(): "low level" helper that operates on paddr and does not mess with folios. (2) uv_convert_from_secure_folio(): Consumes a folio to which we hold a reference. (3) uv_convert_from_secure_pte(): Consumes a PTE that holds a reference through the mapping. Unfortunately we need uv_convert_from_secure_pte(), because pfn_folio() and friends are not available in pgtable.h. 
Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-9-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 6 +++--- arch/s390/include/asm/uv.h | 4 ++-- arch/s390/kernel/uv.c | 18 +++++++++++++----- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e9eaa4281d94..0fd5a9c7e901 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1167,7 +1167,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); /* At this point the reference through the mapping is still present */ if (mm_is_protected(mm) && pte_present(res)) - uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_from_secure_pte(res); return res; } @@ -1185,7 +1185,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); /* At this point the reference through the mapping is still present */ if (mm_is_protected(vma->vm_mm) && pte_present(res)) - uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_from_secure_pte(res); return res; } @@ -1224,7 +1224,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * if this is not a mm teardown, the slower export is used as * fallback instead. 
*/ - uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK); + uv_convert_from_secure_pte(res); return res; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index a1bef30066ef..0679445cac0b 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -485,7 +485,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); -int uv_convert_owned_from_secure(unsigned long paddr); +int uv_convert_from_secure_pte(pte_t pte); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); @@ -508,7 +508,7 @@ static inline int uv_destroy_pte(pte_t pte) return 0; } -static inline int uv_convert_owned_from_secure(unsigned long paddr) +static inline int uv_convert_from_secure_pte(pte_t pte) { return 0; } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 61c1ce51c883..b456066d72da 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -178,11 +178,10 @@ static int uv_convert_from_secure(unsigned long paddr) } /* - * The caller must already hold a reference to the page + * The caller must already hold a reference to the folio. */ -int uv_convert_owned_from_secure(unsigned long paddr) +static int uv_convert_from_secure_folio(struct folio *folio) { - struct folio *folio = phys_to_folio(paddr); int rc; /* See gmap_make_secure(): large folios cannot be secure */ @@ -190,13 +189,22 @@ int uv_convert_owned_from_secure(unsigned long paddr) return 0; folio_get(folio); - rc = uv_convert_from_secure(paddr); + rc = uv_convert_from_secure(folio_to_phys(folio)); if (!rc) clear_bit(PG_arch_1, &folio->flags); folio_put(folio); return rc; } +/* + * The present PTE still indirectly holds a folio reference through the mapping. 
+ */ +int uv_convert_from_secure_pte(pte_t pte) +{ + VM_WARN_ON(!pte_present(pte)); + return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -481,7 +489,7 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) * we instead try to export the page. */ if (rc) - rc = uv_convert_owned_from_secure(page_to_phys(page)); + rc = uv_convert_from_secure_folio(folio); folio_put(folio); out: mmap_read_unlock(gmap->mm); From 99b3f8f76f7a9eb2816d2af5b0b9de4a11dbc721 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:54 +0200 Subject: [PATCH 09/54] s390/uv: Implement HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE Let's also implement HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE, so we can convert arch_make_page_accessible() to be a simple wrapper around arch_make_folio_accessible(). Unfortunately, we cannot do that in the header. There are only two arch_make_page_accessible() calls remaining in gup.c. We can now drop HAVE_ARCH_MAKE_PAGE_ACCESSIBLE completely from core-MM. We'll handle that separately, once the s390x part has landed. 
Suggested-by: Matthew Wilcox Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-10-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/page.h | 3 +++ arch/s390/kernel/uv.c | 18 +++++++++++------- arch/s390/mm/fault.c | 14 ++++++++------ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index ecbf4b626f46..5ec41ec3d761 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -162,6 +162,7 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ struct page; +struct folio; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); @@ -174,6 +175,8 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_ALLOC_PAGE #if IS_ENABLED(CONFIG_PGSTE) +int arch_make_folio_accessible(struct folio *folio); +#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE #endif diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index b456066d72da..fa62fa0e369f 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -498,14 +498,13 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) EXPORT_SYMBOL_GPL(gmap_destroy_page); /* - * To be called with the page locked or with an extra reference! This will - * prevent gmap_make_secure from touching the page concurrently. Having 2 - * parallel make_page_accessible is fine, as the UV calls will become a - * no-op if the page is already exported. + * To be called with the folio locked or with an extra reference! This will + * prevent gmap_make_secure from touching the folio concurrently. 
Having 2 + * parallel arch_make_folio_accessible is fine, as the UV calls will become a + * no-op if the folio is already exported. */ -int arch_make_page_accessible(struct page *page) +int arch_make_folio_accessible(struct folio *folio) { - struct folio *folio = page_folio(page); int rc = 0; /* See gmap_make_secure(): large folios cannot be secure */ @@ -537,8 +536,13 @@ int arch_make_page_accessible(struct page *page) return rc; } -EXPORT_SYMBOL_GPL(arch_make_page_accessible); +EXPORT_SYMBOL_GPL(arch_make_folio_accessible); +int arch_make_page_accessible(struct page *page) +{ + return arch_make_folio_accessible(page_folio(page)); +} +EXPORT_SYMBOL_GPL(arch_make_page_accessible); #endif #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 65747f15dbec..7cd50ad3b4ad 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -492,6 +492,7 @@ void do_secure_storage_access(struct pt_regs *regs) unsigned long addr = get_fault_address(regs); struct vm_area_struct *vma; struct mm_struct *mm; + struct folio *folio; struct page *page; struct gmap *gmap; int rc; @@ -539,17 +540,18 @@ void do_secure_storage_access(struct pt_regs *regs) mmap_read_unlock(mm); break; } - if (arch_make_page_accessible(page)) + folio = page_folio(page); + if (arch_make_folio_accessible(folio)) send_sig(SIGSEGV, current, 0); - put_page(page); + folio_put(folio); mmap_read_unlock(mm); break; case KERNEL_FAULT: - page = phys_to_page(addr); - if (unlikely(!try_get_page(page))) + folio = phys_to_folio(addr); + if (unlikely(!folio_try_get(folio))) break; - rc = arch_make_page_accessible(page); - put_page(page); + rc = arch_make_folio_accessible(folio); + folio_put(folio); if (rc) BUG(); break; From 1433b36e3ab67772a37db624fc1f8e66d443690d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 May 2024 20:29:55 +0200 Subject: [PATCH 10/54] s390/hugetlb: Convert PG_arch_1 code to work on folio->flags Let's 
make it clearer that we are always working on folio flags and never page flags of tail pages by converting remaining PG_arch_1 users that modify page->flags to modify folio->flags instead. No functional change intended, because we would always have worked with the head page (where page->flags corresponds to folio->flags) and never with tail pages. Reviewed-by: Claudio Imbrenda Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20240508182955.358628-11-david@redhat.com Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/mm/gmap.c | 4 ++-- arch/s390/mm/hugetlbpage.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 7537e7b4be39..d5a5756dd69f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2733,7 +2733,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, { pmd_t *pmd = (pmd_t *)pte; unsigned long start, end; - struct page *page = pmd_page(*pmd); + struct folio *folio = page_folio(pmd_page(*pmd)); /* * The write check makes sure we do not set a key on shared @@ -2748,7 +2748,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, start = pmd_val(*pmd) & HPAGE_MASK; end = start + HPAGE_SIZE; __storage_key_init_range(start, end); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_arch_1, &folio->flags); cond_resched(); return 0; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 2675aab4acc7..34d558164f0d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -121,7 +121,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) { - struct page *page; + struct folio *folio; unsigned long size, paddr; if (!mm_uses_skeys(mm) || @@ -129,16 +129,16 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) return; if ((rste & _REGION_ENTRY_TYPE_MASK) == 
_REGION_ENTRY_TYPE_R3) { - page = pud_page(__pud(rste)); + folio = page_folio(pud_page(__pud(rste))); size = PUD_SIZE; paddr = rste & PUD_MASK; } else { - page = pmd_page(__pmd(rste)); + folio = page_folio(pmd_page(__pmd(rste))); size = PMD_SIZE; paddr = rste & PMD_MASK; } - if (!test_and_set_bit(PG_arch_1, &page->flags)) + if (!test_and_set_bit(PG_arch_1, &folio->flags)) __storage_key_init_range(paddr, paddr + size); } From fb412c6241dccc530416917efe8e9fea5fa1fda2 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 29 Feb 2024 15:13:48 +0100 Subject: [PATCH 11/54] s390/pai_crypto: Enable concurrent system-wide counting/sampling event The PMU for PAI crypto counters enforces the following restriction: - No system wide counting while system wide sampling is active. This restriction is removed. One or more system wide counting events can now be active at the same time while at most one system wide sampling event is active. Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 4ad472d130a3..d3a64f041819 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -97,6 +97,8 @@ static void paicrypt_event_destroy(struct perf_event *event) event->attr.config, event->cpu, cpump->active_events, cpump->mode, refcount_read(&cpump->refcnt)); + if (event->attr.sample_period) + cpump->mode &= ~PAI_MODE_SAMPLING; free_page(PAI_SAVE_AREA(event)); if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", @@ -160,9 +162,7 @@ static u64 paicrypt_getall(struct perf_event *event) * sampling for crypto events * * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is - * allowed and when this event is running, no counting event is allowed. 
- * Several counting events are allowed in parallel, but no sampling event - * is allowed while one (or more) counting events are running. + * allowed. Several counting events are allowed in parallel. * * This function is called in process context and it is save to block. * When the event initialization functions fails, no other call back will @@ -196,12 +196,12 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event) } if (a->sample_period) { /* Sampling requested */ - if (cpump->mode != PAI_MODE_NONE) - rc = -EBUSY; /* ... sampling/counting active */ - } else { /* Counting requested */ - if (cpump->mode == PAI_MODE_SAMPLING) + if (cpump->mode & PAI_MODE_SAMPLING) rc = -EBUSY; /* ... and sampling active */ + else + cpump->mode |= PAI_MODE_SAMPLING; } + /* * This error case triggers when there is a conflict: * Either sampling requested and counting already active, or visa @@ -235,7 +235,6 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event) /* Set mode and reference count */ rc = 0; refcount_set(&cpump->refcnt, 1); - cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; mp->mapptr = cpump; debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" " mode %d refcnt %u page %#lx save %p rc %d\n", @@ -249,7 +248,6 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event) mp->mapptr = NULL; free_root: paicrypt_root_free(); - unlock: mutex_unlock(&pai_reserve_mutex); return rc ? ERR_PTR(rc) : cpump; @@ -332,6 +330,7 @@ static void paicrypt_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, sum); } else { /* Sampling */ cpump->event = event; + memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE); perf_sched_cb_inc(event->pmu); } } @@ -480,7 +479,7 @@ static int paicrypt_have_sample(void) static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. 
So read out - * results on schedule_out and if page was dirty, clear values. + * results on schedule_out and if page was dirty, save old values. */ if (!sched_in) paicrypt_have_sample(); From 92ea686840ac5e2e74ad5b1a569b07123d753e5e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 29 Feb 2024 15:45:04 +0100 Subject: [PATCH 12/54] s390/pai_crypto: Enable per-task counting event The PMU for PAI crypto counters enforces the following restriction: - No per-task context for PAI crypto counters events. This restriction is removed. One or more per-task/system-wide counting events can now be active at the same time while at most one system wide sampling event is active. Example for per-task context of a PAI crypto counter event: # perf stat -e pai_crypto/KM_AES_128/ -- true Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pai.h | 1 + arch/s390/kernel/perf_pai_crypto.c | 81 ++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 3f609565734b..7da1cec42016 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -82,4 +82,5 @@ enum paievt_mode { }; #define PAI_SAVE_AREA(x) ((x)->hw.event_base) +#define PAI_CPU_MASK(x) ((x)->hw.addr_filters) #endif diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index d3a64f041819..0e296a1482bc 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -84,13 +84,11 @@ static DEFINE_MUTEX(pai_reserve_mutex); /* Adjust usage counters and remove allocated memory when all users are * gone. 
*/ -static void paicrypt_event_destroy(struct perf_event *event) +static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu) { - struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, - event->cpu); + struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); struct paicrypt_map *cpump = mp->mapptr; - static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" " mode %d refcnt %u\n", __func__, @@ -99,7 +97,6 @@ static void paicrypt_event_destroy(struct perf_event *event) refcount_read(&cpump->refcnt)); if (event->attr.sample_period) cpump->mode &= ~PAI_MODE_SAMPLING; - free_page(PAI_SAVE_AREA(event)); if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", __func__, (unsigned long)cpump->page, @@ -113,6 +110,23 @@ static void paicrypt_event_destroy(struct perf_event *event) mutex_unlock(&pai_reserve_mutex); } +static void paicrypt_event_destroy(struct perf_event *event) +{ + int cpu; + + static_branch_dec(&pai_key); + free_page(PAI_SAVE_AREA(event)); + if (event->cpu == -1) { + struct cpumask *mask = PAI_CPU_MASK(event); + + for_each_cpu(cpu, mask) + paicrypt_event_destroy_cpu(event, cpu); + kfree(mask); + } else { + paicrypt_event_destroy_cpu(event, event->cpu); + } +} + static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel) { if (kernel) @@ -170,7 +184,7 @@ static u64 paicrypt_getall(struct perf_event *event) * * Allocate the memory for the event. 
*/ -static struct paicrypt_map *paicrypt_busy(struct perf_event *event) +static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) { struct perf_event_attr *a = &event->attr; struct paicrypt_map *cpump = NULL; @@ -185,7 +199,7 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event) goto unlock; /* Allocate node for this event */ - mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu); + mp = per_cpu_ptr(paicrypt_root.mapptr, cpu); cpump = mp->mapptr; if (!cpump) { /* Paicrypt_map allocated? */ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); @@ -253,6 +267,40 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event) return rc ? ERR_PTR(rc) : cpump; } +static int paicrypt_event_init_all(struct perf_event *event) +{ + struct paicrypt_map *cpump; + struct cpumask *maskptr; + int cpu, rc = -ENOMEM; + + maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); + if (!maskptr) + goto out; + + for_each_online_cpu(cpu) { + cpump = paicrypt_busy(event, cpu); + if (IS_ERR(cpump)) { + for_each_cpu(cpu, maskptr) + paicrypt_event_destroy_cpu(event, cpu); + kfree(maskptr); + rc = PTR_ERR(cpump); + goto out; + } + cpumask_set_cpu(cpu, maskptr); + } + + /* + * On error all cpumask are freed and all events have been destroyed. + * Save of which CPUs data structures have been allocated for. + * Release them in paicrypt_event_destroy call back function + * for this event. + */ + PAI_CPU_MASK(event) = maskptr; + rc = 0; +out: + return rc; +} + /* Might be called on different CPU than the one the event is intended for. */ static int paicrypt_event_init(struct perf_event *event) { @@ -267,8 +315,9 @@ static int paicrypt_event_init(struct perf_event *event) if (a->config < PAI_CRYPTO_BASE || a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; - /* Allow only CPU wide operation, no process context for now. 
*/ - if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1) + /* Allow only CPU wide operation for sampling */ + if (a->sample_period && + ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)) return -ENOENT; /* Allow only CRYPTO_ALL for sampling. */ if (a->sample_period && a->config != PAI_CRYPTO_BASE) @@ -282,13 +331,17 @@ static int paicrypt_event_init(struct perf_event *event) } } - cpump = paicrypt_busy(event); - if (IS_ERR(cpump)) { + if (event->cpu >= 0) { + cpump = paicrypt_busy(event, event->cpu); + if (IS_ERR(cpump)) + rc = PTR_ERR(cpump); + } else { + rc = paicrypt_event_init_all(event); + } + if (rc) { free_page(PAI_SAVE_AREA(event)); - rc = PTR_ERR(cpump); goto out; } - event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -526,7 +579,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = { /* Performance monitoring unit for mapped counters */ static struct pmu paicrypt = { - .task_ctx_nr = perf_invalid_context, + .task_ctx_nr = perf_hw_context, .event_init = paicrypt_event_init, .add = paicrypt_add, .del = paicrypt_del, From 9f66572f2889a5e72a9d7e17787e52f03b1f7bd8 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 11 Apr 2024 13:15:47 +0200 Subject: [PATCH 13/54] s390/pai_crypto: Enable per-task and system-wide sampling event The PMU for PAI crypto counters enforces the following restrictions: - No per-task context for PAI crypto sampling event CRYPTO_ALL - No multiple system-wide PAI crypto sampling event CRYPTO_ALL Both restrictions are removed. One or more per-task sampling events are supported. Also one or more system-wide sampling events are supported. 
Example for per-task context of sampling event CRYPTO_ALL: # perf record -e pai_crypto/CRYPTO_ALL/ -- true Example for system-wide context of sampling event CRYPTO_ALL: # perf record -e pai_crypto/CRYPTO_ALL/ -a -- sleep 4 Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 107 +++++++++++++---------------- 1 file changed, 49 insertions(+), 58 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 0e296a1482bc..c0b7f5422e5e 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -24,6 +24,8 @@ static debug_info_t *cfm_dbg; static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */ /* extracted with QPACI instruction */ +#define PAI_SWLIST(x) (&(x)->hw.tp_list) + DEFINE_STATIC_KEY_FALSE(pai_key); struct pai_userdata { @@ -36,8 +38,8 @@ struct paicrypt_map { struct pai_userdata *save; /* Page to store no-zero counters */ unsigned int active_events; /* # of PAI crypto users */ refcount_t refcnt; /* Reference count mapped buffers */ - enum paievt_mode mode; /* Type of event */ struct perf_event *event; /* Perf event for sampling */ + struct list_head syswide_list; /* List system-wide sampling events */ }; struct paicrypt_mapptr { @@ -90,13 +92,10 @@ static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu) struct paicrypt_map *cpump = mp->mapptr; mutex_lock(&pai_reserve_mutex); - debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" - " mode %d refcnt %u\n", __func__, - event->attr.config, event->cpu, - cpump->active_events, cpump->mode, + debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d " + "refcnt %u\n", __func__, event->attr.config, + event->cpu, cpump->active_events, refcount_read(&cpump->refcnt)); - if (event->attr.sample_period) - cpump->mode &= ~PAI_MODE_SAMPLING; if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s 
page %#lx save %p\n", __func__, (unsigned long)cpump->page, @@ -172,12 +171,7 @@ static u64 paicrypt_getall(struct perf_event *event) return sum; } -/* Used to avoid races in checking concurrent access of counting and - * sampling for crypto events - * - * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is - * allowed. Several counting events are allowed in parallel. - * +/* Check concurrent access of counting and sampling for crypto events. * This function is called in process context and it is save to block. * When the event initialization functions fails, no other call back will * be invoked. @@ -186,7 +180,6 @@ static u64 paicrypt_getall(struct perf_event *event) */ static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) { - struct perf_event_attr *a = &event->attr; struct paicrypt_map *cpump = NULL; struct paicrypt_mapptr *mp; int rc; @@ -207,25 +200,9 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) rc = -ENOMEM; goto free_root; } + INIT_LIST_HEAD(&cpump->syswide_list); } - if (a->sample_period) { /* Sampling requested */ - if (cpump->mode & PAI_MODE_SAMPLING) - rc = -EBUSY; /* ... and sampling active */ - else - cpump->mode |= PAI_MODE_SAMPLING; - } - - /* - * This error case triggers when there is a conflict: - * Either sampling requested and counting already active, or visa - * versa. Therefore the struct paicrypto_map for this CPU is - * needed or the error could not have occurred. Only adjust root - * node refcount. - */ - if (rc) - goto free_root; - /* Allocate memory for counter page and counter extraction. * Only the first counting event has to allocate a page. 
*/ @@ -250,14 +227,14 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu) rc = 0; refcount_set(&cpump->refcnt, 1); mp->mapptr = cpump; - debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" - " mode %d refcnt %u page %#lx save %p rc %d\n", - __func__, a->sample_period, cpump->active_events, - cpump->mode, refcount_read(&cpump->refcnt), + debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx " + "save %p rc %d\n", __func__, cpump->active_events, + refcount_read(&cpump->refcnt), (unsigned long)cpump->page, cpump->save, rc); goto unlock; free_paicrypt_map: + /* Undo memory allocation */ kfree(cpump); mp->mapptr = NULL; free_root: @@ -315,11 +292,7 @@ static int paicrypt_event_init(struct perf_event *event) if (a->config < PAI_CRYPTO_BASE || a->config > PAI_CRYPTO_BASE + paicrypt_cnt) return -EINVAL; - /* Allow only CPU wide operation for sampling */ - if (a->sample_period && - ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)) - return -ENOENT; - /* Allow only CRYPTO_ALL for sampling. 
*/ + /* Allow only CRYPTO_ALL for sampling */ if (a->sample_period && a->config != PAI_CRYPTO_BASE) return -EINVAL; /* Get a page to store last counter values for sampling */ @@ -382,9 +355,14 @@ static void paicrypt_start(struct perf_event *event, int flags) sum = paicrypt_getall(event); /* Get current value */ local64_set(&event->hw.prev_count, sum); } else { /* Sampling */ - cpump->event = event; memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE); - perf_sched_cb_inc(event->pmu); + /* Enable context switch callback for system-wide sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); + perf_sched_cb_inc(event->pmu); + } else { + cpump->event = event; + } } } @@ -405,6 +383,7 @@ static int paicrypt_add(struct perf_event *event, int flags) return 0; } +static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *); static void paicrypt_stop(struct perf_event *event, int flags) { struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); @@ -413,8 +392,13 @@ static void paicrypt_stop(struct perf_event *event, int flags) if (!event->attr.sample_period) { /* Counting */ paicrypt_read(event); } else { /* Sampling */ - perf_sched_cb_dec(event->pmu); - cpump->event = NULL; + if (!(event->attach_state & PERF_ATTACH_TASK)) { + perf_sched_cb_dec(event->pmu); + list_del(PAI_SWLIST(event)); + } else { + paicrypt_have_sample(event, cpump); + cpump->event = NULL; + } } event->hw.state = PERF_HES_STOPPED; } @@ -507,23 +491,30 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, } /* Check if there is data to be saved on schedule out of a task. 
*/ -static int paicrypt_have_sample(void) +static void paicrypt_have_sample(struct perf_event *event, + struct paicrypt_map *cpump) +{ + size_t rawsize; + + if (!event) /* No event active */ + return; + rawsize = paicrypt_copy(cpump->save, cpump->page, + (unsigned long *)PAI_SAVE_AREA(event), + event->attr.exclude_user, + event->attr.exclude_kernel); + if (rawsize) /* Incremented counters */ + paicrypt_push_sample(rawsize, cpump, event); +} + +/* Check if there is data to be saved on schedule out of a task. */ +static void paicrypt_have_samples(void) { struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); struct paicrypt_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; - size_t rawsize; - int rc = 0; + struct perf_event *event; - if (!event) /* No event active */ - return 0; - rawsize = paicrypt_copy(cpump->save, cpump->page, - (unsigned long *)PAI_SAVE_AREA(event), - cpump->event->attr.exclude_user, - cpump->event->attr.exclude_kernel); - if (rawsize) /* No incremented counters */ - rc = paicrypt_push_sample(rawsize, cpump, event); - return rc; + list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) + paicrypt_have_sample(event, cpump); } /* Called on schedule-in and schedule-out. No access to event structure, @@ -535,7 +526,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch * results on schedule_out and if page was dirty, save old values. */ if (!sched_in) - paicrypt_have_sample(); + paicrypt_have_samples(); } /* Attribute definitions for paicrypt interface. As with other CPU From 14e3768435da557079216f51d774165c70aad9b1 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Apr 2024 14:40:03 +0200 Subject: [PATCH 14/54] s390/pai_ext: Enable concurrent system-wide counting/sampling The PMU for PAI NNPA counters enforces the following restriction: - No system wide counting while system wide sampling is active. This restriction is removed.
One or more system wide counting events can now be active at the same time while at most one system wide sampling event is active. Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_ext.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index a6da7e0cc7a6..5d00f77a64a5 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -120,8 +120,10 @@ static void paiext_event_destroy(struct perf_event *event) struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); struct paiext_map *cpump = mp->mapptr; - free_page(PAI_SAVE_AREA(event)); mutex_lock(&paiext_reserve_mutex); + if (event->attr.sample_period) + cpump->mode &= ~PAI_MODE_SAMPLING; + free_page(PAI_SAVE_AREA(event)); if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -186,21 +188,19 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) goto undo; } refcount_set(&cpump->refcnt, 1); - cpump->mode = a->sample_period ? PAI_MODE_SAMPLING - : PAI_MODE_COUNTING; } else { /* Multiple invocation, check what is active. - * Supported are multiple counter events or only one sampling + * Supported are multiple counter events and only one sampling * event concurrently at any one time. 
*/ - if (cpump->mode == PAI_MODE_SAMPLING || - (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { + if (a->sample_period && (cpump->mode & PAI_MODE_SAMPLING)) { rc = -EBUSY; goto undo; } refcount_inc(&cpump->refcnt); } - + if (a->sample_period) + cpump->mode |= PAI_MODE_SAMPLING; rc = 0; undo: @@ -335,6 +335,8 @@ static void paiext_start(struct perf_event *event, int flags) local64_set(&event->hw.prev_count, sum); } else { /* Sampling */ cpump->event = event; + memcpy((void *)PAI_SAVE_AREA(event), cpump->area, + PAIE1_CTRBLOCK_SZ); perf_sched_cb_inc(event->pmu); } } @@ -493,7 +495,7 @@ static int paiext_have_sample(void) static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { /* We started with a clean page on event installation. So read out - * results on schedule_out and if page was dirty, clear values. + * results on schedule_out and if page was dirty, save old values. */ if (!sched_in) paiext_have_sample(); From 3f9ff4c5a073294131818a26d1ac2b77b7104695 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Apr 2024 14:50:52 +0200 Subject: [PATCH 15/54] s390/pai_ext: Enable per-task counting event The PMU for PAI NNPA counters enforces the following restriction: - No per-task context for PAI NNPA counters. This restriction is removed. One or more per-task/system-wide counting events can now be active at the same time while one system wide sampling event is active. 
Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_ext.c | 79 ++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 5d00f77a64a5..20fc12dd9d0d 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -115,22 +115,36 @@ static void paiext_free(struct paiext_mapptr *mp) } /* Release the PMU if event is the last perf event */ -static void paiext_event_destroy(struct perf_event *event) +static void paiext_event_destroy_cpu(struct perf_event *event, int cpu) { - struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu); struct paiext_map *cpump = mp->mapptr; mutex_lock(&paiext_reserve_mutex); if (event->attr.sample_period) cpump->mode &= ~PAI_MODE_SAMPLING; - free_page(PAI_SAVE_AREA(event)); if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); mutex_unlock(&paiext_reserve_mutex); - debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, - event->cpu, mp->mapptr); +} +static void paiext_event_destroy(struct perf_event *event) +{ + int cpu; + + free_page(PAI_SAVE_AREA(event)); + if (event->cpu == -1) { + struct cpumask *mask = PAI_CPU_MASK(event); + + for_each_cpu(cpu, mask) + paiext_event_destroy_cpu(event, cpu); + kfree(mask); + } else { + paiext_event_destroy_cpu(event, event->cpu); + } + debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__, + event->cpu); } /* Used to avoid races in checking concurrent access of counting and @@ -147,19 +161,18 @@ static void paiext_event_destroy(struct perf_event *event) * * Allocate the memory for the event. 
*/ -static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) +static int paiext_alloc_cpu(struct perf_event_attr *a, int cpu) { struct paiext_mapptr *mp; struct paiext_map *cpump; int rc; mutex_lock(&paiext_reserve_mutex); - rc = paiext_root_alloc(); if (rc) goto unlock; - mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + mp = per_cpu_ptr(paiext_root.mapptr, cpu); cpump = mp->mapptr; if (!cpump) { /* Paiext_map allocated? */ rc = -ENOMEM; @@ -217,6 +230,40 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) return rc; } +static int paiext_alloc(struct perf_event *event) +{ + struct cpumask *maskptr; + int cpu, rc = -ENOMEM; + + maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL); + if (!maskptr) + goto out; + + for_each_online_cpu(cpu) { + rc = paiext_alloc_cpu(&event->attr, cpu); + if (rc) { + for_each_cpu(cpu, maskptr) + paiext_event_destroy_cpu(event, cpu); + kfree(maskptr); + goto out; + } + cpumask_set_cpu(cpu, maskptr); + } + + /* + * On error all cpumask are freed and all events have been destroyed. + * Save of which CPUs data structures have been allocated for. + * Release them in paiext_event_destroy call back function + * for this event. + */ + PAI_CPU_MASK(event) = maskptr; + rc = 0; +out: + debug_sprintf_event(paiext_dbg, 5, "%s cpu %u rc %d\n", __func__, + cpu, rc); + return rc; +} + /* The PAI extension 1 control block supports up to 128 entries. Return * the index within PAIE1_CB given the event number. Also validate event * number. @@ -246,8 +293,9 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_event_valid(event); if (rc) return rc; - /* Allow only CPU wide operation, no process context for now. */ - if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1) + /* Allow only CPU wide operation for sampling */ + if (a->sample_period && + ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)) return -ENOENT; /* Allow only event NNPA_ALL for sampling.
*/ if (a->sample_period && a->config != PAI_NNPA_BASE) @@ -262,7 +310,10 @@ static int paiext_event_init(struct perf_event *event) return -ENOMEM; } - rc = paiext_alloc(a, event); + if (event->cpu >= 0) + rc = paiext_alloc_cpu(a, event->cpu); + else + rc = paiext_alloc(event); if (rc) { free_page(PAI_SAVE_AREA(event)); return rc; @@ -352,8 +403,6 @@ static int paiext_add(struct perf_event *event, int flags) pcb->acc = virt_to_phys(cpump->area) | 0x1; /* Enable CPU instruction lookup for PAIE1 control block */ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); - debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", - __func__, S390_lowcore.aicd, pcb->acc); } if (flags & PERF_EF_START) paiext_start(event, PERF_EF_RELOAD); @@ -387,8 +436,6 @@ static void paiext_del(struct perf_event *event, int flags) local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); pcb->acc = 0; S390_lowcore.aicd = 0; - debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", - __func__, S390_lowcore.aicd, pcb->acc); } } @@ -544,7 +591,7 @@ static const struct attribute_group *paiext_attr_groups[] = { /* Performance monitoring unit for mapped counters */ static struct pmu paiext = { - .task_ctx_nr = perf_invalid_context, + .task_ctx_nr = perf_hw_context, .event_init = paiext_event_init, .add = paiext_add, .del = paiext_del, From 582cc1b28e8c8cb25f9a5f2595ca4b6d90737d03 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Apr 2024 16:41:23 +0200 Subject: [PATCH 16/54] s390/pai_ext: Enable per-task and system-wide sampling event The PMU for PAI NNPA counters enforces the following restriction: - No per-task context for PAI sampling event NNPA_ALL - No multiple system-wide PAI sampling event NNPA_ALL Both restrictions are removed. One or more per-task sampling events are supported. Also one or more system-wide sampling events are supported. 
Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pai.h | 8 +--- arch/s390/kernel/perf_pai_crypto.c | 2 - arch/s390/kernel/perf_pai_ext.c | 75 ++++++++++++++++-------------- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 7da1cec42016..8e1dd389ae88 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -75,12 +75,8 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs) WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET); } -enum paievt_mode { - PAI_MODE_NONE, - PAI_MODE_SAMPLING, - PAI_MODE_COUNTING, -}; - #define PAI_SAVE_AREA(x) ((x)->hw.event_base) #define PAI_CPU_MASK(x) ((x)->hw.addr_filters) +#define PAI_SWLIST(x) (&(x)->hw.tp_list) + #endif diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index c0b7f5422e5e..95079a289109 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -24,8 +24,6 @@ static debug_info_t *cfm_dbg; static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */ /* extracted with QPACI instruction */ -#define PAI_SWLIST(x) (&(x)->hw.tp_list) - DEFINE_STATIC_KEY_FALSE(pai_key); struct pai_userdata { diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 20fc12dd9d0d..6040f3c2b942 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -47,11 +47,11 @@ struct paiext_cb { /* PAI extension 1 control block */ struct paiext_map { unsigned long *area; /* Area for CPU to store counters */ struct pai_userdata *save; /* Area to store non-zero counters */ - enum paievt_mode mode; /* Type of event */ unsigned int active_events; /* # of PAI Extension users */ refcount_t refcnt; struct perf_event *event; /* Perf event for sampling */ struct paiext_cb *paiext_cb; /* PAI extension control block area */ + struct 
list_head syswide_list; /* List system-wide sampling events */ }; struct paiext_mapptr { @@ -70,6 +70,8 @@ static void paiext_root_free(void) free_percpu(paiext_root.mapptr); paiext_root.mapptr = NULL; } + debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__, + refcount_read(&paiext_root.refcnt)); } /* On initialization of first event also allocate per CPU data dynamically. @@ -121,8 +123,6 @@ static void paiext_event_destroy_cpu(struct perf_event *event, int cpu) struct paiext_map *cpump = mp->mapptr; mutex_lock(&paiext_reserve_mutex); - if (event->attr.sample_period) - cpump->mode &= ~PAI_MODE_SAMPLING; if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -161,7 +161,7 @@ static void paiext_event_destroy(struct perf_event *event) * * Allocate the memory for the event. */ -static int paiext_alloc_cpu(struct perf_event_attr *a, int cpu) +static int paiext_alloc_cpu(struct perf_event *event, int cpu) { struct paiext_mapptr *mp; struct paiext_map *cpump; @@ -200,21 +200,12 @@ static int paiext_alloc_cpu(struct perf_event_attr *a, int cpu) paiext_free(mp); goto undo; } + INIT_LIST_HEAD(&cpump->syswide_list); refcount_set(&cpump->refcnt, 1); + rc = 0; } else { - /* Multiple invocation, check what is active. - * Supported are multiple counter events and only one sampling - * event concurrently at any one time. 
- */ - if (a->sample_period && (cpump->mode & PAI_MODE_SAMPLING)) { - rc = -EBUSY; - goto undo; - } refcount_inc(&cpump->refcnt); } - if (a->sample_period) - cpump->mode |= PAI_MODE_SAMPLING; - rc = 0; undo: if (rc) { @@ -240,7 +231,7 @@ static int paiext_alloc(struct perf_event *event) goto out; for_each_online_cpu(cpu) { - rc = paiext_alloc_cpu(&event->attr, cpu); + rc = paiext_alloc_cpu(event, cpu); if (rc) { for_each_cpu(cpu, maskptr) paiext_event_destroy_cpu(event, cpu); @@ -259,8 +250,6 @@ static int paiext_alloc(struct perf_event *event) PAI_CPU_MASK(event) = maskptr; rc = 0; out: - debug_sprintf_event(paiext_dbg, 5, "%s cpu %u rc %d\n", __func__, - cpu, rc); return rc; } @@ -293,10 +282,6 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_event_valid(event); if (rc) return rc; - /* Allow only CPU wide operation for sampling */ - if (a->sample_period && - ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)) - return -ENOENT; /* Allow only event NNPA_ALL for sampling. 
*/ if (a->sample_period && a->config != PAI_NNPA_BASE) return -EINVAL; @@ -311,7 +296,7 @@ static int paiext_event_init(struct perf_event *event) } if (event->cpu >= 0) - rc = paiext_alloc_cpu(a, event->cpu); + rc = paiext_alloc_cpu(event, event->cpu); else rc = paiext_alloc(event); if (rc) { @@ -385,10 +370,15 @@ static void paiext_start(struct perf_event *event, int flags) sum = paiext_getall(event); /* Get current value */ local64_set(&event->hw.prev_count, sum); } else { /* Sampling */ - cpump->event = event; memcpy((void *)PAI_SAVE_AREA(event), cpump->area, PAIE1_CTRBLOCK_SZ); - perf_sched_cb_inc(event->pmu); + /* Enable context switch callback for system-wide sampling */ + if (!(event->attach_state & PERF_ATTACH_TASK)) { + list_add_tail(PAI_SWLIST(event), &cpump->syswide_list); + perf_sched_cb_inc(event->pmu); + } else { + cpump->event = event; + } } } @@ -410,6 +400,7 @@ static int paiext_add(struct perf_event *event, int flags) return 0; } +static void paiext_have_sample(struct perf_event *, struct paiext_map *); static void paiext_stop(struct perf_event *event, int flags) { struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); @@ -418,8 +409,13 @@ static void paiext_stop(struct perf_event *event, int flags) if (!event->attr.sample_period) { /* Counting */ paiext_read(event); } else { /* Sampling */ - perf_sched_cb_dec(event->pmu); - cpump->event = NULL; + if (!(event->attach_state & PERF_ATTACH_TASK)) { + list_del(PAI_SWLIST(event)); + perf_sched_cb_dec(event->pmu); + } else { + paiext_have_sample(event, cpump); + cpump->event = NULL; + } } event->hw.state = PERF_HES_STOPPED; } @@ -519,21 +515,28 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, } /* Check if there is data to be saved on schedule out of a task. 
*/ -static int paiext_have_sample(void) +static void paiext_have_sample(struct perf_event *event, + struct paiext_map *cpump) { - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; size_t rawsize; - int rc = 0; if (!event) - return 0; + return; rawsize = paiext_copy(cpump->save, cpump->area, (unsigned long *)PAI_SAVE_AREA(event)); if (rawsize) /* Incremented counters */ - rc = paiext_push_sample(rawsize, cpump, event); - return rc; + paiext_push_sample(rawsize, cpump, event); +} + +/* Check if there is data to be saved on schedule out of a task. */ +static void paiext_have_samples(void) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct perf_event *event; + + list_for_each_entry(event, &cpump->syswide_list, hw.tp_list) + paiext_have_sample(event, cpump); } /* Called on schedule-in and schedule-out. No access to event structure, @@ -545,7 +548,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * results on schedule_out and if page was dirty, save old values. */ if (!sched_in) - paiext_have_sample(); + paiext_have_samples(); } /* Attribute definitions for pai extension1 interface. As with other CPU From 7e8f89e5e05983089be1d431f1c82e5abc37574a Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:24 +0200 Subject: [PATCH 17/54] s390: Add get_lowcore() function Add a get_lowcore() function which returns the address of lowcore (currently always NULL). This function will be used as a replacement of the S390_lowcore macro. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 8c5f16857539..94f954250ae0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -215,6 +215,11 @@ struct lowcore { #define S390_lowcore (*((struct lowcore *) 0)) +static __always_inline struct lowcore *get_lowcore(void) +{ + return NULL; +} + extern struct lowcore *lowcore_ptr[]; static inline void set_prefix(__u32 address) From 208da1d5fc3c67d8ae5d34e844fd67cc47a136f0 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:25 +0200 Subject: [PATCH 18/54] s390: Replace S390_lowcore by get_lowcore() Replace all S390_lowcore usages in arch/s390/ by get_lowcore(). Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/current.h | 2 +- arch/s390/include/asm/facility.h | 4 +- arch/s390/include/asm/hardirq.h | 6 +-- arch/s390/include/asm/mmu_context.h | 8 +-- arch/s390/include/asm/pai.h | 8 +-- arch/s390/include/asm/percpu.h | 2 +- arch/s390/include/asm/preempt.h | 30 +++++------ arch/s390/include/asm/processor.h | 8 +-- arch/s390/include/asm/setup.h | 34 ++++++------ arch/s390/include/asm/smp.h | 2 +- arch/s390/include/asm/softirq_stack.h | 2 +- arch/s390/include/asm/spinlock.h | 2 +- arch/s390/include/asm/timex.h | 10 ++-- arch/s390/include/asm/vtime.h | 12 ++--- arch/s390/kernel/dumpstack.c | 8 +-- arch/s390/kernel/early.c | 36 ++++++------- arch/s390/kernel/idle.c | 10 ++-- arch/s390/kernel/irq.c | 18 +++---- arch/s390/kernel/machine_kexec.c | 4 +- arch/s390/kernel/nmi.c | 29 +++++------ arch/s390/kernel/perf_cpum_sf.c | 2 +- arch/s390/kernel/perf_pai_crypto.c | 4 +- arch/s390/kernel/perf_pai_ext.c | 4 +- arch/s390/kernel/process.c | 6 +-- arch/s390/kernel/setup.c | 24 ++++----- arch/s390/kernel/smp.c | 30 +++++------ 
arch/s390/kernel/syscall.c | 4 +- arch/s390/kernel/time.c | 22 ++++---- arch/s390/kernel/traps.c | 24 ++++----- arch/s390/kernel/vtime.c | 74 +++++++++++++-------------- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/lib/spinlock.c | 4 +- arch/s390/lib/test_unwind.c | 2 +- arch/s390/lib/uaccess.c | 4 +- arch/s390/mm/dump_pagetables.c | 2 +- arch/s390/mm/fault.c | 16 +++--- arch/s390/mm/gmap.c | 6 +-- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgalloc.c | 4 +- arch/s390/pci/pci.c | 2 +- 40 files changed, 236 insertions(+), 237 deletions(-) diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h index 68f84315277c..d03a922c641e 100644 --- a/arch/s390/include/asm/current.h +++ b/arch/s390/include/asm/current.h @@ -14,6 +14,6 @@ struct task_struct; -#define current ((struct task_struct *const)S390_lowcore.current_task) +#define current ((struct task_struct *const)get_lowcore()->current_task) #endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 796007125dff..d46cc725f024 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -92,8 +92,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size) asm volatile( " stfl 0(0)\n" - : "=m" (S390_lowcore.stfl_fac_list)); - stfl_fac_list = S390_lowcore.stfl_fac_list; + : "=m" (get_lowcore()->stfl_fac_list)); + stfl_fac_list = get_lowcore()->stfl_fac_list; memcpy(stfle_fac_list, &stfl_fac_list, 4); nr = 4; /* bytes stored by stfl */ if (stfl_fac_list & 0x01000000) { diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 58668ffb5488..a5b45388c91f 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -13,9 +13,9 @@ #include -#define local_softirq_pending() (S390_lowcore.softirq_pending) -#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x)) -#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x)) +#define 
local_softirq_pending() (get_lowcore()->softirq_pending) +#define set_softirq_pending(x) (get_lowcore()->softirq_pending = (x)) +#define or_softirq_pending(x) (get_lowcore()->softirq_pending |= (x)) #define __ARCH_IRQ_STAT #define __ARCH_IRQ_EXIT_IRQS_DISABLED diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index a7789a9f6218..d56eb0a1f37b 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -76,9 +76,9 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct * int cpu = smp_processor_id(); if (next == &init_mm) - S390_lowcore.user_asce = s390_invalid_asce; + get_lowcore()->user_asce = s390_invalid_asce; else - S390_lowcore.user_asce.val = next->context.asce; + get_lowcore()->user_asce.val = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR7 */ local_ctl_load(7, &s390_invalid_asce); @@ -111,7 +111,7 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } - local_ctl_load(7, &S390_lowcore.user_asce); + local_ctl_load(7, &get_lowcore()->user_asce); } #define activate_mm activate_mm @@ -120,7 +120,7 @@ static inline void activate_mm(struct mm_struct *prev, { switch_mm(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - local_ctl_load(7, &S390_lowcore.user_asce); + local_ctl_load(7, &get_lowcore()->user_asce); } #include diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 8e1dd389ae88..25f2077ba3c9 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -55,11 +55,11 @@ static __always_inline void pai_kernel_enter(struct pt_regs *regs) return; if (!static_branch_unlikely(&pai_key)) return; - if (!S390_lowcore.ccd) + if (!get_lowcore()->ccd) return; if (!user_mode(regs)) return; - WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET); + WRITE_ONCE(get_lowcore()->ccd, 
get_lowcore()->ccd | PAI_CRYPTO_KERNEL_OFFSET); } static __always_inline void pai_kernel_exit(struct pt_regs *regs) @@ -68,11 +68,11 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs) return; if (!static_branch_unlikely(&pai_key)) return; - if (!S390_lowcore.ccd) + if (!get_lowcore()->ccd) return; if (!user_mode(regs)) return; - WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET); + WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd & ~PAI_CRYPTO_KERNEL_OFFSET); } #define PAI_SAVE_AREA(x) ((x)->hw.event_base) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 264095dd84bc..89a28740b6ab 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -9,7 +9,7 @@ * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. */ -#define __my_cpu_offset S390_lowcore.percpu_offset +#define __my_cpu_offset get_lowcore()->percpu_offset /* * For 64 bit module code, the module may be more than 4G above the diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 0e3da500e98c..3ae5f31c665d 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -14,7 +14,7 @@ static __always_inline int preempt_count(void) { - return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; + return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) @@ -22,26 +22,26 @@ static __always_inline void preempt_count_set(int pc) int old, new; do { - old = READ_ONCE(S390_lowcore.preempt_count); + old = READ_ONCE(get_lowcore()->preempt_count); new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (__atomic_cmpxchg(&S390_lowcore.preempt_count, + } while (__atomic_cmpxchg(&get_lowcore()->preempt_count, old, new) != old); } static __always_inline void set_preempt_need_resched(void) { - 
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); + __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); } static __always_inline void clear_preempt_need_resched(void) { - __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); + __atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); } static __always_inline bool test_preempt_need_resched(void) { - return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); + return !(READ_ONCE(get_lowcore()->preempt_count) & PREEMPT_NEED_RESCHED); } static __always_inline void __preempt_count_add(int val) @@ -52,11 +52,11 @@ static __always_inline void __preempt_count_add(int val) */ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { - __atomic_add_const(val, &S390_lowcore.preempt_count); + __atomic_add_const(val, &get_lowcore()->preempt_count); return; } } - __atomic_add(val, &S390_lowcore.preempt_count); + __atomic_add(val, &get_lowcore()->preempt_count); } static __always_inline void __preempt_count_sub(int val) @@ -66,12 +66,12 @@ static __always_inline void __preempt_count_sub(int val) static __always_inline bool __preempt_count_dec_and_test(void) { - return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; + return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; } static __always_inline bool should_resched(int preempt_offset) { - return unlikely(READ_ONCE(S390_lowcore.preempt_count) == + return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset); } @@ -81,12 +81,12 @@ static __always_inline bool should_resched(int preempt_offset) static __always_inline int preempt_count(void) { - return READ_ONCE(S390_lowcore.preempt_count); + return READ_ONCE(get_lowcore()->preempt_count); } static __always_inline void preempt_count_set(int pc) { - S390_lowcore.preempt_count = pc; + get_lowcore()->preempt_count = pc; } static __always_inline void set_preempt_need_resched(void) @@ -104,17 
+104,17 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - S390_lowcore.preempt_count += val; + get_lowcore()->preempt_count += val; } static __always_inline void __preempt_count_sub(int val) { - S390_lowcore.preempt_count -= val; + get_lowcore()->preempt_count -= val; } static __always_inline bool __preempt_count_dec_and_test(void) { - return !--S390_lowcore.preempt_count && tif_need_resched(); + return !--get_lowcore()->preempt_count && tif_need_resched(); } static __always_inline bool should_resched(int preempt_offset) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 07ad5a1df878..c87cf2b8e81a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -46,17 +46,17 @@ typedef long (*sys_call_ptr_t)(struct pt_regs *regs); static __always_inline void set_cpu_flag(int flag) { - S390_lowcore.cpu_flags |= (1UL << flag); + get_lowcore()->cpu_flags |= (1UL << flag); } static __always_inline void clear_cpu_flag(int flag) { - S390_lowcore.cpu_flags &= ~(1UL << flag); + get_lowcore()->cpu_flags &= ~(1UL << flag); } static __always_inline bool test_cpu_flag(int flag) { - return S390_lowcore.cpu_flags & (1UL << flag); + return get_lowcore()->cpu_flags & (1UL << flag); } static __always_inline bool test_and_set_cpu_flag(int flag) @@ -269,7 +269,7 @@ static __always_inline unsigned long __current_stack_pointer(void) static __always_inline bool on_thread_stack(void) { - unsigned long ksp = S390_lowcore.kernel_stack; + unsigned long ksp = get_lowcore()->kernel_stack; return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); } diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 32f70873e2b7..8505737712ee 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -77,24 +77,24 @@ extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC 
PCI */ extern unsigned long mio_wb_bit_mask; -#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) -#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) +#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM) +#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM) +#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR) -#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) -#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) -#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) -#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) -#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) -#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) -#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST) -#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) -#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) -#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) -#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO) -#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP) +#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C) +#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP) +#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE) +#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1) +#define MACHINE_HAS_EDAT2 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2) +#define MACHINE_HAS_TOPOLOGY 
(get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE) +#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC) +#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST) +#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS) +#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC) +#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO) +#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 6e5b1b4b19a9..0b1ed637bfd6 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -11,7 +11,7 @@ #include #include -#define raw_smp_processor_id() (S390_lowcore.cpu_nr) +#define raw_smp_processor_id() (get_lowcore()->cpu_nr) extern struct mutex smp_cpu_state_mutex; extern unsigned int smp_cpu_mt_shift; diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h index 1ac5115d3115..42d61296bbad 100644 --- a/arch/s390/include/asm/softirq_stack.h +++ b/arch/s390/include/asm/softirq_stack.h @@ -8,7 +8,7 @@ #ifdef CONFIG_SOFTIRQ_ON_OWN_STACK static inline void do_softirq_own_stack(void) { - call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq); + call_on_stack(0, get_lowcore()->async_stack, void, __do_softirq); } #endif #endif /* __ASM_S390_SOFTIRQ_STACK_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 37127cd7749e..3e43c90ff135 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -16,7 +16,7 @@ #include #include -#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) +#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval) extern int spin_retry; 
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 4d646659a5f5..640901f2fbc3 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -161,16 +161,16 @@ static inline unsigned long local_tick_disable(void) { unsigned long old; - old = S390_lowcore.clock_comparator; - S390_lowcore.clock_comparator = clock_comparator_max; - set_clock_comparator(S390_lowcore.clock_comparator); + old = get_lowcore()->clock_comparator; + get_lowcore()->clock_comparator = clock_comparator_max; + set_clock_comparator(get_lowcore()->clock_comparator); return old; } static inline void local_tick_enable(unsigned long comp) { - S390_lowcore.clock_comparator = comp; - set_clock_comparator(S390_lowcore.clock_comparator); + get_lowcore()->clock_comparator = comp; + set_clock_comparator(get_lowcore()->clock_comparator); } #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index 561c91c1a87c..ef4dd7d057a2 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -4,16 +4,16 @@ static inline void update_timer_sys(void) { - S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; - S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer; - S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; + get_lowcore()->system_timer += get_lowcore()->last_update_timer - get_lowcore()->exit_timer; + get_lowcore()->user_timer += get_lowcore()->exit_timer - get_lowcore()->sys_enter_timer; + get_lowcore()->last_update_timer = get_lowcore()->sys_enter_timer; } static inline void update_timer_mcck(void) { - S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer; - S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer; - S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer; + get_lowcore()->system_timer += 
get_lowcore()->last_update_timer - get_lowcore()->exit_timer; + get_lowcore()->user_timer += get_lowcore()->exit_timer - get_lowcore()->mcck_enter_timer; + get_lowcore()->last_update_timer = get_lowcore()->mcck_enter_timer; } #endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index d2012635b093..1ecd0580561f 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -61,28 +61,28 @@ static bool in_task_stack(unsigned long sp, struct task_struct *task, static bool in_irq_stack(unsigned long sp, struct stack_info *info) { - unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET; + unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET; return in_stack(sp, info, STACK_TYPE_IRQ, stack); } static bool in_nodat_stack(unsigned long sp, struct stack_info *info) { - unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET; + unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET; return in_stack(sp, info, STACK_TYPE_NODAT, stack); } static bool in_mcck_stack(unsigned long sp, struct stack_info *info) { - unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; + unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET; return in_stack(sp, info, STACK_TYPE_MCCK, stack); } static bool in_restart_stack(unsigned long sp, struct stack_info *info) { - unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET; + unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET; return in_stack(sp, info, STACK_TYPE_RESTART, stack); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c666271433fb..467ed4dba817 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -72,7 +72,7 @@ static void __init reset_tod_clock(void) memset(&tod_clock_base, 0, sizeof(tod_clock_base)); tod_clock_base.tod = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock 
= TOD_UNIX_EPOCH; } /* @@ -99,7 +99,7 @@ static noinline __init void detect_machine_type(void) /* Check current-configuration-level */ if (stsi(NULL, 0, 0, 0) <= 2) { - S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; return; } /* Get virtual-machine cpu information. */ @@ -108,9 +108,9 @@ static noinline __init void detect_machine_type(void) /* Detect known hypervisors */ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; + get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - S390_lowcore.machine_flags |= MACHINE_FLAG_VM; + get_lowcore()->machine_flags |= MACHINE_FLAG_VM; } /* Remove leading, trailing and double whitespace. */ @@ -166,7 +166,7 @@ static __init void setup_topology(void) if (!test_facility(11)) return; - S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -186,8 +186,8 @@ static noinline __init void setup_lowcore_early(void) psw.addr = (unsigned long)early_pgm_check_handler; psw.mask = PSW_KERNEL_BITS; - S390_lowcore.program_new_psw = psw; - S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; + get_lowcore()->program_new_psw = psw; + get_lowcore()->preempt_count = INIT_PREEMPT_COUNT; } static noinline __init void setup_facility_list(void) @@ -211,43 +211,43 @@ static __init void detect_diag9c(void) EX_TABLE(0b,1b) : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; + get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; } static __init void detect_machine_facilities(void) { if (test_facility(8)) { - S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; + get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; system_ctl_set_bit(0, CR0_EDAT_BIT); } if (test_facility(78)) - 
S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; + get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; if (test_facility(3)) - S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; + get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(50) && test_facility(73)) { - S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + get_lowcore()->machine_flags |= MACHINE_FLAG_TE; system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); } if (test_facility(51)) - S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; + get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; if (test_facility(129)) system_ctl_set_bit(0, CR0_VECTOR_BIT); if (test_facility(130)) - S390_lowcore.machine_flags |= MACHINE_FLAG_NX; + get_lowcore()->machine_flags |= MACHINE_FLAG_NX; if (test_facility(133)) - S390_lowcore.machine_flags |= MACHINE_FLAG_GS; + get_lowcore()->machine_flags |= MACHINE_FLAG_GS; if (test_facility(139) && (tod_clock_base.tod >> 63)) { /* Enabled signed clock comparator comparisons */ - S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; + get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; clock_comparator_max = -1ULL >> 1; system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); } if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; + get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; /* the control bit is set during PCI initialization */ } if (test_facility(194)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RDP; + get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index af9c97c0ad73..2c34e02ae64b 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -34,13 +34,13 @@ void account_idle_time_irq(void) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } - idle_time = S390_lowcore.int_clock - idle->clock_idle_enter; + idle_time = get_lowcore()->int_clock - idle->clock_idle_enter; - 
S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = S390_lowcore.int_clock; + get_lowcore()->steal_timer += idle->clock_idle_enter - get_lowcore()->last_update_clock; + get_lowcore()->last_update_clock = get_lowcore()->int_clock; - S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; - S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; + get_lowcore()->system_timer += get_lowcore()->last_update_timer - idle->timer_idle_enter; + get_lowcore()->last_update_timer = get_lowcore()->sys_enter_timer; /* Account time spent with enabled wait psw loaded as idle time. */ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 9acc6630abd3..1af5a08d72ab 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -100,8 +100,8 @@ static const struct irq_class irqclass_sub_desc[] = { static void do_IRQ(struct pt_regs *regs, int irq) { - if (tod_after_eq(S390_lowcore.int_clock, - S390_lowcore.clock_comparator)) + if (tod_after_eq(get_lowcore()->int_clock, + get_lowcore()->clock_comparator)) /* Serve timer interrupts first. 
*/ clock_comparator_work(); generic_handle_irq(irq); @@ -111,7 +111,7 @@ static int on_async_stack(void) { unsigned long frame = current_frame_address(); - return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; + return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; } static void do_irq_async(struct pt_regs *regs, int irq) @@ -119,7 +119,7 @@ static void do_irq_async(struct pt_regs *regs, int irq) if (on_async_stack()) { do_IRQ(regs, irq); } else { - call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ, + call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ, struct pt_regs *, regs, int, irq); } } @@ -153,8 +153,8 @@ void noinstr do_io_irq(struct pt_regs *regs) set_cpu_flag(CIF_NOHZ_DELAY); do { - regs->tpi_info = S390_lowcore.tpi_info; - if (S390_lowcore.tpi_info.adapter_IO) + regs->tpi_info = get_lowcore()->tpi_info; + if (get_lowcore()->tpi_info.adapter_IO) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); @@ -183,9 +183,9 @@ void noinstr do_ext_irq(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - regs->int_code = S390_lowcore.ext_int_code_addr; - regs->int_parm = S390_lowcore.ext_params; - regs->int_parm_long = S390_lowcore.ext_params2; + regs->int_code = get_lowcore()->ext_int_code_addr; + regs->int_parm = get_lowcore()->ext_params; + regs->int_parm_long = get_lowcore()->ext_params2; from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3aee98efc374..f4cf65da6d49 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -52,7 +52,7 @@ static void __do_machine_kdump(void *data) purgatory = (purgatory_t)image->start; /* store_status() saved the prefix register to lowcore */ - prefix = (unsigned long) S390_lowcore.prefixreg_save_area; + prefix = (unsigned long)get_lowcore()->prefixreg_save_area; /* Now do the reset */ s390_reset_system(); @@ 
-91,7 +91,7 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ - mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); + mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); if (MACHINE_HAS_GS) { diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 230d010bac9b..db562416d728 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -125,7 +125,7 @@ static notrace void s390_handle_damage(void) smp_emergency_stop(); diag_amode31_ops.diag308_reset(); ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); - u64_to_hex(ptr, S390_lowcore.mcck_interruption_code); + u64_to_hex(ptr, get_lowcore()->mcck_interruption_code); /* * Disable low address protection and make machine check new PSW a @@ -135,17 +135,17 @@ static notrace void s390_handle_damage(void) cr0_new = cr0; cr0_new.lap = 0; local_ctl_load(0, &cr0_new.reg); - psw_save = S390_lowcore.mcck_new_psw; - psw_bits(S390_lowcore.mcck_new_psw).io = 0; - psw_bits(S390_lowcore.mcck_new_psw).ext = 0; - psw_bits(S390_lowcore.mcck_new_psw).wait = 1; + psw_save = get_lowcore()->mcck_new_psw; + psw_bits(get_lowcore()->mcck_new_psw).io = 0; + psw_bits(get_lowcore()->mcck_new_psw).ext = 0; + psw_bits(get_lowcore()->mcck_new_psw).wait = 1; sclp_emergency_printk(message); /* * Restore machine check new PSW and control register 0 to original * values. This makes possible system dump analysis easier. */ - S390_lowcore.mcck_new_psw = psw_save; + get_lowcore()->mcck_new_psw = psw_save; local_ctl_load(0, &cr0.reg); disabled_wait(); while (1); @@ -226,7 +226,7 @@ static bool notrace nmi_registers_valid(union mci mci) /* * Set the clock comparator register to the next expected value. 
*/ - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(get_lowcore()->clock_comparator); if (!mci.gr || !mci.fp || !mci.fc) return false; /* @@ -252,7 +252,7 @@ static bool notrace nmi_registers_valid(union mci mci) * check handling must take care of this. The host values are saved by * KVM and are not affected. */ - cr2.reg = S390_lowcore.cregs_save_area[2]; + cr2.reg = get_lowcore()->cregs_save_area[2]; if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST)) return false; if (!mci.ms || !mci.pm || !mci.ia) @@ -278,11 +278,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs) sie_page = container_of(sie_block, struct sie_page, sie_block); mcck_backup = &sie_page->mcck_info; - mcck_backup->mcic = S390_lowcore.mcck_interruption_code & + mcck_backup->mcic = get_lowcore()->mcck_interruption_code & ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE); - mcck_backup->ext_damage_code = S390_lowcore.external_damage_code; - mcck_backup->failing_storage_address - = S390_lowcore.failing_storage_address; + mcck_backup->ext_damage_code = get_lowcore()->external_damage_code; + mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address; } NOKPROBE_SYMBOL(s390_backup_mcck_info); @@ -314,7 +313,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (user_mode(regs)) update_timer_mcck(); inc_irq_stat(NMI_NMI); - mci.val = S390_lowcore.mcck_interruption_code; + mci.val = get_lowcore()->mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); /* @@ -382,9 +381,9 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + if (get_lowcore()->external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + if (get_lowcore()->external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); mcck_pending = 1; } diff 
--git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 06efad5b4f93..736c1d9632dd 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1022,7 +1022,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) } /* Load current program parameter */ - lpp(&S390_lowcore.lpp); + lpp(&get_lowcore()->lpp); debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i " "interval %#lx tear %#lx dear %#lx\n", __func__, diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 95079a289109..2f5a20e300f6 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -372,7 +372,7 @@ static int paicrypt_add(struct perf_event *event, int flags) if (++cpump->active_events == 1) { ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; - WRITE_ONCE(S390_lowcore.ccd, ccd); + WRITE_ONCE(get_lowcore()->ccd, ccd); local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); } if (flags & PERF_EF_START) @@ -409,7 +409,7 @@ static void paicrypt_del(struct perf_event *event, int flags) paicrypt_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); - WRITE_ONCE(S390_lowcore.ccd, 0); + WRITE_ONCE(get_lowcore()->ccd, 0); } } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 6040f3c2b942..6295531b39a2 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -389,7 +389,7 @@ static int paiext_add(struct perf_event *event, int flags) struct paiext_cb *pcb = cpump->paiext_cb; if (++cpump->active_events == 1) { - S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); + get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb); pcb->acc = virt_to_phys(cpump->area) | 0x1; /* Enable CPU instruction lookup for PAIE1 control block */ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT); @@ -431,7 +431,7 @@ static void paiext_del(struct perf_event *event, int flags) /* Disable CPU 
instruction lookup for PAIE1 control block */ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); pcb->acc = 0; - S390_lowcore.aicd = 0; + get_lowcore()->aicd = 0; } } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index d8740631df4b..9637aee43c40 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -71,10 +71,10 @@ void flush_thread(void) void arch_setup_new_exec(void) { - if (S390_lowcore.current_pid != current->pid) { - S390_lowcore.current_pid = current->pid; + if (get_lowcore()->current_pid != current->pid) { + get_lowcore()->current_pid = current->pid; if (test_facility(40)) - lpp(&S390_lowcore.lpp); + lpp(&get_lowcore()->lpp); } } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 90c2c786bb35..3993f4caf224 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -421,16 +421,16 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = S390_lowcore.machine_flags; - lc->preempt_count = S390_lowcore.preempt_count; + lc->machine_flags = get_lowcore()->machine_flags; + lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); - lc->sys_enter_timer = S390_lowcore.sys_enter_timer; - lc->exit_timer = S390_lowcore.exit_timer; - lc->user_timer = S390_lowcore.user_timer; - lc->system_timer = S390_lowcore.system_timer; - lc->steal_timer = S390_lowcore.steal_timer; - lc->last_update_timer = S390_lowcore.last_update_timer; - lc->last_update_clock = S390_lowcore.last_update_clock; + lc->sys_enter_timer = get_lowcore()->sys_enter_timer; + lc->exit_timer = get_lowcore()->exit_timer; + lc->user_timer = get_lowcore()->user_timer; + lc->system_timer = get_lowcore()->system_timer; + lc->steal_timer = get_lowcore()->steal_timer; + lc->last_update_timer = get_lowcore()->last_update_timer; + lc->last_update_clock = get_lowcore()->last_update_clock; /* * Allocate 
the global restart stack which is the same for * all CPUs in case *one* of them does a PSW restart. @@ -439,7 +439,7 @@ static void __init setup_lowcore(void) lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET; lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET; lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET; - lc->kernel_stack = S390_lowcore.kernel_stack; + lc->kernel_stack = get_lowcore()->kernel_stack; /* * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant * restart data to the absolute zero lowcore. This is necessary if @@ -455,8 +455,8 @@ static void __init setup_lowcore(void) lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; - lc->kernel_asce = S390_lowcore.kernel_asce; - lc->user_asce = S390_lowcore.user_asce; + lc->kernel_asce = get_lowcore()->kernel_asce; + lc->user_asce = get_lowcore()->user_asce; system_ctlreg_init_save_area(lc); abs_lc = get_abs_lowcore(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0324649aae0a..ebe4bc326a6b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -203,7 +203,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) mcck_stack = stack_alloc(); if (!lc || !nodat_stack || !async_stack || !mcck_stack) goto out; - memcpy(lc, &S390_lowcore, 512); + memcpy(lc, get_lowcore(), 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + STACK_INIT_OFFSET; lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; @@ -265,9 +265,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; - lc->kernel_asce = S390_lowcore.kernel_asce; + lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = S390_lowcore.machine_flags; + lc->machine_flags = get_lowcore()->machine_flags; 
lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); @@ -407,7 +407,7 @@ void smp_call_ipl_cpu(void (*func)(void *), void *data) struct lowcore *lc = lowcore_ptr[0]; if (pcpu_devices[0].address == stap()) - lc = &S390_lowcore; + lc = get_lowcore(); pcpu_delegate(&pcpu_devices[0], func, data, lc->nodat_stack); @@ -844,13 +844,13 @@ static void smp_start_secondary(void *cpuvoid) { int cpu = raw_smp_processor_id(); - S390_lowcore.last_update_clock = get_tod_clock(); - S390_lowcore.restart_stack = (unsigned long)restart_stack; - S390_lowcore.restart_fn = (unsigned long)do_restart; - S390_lowcore.restart_data = 0; - S390_lowcore.restart_source = -1U; - S390_lowcore.restart_flags = 0; - restore_access_regs(S390_lowcore.access_regs_save_area); + get_lowcore()->last_update_clock = get_tod_clock(); + get_lowcore()->restart_stack = (unsigned long)restart_stack; + get_lowcore()->restart_fn = (unsigned long)do_restart; + get_lowcore()->restart_data = 0; + get_lowcore()->restart_source = -1U; + get_lowcore()->restart_flags = 0; + restore_access_regs(get_lowcore()->access_regs_save_area); cpu_init(); rcutree_report_cpu_starting(cpu); init_cpu_timer(); @@ -981,16 +981,16 @@ void __init smp_prepare_boot_cpu(void) WARN_ON(!cpu_present(0) || !cpu_online(0)); pcpu->state = CPU_STATE_CONFIGURED; - S390_lowcore.percpu_offset = __per_cpu_offset[0]; + get_lowcore()->percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } void __init smp_setup_processor_id(void) { pcpu_devices[0].address = stap(); - S390_lowcore.cpu_nr = 0; - S390_lowcore.spinlock_lockval = arch_spin_lockval(0); - S390_lowcore.spinlock_index = 0; + get_lowcore()->cpu_nr = 0; + get_lowcore()->spinlock_lockval = arch_spin_lockval(0); + get_lowcore()->spinlock_index = 0; } /* diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index dc2355c623d6..7b9bf0ad364d 100644 --- a/arch/s390/kernel/syscall.c +++ 
b/arch/s390/kernel/syscall.c @@ -151,8 +151,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { add_random_kstack_offset(); enter_from_user_mode(regs); - regs->psw = S390_lowcore.svc_old_psw; - regs->int_code = S390_lowcore.svc_int_code; + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; update_timer_sys(); if (static_branch_likely(&cpu_has_bear)) current->thread.last_break = regs->last_break; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index fb9f31f36628..b713effe0579 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -131,7 +131,7 @@ void clock_comparator_work(void) { struct clock_event_device *cd; - S390_lowcore.clock_comparator = clock_comparator_max; + get_lowcore()->clock_comparator = clock_comparator_max; cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } @@ -139,8 +139,8 @@ void clock_comparator_work(void) static int s390_next_event(unsigned long delta, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_tod_clock() + delta; - set_clock_comparator(S390_lowcore.clock_comparator); + get_lowcore()->clock_comparator = get_tod_clock() + delta; + set_clock_comparator(get_lowcore()->clock_comparator); return 0; } @@ -153,8 +153,8 @@ void init_cpu_timer(void) struct clock_event_device *cd; int cpu; - S390_lowcore.clock_comparator = clock_comparator_max; - set_clock_comparator(S390_lowcore.clock_comparator); + get_lowcore()->clock_comparator = clock_comparator_max; + set_clock_comparator(get_lowcore()->clock_comparator); cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); @@ -184,8 +184,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code, unsigned long param64) { inc_irq_stat(IRQEXT_CLK); - if (S390_lowcore.clock_comparator == clock_comparator_max) - set_clock_comparator(S390_lowcore.clock_comparator); + if (get_lowcore()->clock_comparator == clock_comparator_max) + set_clock_comparator(get_lowcore()->clock_comparator); } 
static void stp_timing_alert(struct stp_irq_parm *); @@ -408,12 +408,12 @@ static void clock_sync_global(long delta) static void clock_sync_local(long delta) { /* Add the delta to the clock comparator. */ - if (S390_lowcore.clock_comparator != clock_comparator_max) { - S390_lowcore.clock_comparator += delta; - set_clock_comparator(S390_lowcore.clock_comparator); + if (get_lowcore()->clock_comparator != clock_comparator_max) { + get_lowcore()->clock_comparator += delta; + set_clock_comparator(get_lowcore()->clock_comparator); } /* Adjust the last_update_clock time-stamp. */ - S390_lowcore.last_update_clock += delta; + get_lowcore()->last_update_clock += delta; } /* Single threaded workqueue used for stp sync events */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 52578b5cecbd..8b904f7efb0e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -293,10 +293,10 @@ void __init trap_init(void) local_irq_save(flags); cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); - psw_bits(S390_lowcore.external_new_psw).mcheck = 1; - psw_bits(S390_lowcore.program_new_psw).mcheck = 1; - psw_bits(S390_lowcore.svc_new_psw).mcheck = 1; - psw_bits(S390_lowcore.io_new_psw).mcheck = 1; + psw_bits(get_lowcore()->external_new_psw).mcheck = 1; + psw_bits(get_lowcore()->program_new_psw).mcheck = 1; + psw_bits(get_lowcore()->svc_new_psw).mcheck = 1; + psw_bits(get_lowcore()->io_new_psw).mcheck = 1; local_ctl_load(0, &cr0); local_irq_restore(flags); local_mcck_enable(); @@ -310,8 +310,8 @@ void noinstr __do_pgm_check(struct pt_regs *regs) unsigned int trapnr; irqentry_state_t state; - regs->int_code = S390_lowcore.pgm_int_code; - regs->int_parm_long = S390_lowcore.trans_exc_code; + regs->int_code = get_lowcore()->pgm_int_code; + regs->int_parm_long = get_lowcore()->trans_exc_code; state = irqentry_enter(regs); @@ -324,19 +324,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - if 
(S390_lowcore.pgm_code & 0x0200) { + if (get_lowcore()->pgm_code & 0x0200) { /* transaction abort */ - current->thread.trap_tdb = S390_lowcore.pgm_tdb; + current->thread.trap_tdb = get_lowcore()->pgm_tdb; } - if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { + if (get_lowcore()->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = &current->thread.per_event; set_thread_flag(TIF_PER_TRAP); - ev->address = S390_lowcore.per_address; - ev->cause = S390_lowcore.per_code_combined; - ev->paid = S390_lowcore.per_access_id; + ev->address = get_lowcore()->per_address; + ev->cause = get_lowcore()->per_code_combined; + ev->paid = get_lowcore()->per_access_id; } else { /* PER event in kernel is kprobes */ __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index ffc1db0cbf9c..7d8991c3cd3a 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -41,8 +41,8 @@ static inline void set_vtimer(u64 expires) " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm.
afterwards */ : "=Q" (timer) : "Q" (expires)); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; + get_lowcore()->system_timer += get_lowcore()->last_update_timer - timer; + get_lowcore()->last_update_timer = expires; } static inline int virt_timer_forward(u64 elapsed) @@ -118,21 +118,21 @@ static int do_account_vtime(struct task_struct *tsk) { u64 timer, clock, user, guest, system, hardirq, softirq; - timer = S390_lowcore.last_update_timer; - clock = S390_lowcore.last_update_clock; + timer = get_lowcore()->last_update_timer; + clock = get_lowcore()->last_update_clock; asm volatile( " stpt %0\n" /* Store current cpu timer value */ " stckf %1" /* Store current tod clock value */ - : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock) + : "=Q" (get_lowcore()->last_update_timer), + "=Q" (get_lowcore()->last_update_clock) : : "cc"); - clock = S390_lowcore.last_update_clock - clock; - timer -= S390_lowcore.last_update_timer; + clock = get_lowcore()->last_update_clock - clock; + timer -= get_lowcore()->last_update_timer; if (hardirq_count()) - S390_lowcore.hardirq_timer += timer; + get_lowcore()->hardirq_timer += timer; else - S390_lowcore.system_timer += timer; + get_lowcore()->system_timer += timer; /* Update MT utilization calculation */ if (smp_cpu_mtid && @@ -141,16 +141,16 @@ static int do_account_vtime(struct task_struct *tsk) /* Calculate cputime delta */ user = update_tsk_timer(&tsk->thread.user_timer, - READ_ONCE(S390_lowcore.user_timer)); + READ_ONCE(get_lowcore()->user_timer)); guest = update_tsk_timer(&tsk->thread.guest_timer, - READ_ONCE(S390_lowcore.guest_timer)); + READ_ONCE(get_lowcore()->guest_timer)); system = update_tsk_timer(&tsk->thread.system_timer, - READ_ONCE(S390_lowcore.system_timer)); + READ_ONCE(get_lowcore()->system_timer)); hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, - READ_ONCE(S390_lowcore.hardirq_timer)); + 
READ_ONCE(get_lowcore()->hardirq_timer)); softirq = update_tsk_timer(&tsk->thread.softirq_timer, - READ_ONCE(S390_lowcore.softirq_timer)); - S390_lowcore.steal_timer += + READ_ONCE(get_lowcore()->softirq_timer)); + get_lowcore()->steal_timer += clock - user - guest - system - hardirq - softirq; /* Push account value */ @@ -177,16 +177,16 @@ static int do_account_vtime(struct task_struct *tsk) void vtime_task_switch(struct task_struct *prev) { do_account_vtime(prev); - prev->thread.user_timer = S390_lowcore.user_timer; - prev->thread.guest_timer = S390_lowcore.guest_timer; - prev->thread.system_timer = S390_lowcore.system_timer; - prev->thread.hardirq_timer = S390_lowcore.hardirq_timer; - prev->thread.softirq_timer = S390_lowcore.softirq_timer; - S390_lowcore.user_timer = current->thread.user_timer; - S390_lowcore.guest_timer = current->thread.guest_timer; - S390_lowcore.system_timer = current->thread.system_timer; - S390_lowcore.hardirq_timer = current->thread.hardirq_timer; - S390_lowcore.softirq_timer = current->thread.softirq_timer; + prev->thread.user_timer = get_lowcore()->user_timer; + prev->thread.guest_timer = get_lowcore()->guest_timer; + prev->thread.system_timer = get_lowcore()->system_timer; + prev->thread.hardirq_timer = get_lowcore()->hardirq_timer; + prev->thread.softirq_timer = get_lowcore()->softirq_timer; + get_lowcore()->user_timer = current->thread.user_timer; + get_lowcore()->guest_timer = current->thread.guest_timer; + get_lowcore()->system_timer = current->thread.system_timer; + get_lowcore()->hardirq_timer = current->thread.hardirq_timer; + get_lowcore()->softirq_timer = current->thread.softirq_timer; } /* @@ -201,23 +201,23 @@ void vtime_flush(struct task_struct *tsk) if (do_account_vtime(tsk)) virt_timer_expire(); - steal = S390_lowcore.steal_timer; - avg_steal = S390_lowcore.avg_steal_timer; + steal = get_lowcore()->steal_timer; + avg_steal = get_lowcore()->avg_steal_timer; if ((s64) steal > 0) { - S390_lowcore.steal_timer = 0; + 
get_lowcore()->steal_timer = 0; account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } - S390_lowcore.avg_steal_timer = avg_steal / 2; + get_lowcore()->avg_steal_timer = avg_steal / 2; } static u64 vtime_delta(void) { - u64 timer = S390_lowcore.last_update_timer; + u64 timer = get_lowcore()->last_update_timer; - S390_lowcore.last_update_timer = get_cpu_timer(); + get_lowcore()->last_update_timer = get_cpu_timer(); - return timer - S390_lowcore.last_update_timer; + return timer - get_lowcore()->last_update_timer; } /* @@ -229,9 +229,9 @@ void vtime_account_kernel(struct task_struct *tsk) u64 delta = vtime_delta(); if (tsk->flags & PF_VCPU) - S390_lowcore.guest_timer += delta; + get_lowcore()->guest_timer += delta; else - S390_lowcore.system_timer += delta; + get_lowcore()->system_timer += delta; virt_timer_forward(delta); } @@ -241,7 +241,7 @@ void vtime_account_softirq(struct task_struct *tsk) { u64 delta = vtime_delta(); - S390_lowcore.softirq_timer += delta; + get_lowcore()->softirq_timer += delta; virt_timer_forward(delta); } @@ -250,7 +250,7 @@ void vtime_account_hardirq(struct task_struct *tsk) { u64 delta = vtime_delta(); - S390_lowcore.hardirq_timer += delta; + get_lowcore()->hardirq_timer += delta; virt_timer_forward(delta); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 82e9631cd9ef..50b77b759042 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4079,7 +4079,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { /* do not poll with more than halt_poll_max_steal percent of steal time */ - if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= + if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >= READ_ONCE(halt_poll_max_steal)) { vcpu->stat.halt_no_poll_steal++; return true; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 81c53440b3e6..0c9a73a18826 100644 --- a/arch/s390/lib/spinlock.c +++ 
b/arch/s390/lib/spinlock.c @@ -119,7 +119,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) struct spin_wait *node, *next; int lockval, ix, node_id, tail_id, old, new, owner, count; - ix = S390_lowcore.spinlock_index++; + ix = get_lowcore()->spinlock_index++; barrier(); lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ node = this_cpu_ptr(&spin_wait[ix]); @@ -205,7 +205,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) } out: - S390_lowcore.spinlock_index--; + get_lowcore()->spinlock_index--; } static inline void arch_spin_lock_classic(arch_spinlock_t *lp) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 2848e3fb2ff5..768898dacb92 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -356,7 +356,7 @@ static noinline int unwindme_func2(struct unwindme *u) if (u->flags & UWM_SWITCH_STACK) { local_irq_save(flags); local_mcck_save(mflags); - rc = call_on_stack(1, S390_lowcore.nodat_stack, + rc = call_on_stack(1, get_lowcore()->nodat_stack, int, unwindme_func3, struct unwindme *, u); local_mcck_restore(mflags); local_irq_restore(flags); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 61d8dcd95bbc..c7c269d5c491 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -21,13 +21,13 @@ void debug_user_asce(int exit) local_ctl_store(1, &cr1); local_ctl_store(7, &cr7); - if (cr1.val == S390_lowcore.kernel_asce.val && cr7.val == S390_lowcore.user_asce.val) + if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val) return; panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016lx user: %016lx\n", exit ? 
"exit" : "entry", cr1.val, cr7.val, - S390_lowcore.kernel_asce.val, S390_lowcore.user_asce.val); + get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val); } #endif /*CONFIG_DEBUG_ENTRY */ diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index ffd07ed7b4af..45db5f47b22d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -288,7 +288,7 @@ static int pt_dump_init(void) * kernel ASCE. We need this to keep the page table walker functions * from accessing non-existent entries. */ - max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; + max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7cd50ad3b4ad..6b19a33c49c2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -74,7 +74,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs) return USER_FAULT; if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; - gmap = (struct gmap *)S390_lowcore.gmap; + gmap = (struct gmap *)get_lowcore()->gmap; if (gmap && gmap->asce == regs->cr1) return GMAP_FAULT; return KERNEL_FAULT; @@ -182,15 +182,15 @@ static void dump_fault_info(struct pt_regs *regs) pr_cont("mode while using "); switch (get_fault_type(regs)) { case USER_FAULT: - asce = S390_lowcore.user_asce.val; + asce = get_lowcore()->user_asce.val; pr_cont("user "); break; case GMAP_FAULT: - asce = ((struct gmap *)S390_lowcore.gmap)->asce; + asce = ((struct gmap *)get_lowcore()->gmap)->asce; pr_cont("gmap "); break; case KERNEL_FAULT: - asce = S390_lowcore.kernel_asce.val; + asce = get_lowcore()->kernel_asce.val; pr_cont("kernel "); break; default: @@ -351,7 +351,7 @@ static void do_exception(struct pt_regs *regs, int access) mmap_read_lock(mm); gmap = NULL; 
if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { - gmap = (struct gmap *)S390_lowcore.gmap; + gmap = (struct gmap *)get_lowcore()->gmap; current->thread.gmap_addr = address; current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_int_code = regs->int_code & 0xffff; @@ -522,7 +522,7 @@ void do_secure_storage_access(struct pt_regs *regs) switch (get_fault_type(regs)) { case GMAP_FAULT: mm = current->mm; - gmap = (struct gmap *)S390_lowcore.gmap; + gmap = (struct gmap *)get_lowcore()->gmap; mmap_read_lock(mm); addr = __gmap_translate(gmap, addr); mmap_read_unlock(mm); @@ -563,7 +563,7 @@ NOKPROBE_SYMBOL(do_secure_storage_access); void do_non_secure_storage_access(struct pt_regs *regs) { - struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + struct gmap *gmap = (struct gmap *)get_lowcore()->gmap; unsigned long gaddr = get_fault_address(regs); if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT)) @@ -575,7 +575,7 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access); void do_secure_storage_violation(struct pt_regs *regs) { - struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + struct gmap *gmap = (struct gmap *)get_lowcore()->gmap; unsigned long gaddr = get_fault_address(regs); /* diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index d5a5756dd69f..eb0b51a36be0 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(gmap_remove); */ void gmap_enable(struct gmap *gmap) { - S390_lowcore.gmap = (unsigned long) gmap; + get_lowcore()->gmap = (unsigned long)gmap; } EXPORT_SYMBOL_GPL(gmap_enable); @@ -297,7 +297,7 @@ EXPORT_SYMBOL_GPL(gmap_enable); */ void gmap_disable(struct gmap *gmap) { - S390_lowcore.gmap = 0UL; + get_lowcore()->gmap = 0UL; } EXPORT_SYMBOL_GPL(gmap_disable); @@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(gmap_disable); */ struct gmap *gmap_get_enabled(void) { - return (struct gmap *) S390_lowcore.gmap; + return (struct gmap *)get_lowcore()->gmap; } EXPORT_SYMBOL_GPL(gmap_get_enabled); 
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 01bc8fad64d6..5f805ad42d4c 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -75,7 +75,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, break; } table = (unsigned long *)((unsigned long)old & mask); - crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val); + crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); } else if (MACHINE_HAS_IDTE) { cspg(old, *old, new); } else { diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index abb629d7e131..07d0fe197dad 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -64,8 +64,8 @@ static void __crst_table_upgrade(void *arg) /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - S390_lowcore.user_asce.val = mm->context.asce; - local_ctl_load(7, &S390_lowcore.user_asce); + get_lowcore()->user_asce.val = mm->context.asce; + local_ctl_load(7, &get_lowcore()->user_asce); } __tlb_flush_local(); } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0de0f6e405b5..cff4838fad21 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1064,7 +1064,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO; + get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO; return NULL; } if (!strcmp(str, "force_floating")) { From eafcd205b1a6f32879c347d717b1ce3bdfdcc7c6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:26 +0200 Subject: [PATCH 19/54] s390/drivers: Replace S390_lowcore by get_lowcore() Replace all S390_lowcore usages in drivers/s390 by get_lowcore(). 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_early.c | 4 ++-- drivers/s390/char/sclp_early_core.c | 12 ++++++------ drivers/s390/cio/qdio_main.c | 2 +- drivers/s390/cio/qdio_thinint.c | 2 +- drivers/s390/cio/trace.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 60a247fdb2a7..42986284cc78 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -50,9 +50,9 @@ static void __init sclp_early_facilities_detect(void) sclp.has_aisi = !!(sccb->fac118 & 0x10); sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01); if (sccb->fac85 & 0x02) - S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; + get_lowcore()->machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) - S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; + get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_GUEST; if (sccb->cpuoff > 134) { sclp.has_diag318 = !!(sccb->byte_134 & 0x80); sclp.has_diag320 = !!(sccb->byte_134 & 0x04); diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 9f6165cafdc3..5a5383cceb6f 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -38,11 +38,11 @@ void sclp_early_wait_irq(void) cr0_new.sssm = 1; local_ctl_load(0, &cr0_new.reg); - psw_ext_save = S390_lowcore.external_new_psw; + psw_ext_save = get_lowcore()->external_new_psw; psw_mask = __extract_psw(); - S390_lowcore.external_new_psw.mask = psw_mask; + get_lowcore()->external_new_psw.mask = psw_mask; psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT; - S390_lowcore.ext_int_code = 0; + get_lowcore()->ext_int_code = 0; do { asm volatile( @@ -53,12 +53,12 @@ void sclp_early_wait_irq(void) "0:\n" : [addr] "=&d" (addr), [psw_wait_addr] "=Q" (psw_wait.addr), - [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr) + [psw_ext_addr] "=Q" (get_lowcore()->external_new_psw.addr) : 
[psw_wait] "Q" (psw_wait) : "cc", "memory"); - } while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG); + } while (get_lowcore()->ext_int_code != EXT_IRQ_SERVICE_SIG); - S390_lowcore.external_new_psw = psw_ext_save; + get_lowcore()->external_new_psw = psw_ext_save; local_ctl_load(0, &cr0.reg); } diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index a1cb39f4b7a2..b711bb17f9da 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -695,7 +695,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) return; qdio_deliver_irq(irq_ptr); - irq_ptr->last_data_irq_time = S390_lowcore.int_clock; + irq_ptr->last_data_irq_time = get_lowcore()->int_clock; } static void qdio_handle_activate_check(struct qdio_irq *irq_ptr, diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index ccd4ed93bd92..f931954910c5 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -99,7 +99,7 @@ static inline u32 clear_shared_ind(void) static void tiqdio_thinint_handler(struct airq_struct *airq, struct tpi_info *tpi_info) { - u64 irq_time = S390_lowcore.int_clock; + u64 irq_time = get_lowcore()->int_clock; u32 si_used = clear_shared_ind(); struct qdio_irq *irq; diff --git a/drivers/s390/cio/trace.h b/drivers/s390/cio/trace.h index a4c5c6736b31..6bca5315ee2a 100644 --- a/drivers/s390/cio/trace.h +++ b/drivers/s390/cio/trace.h @@ -169,7 +169,7 @@ TRACE_EVENT(s390_cio_tpi, else if (addr) __entry->tpi_info = *addr; else - __entry->tpi_info = S390_lowcore.tpi_info; + __entry->tpi_info = get_lowcore()->tpi_info; __entry->cssid = __entry->tpi_info.schid.cssid; __entry->ssid = __entry->tpi_info.schid.ssid; __entry->schno = __entry->tpi_info.schid.sch_no; From bbf786061dbe38135b76f2cde9e72aec624eb710 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:27 +0200 Subject: [PATCH 20/54] s390/boot: Replace S390_lowcore by get_lowcore() Replace all S390_lowcore usages in 
arch/s390/boot by get_lowcore(). Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 4 ++-- arch/s390/boot/ipl_report.c | 2 +- arch/s390/boot/pgm_check_info.c | 18 +++++++++--------- arch/s390/boot/physmem_info.c | 8 ++++---- arch/s390/boot/startup.c | 12 ++++++------ arch/s390/boot/vmem.c | 12 ++++++------ 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index b24de9aabf7d..a21f301acd29 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -51,11 +51,11 @@ static inline int __diag308(unsigned long subcode, void *addr) : [r1] "+&d" (r1.pair), [reg1] "=&d" (reg1), [reg2] "=&a" (reg2), - "+Q" (S390_lowcore.program_new_psw), + "+Q" (get_lowcore()->program_new_psw), "=Q" (old) : [subcode] "d" (subcode), [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) + [psw_pgm] "a" (&get_lowcore()->program_new_psw) : "cc", "memory"); return r1.odd; } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 1803035e68d2..d00898852a88 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -106,7 +106,7 @@ int read_ipl_report(void) * the IPL parameter list, then align the address to a double * word boundary. 
*/ - tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr; + tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr; pl_hdr = (struct ipl_pl_hdr *) tmp; tmp = (tmp + pl_hdr->len + 7) & -8UL; rl_hdr = (struct ipl_rl_hdr *) tmp; diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index ea96275b0380..5352b3d356da 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -145,22 +145,22 @@ void print_stacktrace(unsigned long sp) void print_pgm_check_info(void) { - unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); + unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; + struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); decompressor_printk("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) decompressor_printk("Kernel command line: %s\n", early_command_line); decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", - S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); + get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); if (kaslr_enabled()) { decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); decompressor_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); } decompressor_printk("PSW : %016lx %016lx (%pS)\n", - S390_lowcore.psw_save_area.mask, - S390_lowcore.psw_save_area.addr, - (void *)S390_lowcore.psw_save_area.addr); + get_lowcore()->psw_save_area.mask, + get_lowcore()->psw_save_area.addr, + (void *)get_lowcore()->psw_save_area.addr); decompressor_printk( " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, @@ -174,8 +174,8 @@ void print_pgm_check_info(void) gpregs[8], gpregs[9], gpregs[10], gpregs[11]); decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - 
print_stacktrace(S390_lowcore.gpregs_save_area[15]); + print_stacktrace(get_lowcore()->gpregs_save_area[15]); decompressor_printk("Last Breaking-Event-Address:\n"); - decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, - (void *)S390_lowcore.pgm_last_break); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, + (void *)get_lowcore()->pgm_last_break); } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 0cf79826eef9..4c9ad8258f7e 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -81,11 +81,11 @@ static int __diag260(unsigned long rx1, unsigned long rx2) [reg2] "=&a" (reg2), [rc] "+&d" (rc), [ry] "+&d" (ry), - "+Q" (S390_lowcore.program_new_psw), + "+Q" (get_lowcore()->program_new_psw), "=Q" (old) : [rx] "d" (rx.pair), [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) + [psw_pgm] "a" (&get_lowcore()->program_new_psw) : "cc", "memory"); return rc == 0 ? 
ry : -1; } @@ -129,10 +129,10 @@ static int tprot(unsigned long addr) : [reg1] "=&d" (reg1), [reg2] "=&a" (reg2), [rc] "+&d" (rc), - "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (get_lowcore()->program_new_psw.addr), "=Q" (old) : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [psw_pgm] "a" (&get_lowcore()->program_new_psw), [addr] "a" (addr) : "cc", "memory"); return rc; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 182aac6a0f77..4d04ab84c20e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -78,10 +78,10 @@ static int cmma_test_essa(void) [reg2] "=&a" (reg2), [rc] "+&d" (rc), [tmp] "=&d" (tmp), - "+Q" (S390_lowcore.program_new_psw), + "+Q" (get_lowcore()->program_new_psw), "=Q" (old) : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [psw_pgm] "a" (&get_lowcore()->program_new_psw), [cmd] "i" (ESSA_GET_STATE) : "cc", "memory"); return rc; @@ -101,10 +101,10 @@ static void cmma_init(void) static void setup_lpp(void) { - S390_lowcore.current_pid = 0; - S390_lowcore.lpp = LPP_MAGIC; + get_lowcore()->current_pid = 0; + get_lowcore()->lpp = LPP_MAGIC; if (test_facility(40)) - lpp(&S390_lowcore.lpp); + lpp(&get_lowcore()->lpp); } #ifdef CONFIG_KERNEL_UNCOMPRESSED @@ -477,7 +477,7 @@ void startup_kernel(void) * Save KASLR offset for early dumps, before vmcore_info is set. * Mark as uneven to distinguish from real vmcore_info pointer. */ - S390_lowcore.vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0; + get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0; /* * Jump to the decompressed kernel entry point and switch DAT mode on. 
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 96d48b7112d4..07d697eb279d 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -470,13 +470,13 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l kasan_populate_shadow(kernel_start, kernel_end); - S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; - S390_lowcore.user_asce = s390_invalid_asce; + get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits; + get_lowcore()->user_asce = s390_invalid_asce; - local_ctl_load(1, &S390_lowcore.kernel_asce); - local_ctl_load(7, &S390_lowcore.user_asce); - local_ctl_load(13, &S390_lowcore.kernel_asce); + local_ctl_load(1, &get_lowcore()->kernel_asce); + local_ctl_load(7, &get_lowcore()->user_asce); + local_ctl_load(13, &get_lowcore()->kernel_asce); - init_mm.context.asce = S390_lowcore.kernel_asce.val; + init_mm.context.asce = get_lowcore()->kernel_asce.val; init_mm.pgd = init_mm_pgd; } From d7c3ebc49eb72e9c8142fed49e9c9ec21815810c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:28 +0200 Subject: [PATCH 21/54] s390/nmi: Remove duplicate get_lowcore() calls Assign the output from get_lowcore() to a local variable, so the code is easier to read. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/nmi.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index db562416d728..fbd218b6fc8e 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -117,6 +117,7 @@ static __always_inline char *u64_to_hex(char *dest, u64 val) static notrace void s390_handle_damage(void) { + struct lowcore *lc = get_lowcore(); union ctlreg0 cr0, cr0_new; char message[100]; psw_t psw_save; @@ -125,7 +126,7 @@ static notrace void s390_handle_damage(void) smp_emergency_stop(); diag_amode31_ops.diag308_reset(); ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); - u64_to_hex(ptr, get_lowcore()->mcck_interruption_code); + u64_to_hex(ptr, lc->mcck_interruption_code); /* * Disable low address protection and make machine check new PSW a @@ -135,17 +136,17 @@ static notrace void s390_handle_damage(void) cr0_new = cr0; cr0_new.lap = 0; local_ctl_load(0, &cr0_new.reg); - psw_save = get_lowcore()->mcck_new_psw; - psw_bits(get_lowcore()->mcck_new_psw).io = 0; - psw_bits(get_lowcore()->mcck_new_psw).ext = 0; - psw_bits(get_lowcore()->mcck_new_psw).wait = 1; + psw_save = lc->mcck_new_psw; + psw_bits(lc->mcck_new_psw).io = 0; + psw_bits(lc->mcck_new_psw).ext = 0; + psw_bits(lc->mcck_new_psw).wait = 1; sclp_emergency_printk(message); /* * Restore machine check new PSW and control register 0 to original * values. This makes possible system dump analysis easier. 
*/ - get_lowcore()->mcck_new_psw = psw_save; + lc->mcck_new_psw = psw_save; local_ctl_load(0, &cr0.reg); disabled_wait(); while (1); @@ -301,6 +302,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) static int ipd_count; static DEFINE_SPINLOCK(ipd_lock); static unsigned long long last_ipd; + struct lowcore *lc = get_lowcore(); struct mcck_struct *mcck; unsigned long long tmp; irqentry_state_t irq_state; @@ -313,7 +315,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (user_mode(regs)) update_timer_mcck(); inc_irq_stat(NMI_NMI); - mci.val = get_lowcore()->mcck_interruption_code; + mci.val = lc->mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); /* @@ -381,9 +383,9 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (get_lowcore()->external_damage_code & (1U << ED_STP_SYNC)) + if (lc->external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); - if (get_lowcore()->external_damage_code & (1U << ED_STP_ISLAND)) + if (lc->external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); mcck_pending = 1; } From eb28ec2b2e55c58148fa07eb994af11e6099e748 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:29 +0200 Subject: [PATCH 22/54] s390/smp: Remove duplicate get_lowcore() calls Assign the output from get_lowcore() to a local variable, so the code is easier to read. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ebe4bc326a6b..4c2faab56eb6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -842,15 +842,16 @@ void __init smp_detect_cpus(void) */ static void smp_start_secondary(void *cpuvoid) { + struct lowcore *lc = get_lowcore(); int cpu = raw_smp_processor_id(); - get_lowcore()->last_update_clock = get_tod_clock(); - get_lowcore()->restart_stack = (unsigned long)restart_stack; - get_lowcore()->restart_fn = (unsigned long)do_restart; - get_lowcore()->restart_data = 0; - get_lowcore()->restart_source = -1U; - get_lowcore()->restart_flags = 0; - restore_access_regs(get_lowcore()->access_regs_save_area); + lc->last_update_clock = get_tod_clock(); + lc->restart_stack = (unsigned long)restart_stack; + lc->restart_fn = (unsigned long)do_restart; + lc->restart_data = 0; + lc->restart_source = -1U; + lc->restart_flags = 0; + restore_access_regs(lc->access_regs_save_area); cpu_init(); rcutree_report_cpu_starting(cpu); init_cpu_timer(); @@ -987,10 +988,12 @@ void __init smp_prepare_boot_cpu(void) void __init smp_setup_processor_id(void) { + struct lowcore *lc = get_lowcore(); + pcpu_devices[0].address = stap(); - get_lowcore()->cpu_nr = 0; - get_lowcore()->spinlock_lockval = arch_spin_lockval(0); - get_lowcore()->spinlock_index = 0; + lc->cpu_nr = 0; + lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; } /* From 46c3031108e83a63007cf2f4c9a379875825e972 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:30 +0200 Subject: [PATCH 23/54] s390/vtime: Remove duplicate get_lowcore() calls Assign the output from get_lowcore() to a local variable, so the code is easier to read. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vtime.h | 16 ++++--- arch/s390/kernel/vtime.c | 78 +++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index ef4dd7d057a2..9d25fb35a042 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -4,16 +4,20 @@ static inline void update_timer_sys(void) { - get_lowcore()->system_timer += get_lowcore()->last_update_timer - get_lowcore()->exit_timer; - get_lowcore()->user_timer += get_lowcore()->exit_timer - get_lowcore()->sys_enter_timer; - get_lowcore()->last_update_timer = get_lowcore()->sys_enter_timer; + struct lowcore *lc = get_lowcore(); + + lc->system_timer += lc->last_update_timer - lc->exit_timer; + lc->user_timer += lc->exit_timer - lc->sys_enter_timer; + lc->last_update_timer = lc->sys_enter_timer; } static inline void update_timer_mcck(void) { - get_lowcore()->system_timer += get_lowcore()->last_update_timer - get_lowcore()->exit_timer; - get_lowcore()->user_timer += get_lowcore()->exit_timer - get_lowcore()->mcck_enter_timer; - get_lowcore()->last_update_timer = get_lowcore()->mcck_enter_timer; + struct lowcore *lc = get_lowcore(); + + lc->system_timer += lc->last_update_timer - lc->exit_timer; + lc->user_timer += lc->exit_timer - lc->mcck_enter_timer; + lc->last_update_timer = lc->mcck_enter_timer; } #endif /* _S390_VTIME_H */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7d8991c3cd3a..234a0ba30510 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -35,14 +35,15 @@ static DEFINE_PER_CPU(u64, mt_scaling_jiffies); static inline void set_vtimer(u64 expires) { + struct lowcore *lc = get_lowcore(); u64 timer; asm volatile( " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm. 
afterwards */ : "=Q" (timer) : "Q" (expires)); - get_lowcore()->system_timer += get_lowcore()->last_update_timer - timer; - get_lowcore()->last_update_timer = expires; + lc->system_timer += lc->last_update_timer - timer; + lc->last_update_timer = expires; } static inline int virt_timer_forward(u64 elapsed) @@ -117,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime, static int do_account_vtime(struct task_struct *tsk) { u64 timer, clock, user, guest, system, hardirq, softirq; + struct lowcore *lc = get_lowcore(); - timer = get_lowcore()->last_update_timer; - clock = get_lowcore()->last_update_clock; + timer = lc->last_update_timer; + clock = lc->last_update_clock; asm volatile( " stpt %0\n" /* Store current cpu timer value */ " stckf %1" /* Store current tod clock value */ - : "=Q" (get_lowcore()->last_update_timer), - "=Q" (get_lowcore()->last_update_clock) + : "=Q" (lc->last_update_timer), + "=Q" (lc->last_update_clock) : : "cc"); - clock = get_lowcore()->last_update_clock - clock; - timer -= get_lowcore()->last_update_timer; + clock = lc->last_update_clock - clock; + timer -= lc->last_update_timer; if (hardirq_count()) - get_lowcore()->hardirq_timer += timer; + lc->hardirq_timer += timer; else - get_lowcore()->system_timer += timer; + lc->system_timer += timer; /* Update MT utilization calculation */ if (smp_cpu_mtid && @@ -141,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk) /* Calculate cputime delta */ user = update_tsk_timer(&tsk->thread.user_timer, - READ_ONCE(get_lowcore()->user_timer)); + READ_ONCE(lc->user_timer)); guest = update_tsk_timer(&tsk->thread.guest_timer, - READ_ONCE(get_lowcore()->guest_timer)); + READ_ONCE(lc->guest_timer)); system = update_tsk_timer(&tsk->thread.system_timer, - READ_ONCE(get_lowcore()->system_timer)); + READ_ONCE(lc->system_timer)); hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, - READ_ONCE(get_lowcore()->hardirq_timer)); + READ_ONCE(lc->hardirq_timer)); softirq 
= update_tsk_timer(&tsk->thread.softirq_timer, - READ_ONCE(get_lowcore()->softirq_timer)); - get_lowcore()->steal_timer += + READ_ONCE(lc->softirq_timer)); + lc->steal_timer += clock - user - guest - system - hardirq - softirq; /* Push account value */ @@ -176,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk) void vtime_task_switch(struct task_struct *prev) { + struct lowcore *lc = get_lowcore(); + do_account_vtime(prev); - prev->thread.user_timer = get_lowcore()->user_timer; - prev->thread.guest_timer = get_lowcore()->guest_timer; - prev->thread.system_timer = get_lowcore()->system_timer; - prev->thread.hardirq_timer = get_lowcore()->hardirq_timer; - prev->thread.softirq_timer = get_lowcore()->softirq_timer; - get_lowcore()->user_timer = current->thread.user_timer; - get_lowcore()->guest_timer = current->thread.guest_timer; - get_lowcore()->system_timer = current->thread.system_timer; - get_lowcore()->hardirq_timer = current->thread.hardirq_timer; - get_lowcore()->softirq_timer = current->thread.softirq_timer; + prev->thread.user_timer = lc->user_timer; + prev->thread.guest_timer = lc->guest_timer; + prev->thread.system_timer = lc->system_timer; + prev->thread.hardirq_timer = lc->hardirq_timer; + prev->thread.softirq_timer = lc->softirq_timer; + lc->user_timer = current->thread.user_timer; + lc->guest_timer = current->thread.guest_timer; + lc->system_timer = current->thread.system_timer; + lc->hardirq_timer = current->thread.hardirq_timer; + lc->softirq_timer = current->thread.softirq_timer; } /* @@ -196,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev) */ void vtime_flush(struct task_struct *tsk) { + struct lowcore *lc = get_lowcore(); u64 steal, avg_steal; if (do_account_vtime(tsk)) virt_timer_expire(); - steal = get_lowcore()->steal_timer; - avg_steal = get_lowcore()->avg_steal_timer; + steal = lc->steal_timer; + avg_steal = lc->avg_steal_timer; if ((s64) steal > 0) { - get_lowcore()->steal_timer = 0; + lc->steal_timer = 0; 
account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } - get_lowcore()->avg_steal_timer = avg_steal / 2; + lc->avg_steal_timer = avg_steal / 2; } static u64 vtime_delta(void) { - u64 timer = get_lowcore()->last_update_timer; + struct lowcore *lc = get_lowcore(); + u64 timer = lc->last_update_timer; - get_lowcore()->last_update_timer = get_cpu_timer(); - - return timer - get_lowcore()->last_update_timer; + lc->last_update_timer = get_cpu_timer(); + return timer - lc->last_update_timer; } /* @@ -226,12 +231,13 @@ static u64 vtime_delta(void) */ void vtime_account_kernel(struct task_struct *tsk) { + struct lowcore *lc = get_lowcore(); u64 delta = vtime_delta(); if (tsk->flags & PF_VCPU) - get_lowcore()->guest_timer += delta; + lc->guest_timer += delta; else - get_lowcore()->system_timer += delta; + lc->system_timer += delta; virt_timer_forward(delta); } From 15428734e1da88c40924e6788403171c8dff7cf8 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:31 +0200 Subject: [PATCH 24/54] s390/idle: Remove duplicate get_lowcore() calls Assign the output from get_lowcore() to a local variable, so the code is easier to read. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/idle.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 2c34e02ae64b..39cb8d0ae348 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -24,6 +24,7 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); void account_idle_time_irq(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct lowcore *lc = get_lowcore(); unsigned long idle_time; u64 cycles_new[8]; int i; @@ -34,13 +35,13 @@ void account_idle_time_irq(void) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } - idle_time = get_lowcore()->int_clock - idle->clock_idle_enter; + idle_time = lc->int_clock - idle->clock_idle_enter; - get_lowcore()->steal_timer += idle->clock_idle_enter - get_lowcore()->last_update_clock; - get_lowcore()->last_update_clock = get_lowcore()->int_clock; + lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock; + lc->last_update_clock = lc->int_clock; - get_lowcore()->system_timer += get_lowcore()->last_update_timer - idle->timer_idle_enter; - get_lowcore()->last_update_timer = get_lowcore()->sys_enter_timer; + lc->system_timer += lc->last_update_timer - idle->timer_idle_enter; + lc->last_update_timer = lc->sys_enter_timer; /* Account time spent with enabled wait psw loaded as idle time. */ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); From 81f907b246f35b4cc5cb38b86b633601b76d17e8 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:32 +0200 Subject: [PATCH 25/54] s390/mm: Remove duplicate get_lowcore() calls Assign the output from get_lowcore() to a local variable, so the code is easier to read. 
Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/traps.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 8b904f7efb0e..a7c211a3a0c9 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -288,15 +288,16 @@ static void __init test_monitor_call(void) void __init trap_init(void) { + struct lowcore *lc = get_lowcore(); unsigned long flags; struct ctlreg cr0; local_irq_save(flags); cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT); - psw_bits(get_lowcore()->external_new_psw).mcheck = 1; - psw_bits(get_lowcore()->program_new_psw).mcheck = 1; - psw_bits(get_lowcore()->svc_new_psw).mcheck = 1; - psw_bits(get_lowcore()->io_new_psw).mcheck = 1; + psw_bits(lc->external_new_psw).mcheck = 1; + psw_bits(lc->program_new_psw).mcheck = 1; + psw_bits(lc->svc_new_psw).mcheck = 1; + psw_bits(lc->io_new_psw).mcheck = 1; local_ctl_load(0, &cr0); local_irq_restore(flags); local_mcck_enable(); @@ -307,11 +308,12 @@ static void (*pgm_check_table[128])(struct pt_regs *regs); void noinstr __do_pgm_check(struct pt_regs *regs) { - unsigned int trapnr; + struct lowcore *lc = get_lowcore(); irqentry_state_t state; + unsigned int trapnr; - regs->int_code = get_lowcore()->pgm_int_code; - regs->int_parm_long = get_lowcore()->trans_exc_code; + regs->int_code = lc->pgm_int_code; + regs->int_parm_long = lc->trans_exc_code; state = irqentry_enter(regs); @@ -324,19 +326,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - if (get_lowcore()->pgm_code & 0x0200) { + if (lc->pgm_code & 0x0200) { /* transaction abort */ - current->thread.trap_tdb = get_lowcore()->pgm_tdb; + current->thread.trap_tdb = lc->pgm_tdb; } - if (get_lowcore()->pgm_code & PGM_INT_CODE_PER) { + if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = 
&current->thread.per_event; set_thread_flag(TIF_PER_TRAP); - ev->address = get_lowcore()->per_address; - ev->cause = get_lowcore()->per_code_combined; - ev->paid = get_lowcore()->per_access_id; + ev->address = lc->per_address; + ev->cause = lc->per_code_combined; + ev->paid = lc->per_access_id; } else { /* PER event in kernel is kprobes */ __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); From 39976f1278a97d79e758ceb59fe613764844c9c3 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 10 Jun 2024 13:45:33 +0200 Subject: [PATCH 26/54] s390: Remove S390_lowcore With all users gone, remove S390_lowcore. Acked-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 94f954250ae0..c724e71e1785 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -213,8 +213,6 @@ struct lowcore { __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed __aligned(8192); -#define S390_lowcore (*((struct lowcore *) 0)) - static __always_inline struct lowcore *get_lowcore(void) { return NULL; From 447360d75cf473d4b1048a7204443262f3f60758 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 10 Jun 2024 17:10:48 +0200 Subject: [PATCH 27/54] s390/sclp: Define commands for storage (un)assignment Replace immediate values with SCLP_CMDW_UN|ASSIGN_STORAGE defines.
Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20240610151048.2548428-1-agordeev@linux.ibm.com Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_cmd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 7815e9bea69a..f905a6643a0f 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -31,6 +31,9 @@ #include "sclp.h" +#define SCLP_CMDW_ASSIGN_STORAGE 0x000d0001 +#define SCLP_CMDW_UNASSIGN_STORAGE 0x000c0001 + static void sclp_sync_callback(struct sclp_req *req, void *data) { struct completion *completion = data; @@ -225,7 +228,7 @@ static int sclp_assign_storage(u16 rn) unsigned long long start; int rc; - rc = do_assign_storage(0x000d0001, rn); + rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn); if (rc) return rc; start = rn2addr(rn); @@ -235,7 +238,7 @@ static int sclp_assign_storage(u16 rn) static int sclp_unassign_storage(u16 rn) { - return do_assign_storage(0x000c0001, rn); + return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn); } struct attach_storage_sccb { From 7c67928648188d193c9f73679f7e6aa67d088237 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 9 Jun 2024 21:02:19 +0200 Subject: [PATCH 28/54] s390: Provide optimized __arch_hweight*() implementations Make use of the popcnt instruction to provide optimized __arch_hweight*() implementations. The generated code is shorter and avoids rather expensive function calls.
Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 2 +- arch/s390/include/asm/arch_hweight.h | 76 ++++++++++++++++++++++++++++ arch/s390/include/asm/bitops.h | 3 +- 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/asm/arch_hweight.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c59d2b54df49..9788ae5e930b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -21,7 +21,7 @@ config ARCH_PROC_KCORE_TEXT def_bool y config GENERIC_HWEIGHT - def_bool y + def_bool !HAVE_MARCH_Z196_FEATURES config GENERIC_BUG def_bool y if BUG diff --git a/arch/s390/include/asm/arch_hweight.h b/arch/s390/include/asm/arch_hweight.h new file mode 100644 index 000000000000..50e23ce854e5 --- /dev/null +++ b/arch/s390/include/asm/arch_hweight.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_ARCH_HWEIGHT_H +#define _ASM_S390_ARCH_HWEIGHT_H + +#include + +static __always_inline unsigned long popcnt_z196(unsigned long w) +{ + unsigned long cnt; + + asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],0,0" + : [cnt] "=d" (cnt) + : [w] "d" (w) + : "cc"); + return cnt; +} + +static __always_inline unsigned long popcnt_z15(unsigned long w) +{ + unsigned long cnt; + + asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],8,0" + : [cnt] "=d" (cnt) + : [w] "d" (w) + : "cc"); + return cnt; +} + +static __always_inline unsigned long __arch_hweight64(__u64 w) +{ + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES)) + return popcnt_z15(w); + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) { + w = popcnt_z196(w); + w += w >> 32; + w += w >> 16; + w += w >> 8; + return w & 0xff; + } + return __sw_hweight64(w); +} + +static __always_inline unsigned int __arch_hweight32(unsigned int w) +{ + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES)) + return popcnt_z15(w); + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) { + w = popcnt_z196(w); + w += w >> 16; + w += w >> 8; + return w & 0xff; 
+ } + return __sw_hweight32(w); +} + +static __always_inline unsigned int __arch_hweight16(unsigned int w) +{ + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES)) + return popcnt_z15((unsigned short)w); + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) { + w = popcnt_z196(w); + w += w >> 8; + return w & 0xff; + } + return __sw_hweight16(w); +} + +static __always_inline unsigned int __arch_hweight8(unsigned int w) +{ + if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) + return popcnt_z196((unsigned char)w); + return __sw_hweight8(w); +} + +#endif /* _ASM_S390_ARCH_HWEIGHT_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index c467dffa8c12..54a079cd39ed 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -379,8 +379,9 @@ static inline int fls(unsigned int word) return fls64(word); } +#include +#include #include -#include #include #include #include From 37db17c1008c84f972da1ddd7bb10248670e6aa3 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 16 Jun 2024 01:35:37 +0000 Subject: [PATCH 29/54] s390/mm: Get total ram pages from memblock On s390, zero page's size relies on total ram pages. Since we plan to move the accounting into __free_pages_core(), totalram_pages may not represent the total usable pages on system at this point when defer_init is enabled. We can get the total usable pages from memblock directly. The size may not be accurate due to the alignment, but it is enough for the calculation.
Signed-off-by: Wei Yang CC: Mike Rapoport (IBM) CC: David Hildenbrand Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20240616013537.20338-1-richard.weiyang@gmail.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e769d2726f4e..ddcd39ef4346 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -62,6 +62,7 @@ EXPORT_SYMBOL(zero_page_mask); static void __init setup_zero_pages(void) { + unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); unsigned int order; struct page *page; int i; @@ -70,7 +71,7 @@ static void __init setup_zero_pages(void) order = 7; /* Limit number of empty zero pages for small memory sizes */ - while (order > 2 && (totalram_pages() >> 10) < (1UL << order)) + while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); From 68d7bb54215f4b941fc58cfa22e6e0ea54e70f42 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 15 Jun 2024 16:46:50 -0700 Subject: [PATCH 30/54] s390/crc32: Add missing MODULE_DESCRIPTION() macro With ARCH=s390, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/s390/crypto/crc32-vx_s390.o Add the missing invocation of the MODULE_DESCRIPTION() macro. 
Acked-by: Heiko Carstens Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240615-md-s390-arch-s390-crypto-v1-1-7120d406e7c7@quicinc.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/crypto/crc32-vx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 74f17c905d12..89a10337e6ea 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -297,6 +297,7 @@ module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); module_exit(crc_vx_mod_exit); MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32"); From 4657a8a1c0538abc9e841fa64692d2c59edac2c0 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 15 Jun 2024 17:47:26 -0700 Subject: [PATCH 31/54] s390/lib: Add missing MODULE_DESCRIPTION() macros With ARCH=s390, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/s390/lib/test_kprobes_s390.o WARNING: modpost: missing MODULE_DESCRIPTION() in arch/s390/lib/test_unwind.o WARNING: modpost: missing MODULE_DESCRIPTION() in arch/s390/lib/test_modules.o Add the missing invocations of the MODULE_DESCRIPTION() macro. 
Acked-by: Heiko Carstens Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240615-md-s390-arch-s390-lib-v1-1-d7424b943973@quicinc.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/lib/test_kprobes.c | 1 + arch/s390/lib/test_modules.c | 1 + arch/s390/lib/test_unwind.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c index 9e62d62812e5..9021298c3e8a 100644 --- a/arch/s390/lib/test_kprobes.c +++ b/arch/s390/lib/test_kprobes.c @@ -72,4 +72,5 @@ static struct kunit_suite kprobes_test_suite = { kunit_test_suites(&kprobes_test_suite); +MODULE_DESCRIPTION("KUnit tests for kprobes"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c index 9894009fc1f2..f96b6a3737e7 100644 --- a/arch/s390/lib/test_modules.c +++ b/arch/s390/lib/test_modules.c @@ -29,4 +29,5 @@ static struct kunit_suite modules_test_suite = { kunit_test_suites(&modules_test_suite); +MODULE_DESCRIPTION("KUnit test that modules with many relocations are loaded properly"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 768898dacb92..8b7f981e6f34 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -519,4 +519,5 @@ static struct kunit_suite test_unwind_suite = { kunit_test_suites(&test_unwind_suite); +MODULE_DESCRIPTION("KUnit test for unwind_for_each_frame"); MODULE_LICENSE("GPL"); From 7a6d19c3c78f8cf8e192b6615073ec712fb6f16a Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 15 Jun 2024 18:23:21 -0700 Subject: [PATCH 32/54] s390/mm: Add missing MODULE_DESCRIPTION() macro With ARCH=s390, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/s390/mm/cmm.o Add the missing invocation of the MODULE_DESCRIPTION() macro. 
Acked-by: Heiko Carstens Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240615-md-s390-arch-s390-mm-v1-1-a360eed8c7c3@quicinc.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/mm/cmm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index f8b13f247646..5cb5e724cde3 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -427,4 +427,5 @@ static void __exit cmm_exit(void) } module_exit(cmm_exit); +MODULE_DESCRIPTION("Cooperative memory management interface"); MODULE_LICENSE("GPL"); From b051271dba2a7568125b0d901825cc134fb85071 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 15 Jun 2024 19:34:50 -0700 Subject: [PATCH 33/54] s390/dcssblk: Add missing MODULE_DESCRIPTION() macro With ARCH=s390, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/s390/block/dcssblk.o Add the missing invocation of the MODULE_DESCRIPTION() macro. Acked-by: Heiko Carstens Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240615-md-s390-drivers-s390-block-dcssblk-v1-1-d9d19703abcb@quicinc.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- drivers/s390/block/dcssblk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6d1689a2717e..995d4c84ed5a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -1032,4 +1032,5 @@ MODULE_PARM_DESC(segments, "Name of DCSS segment(s) to be loaded, " "the contiguous segments - \n" "e.g. 
segments=\"mydcss1,mydcss2:mydcss3,mydcss4(local)\""); +MODULE_DESCRIPTION("S/390 block driver for DCSS memory"); MODULE_LICENSE("GPL"); From f219af8a3f9e54bad10021defe04cd8b5867cb89 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Sat, 15 Jun 2024 20:01:17 -0700 Subject: [PATCH 34/54] s390/3270: Add missing MODULE_DESCRIPTION() macros With ARCH=s390, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/s390/char/raw3270.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/s390/char/con3270.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/s390/char/fs3270.o Add the missing invocations of the MODULE_DESCRIPTION() macro. Acked-by: Heiko Carstens Signed-off-by: Jeff Johnson Link: https://lore.kernel.org/r/20240615-md-s390-drivers-s390-char-v1-1-d1cd23ff6476@quicinc.com Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- drivers/s390/char/con3270.c | 1 + drivers/s390/char/fs3270.c | 1 + drivers/s390/char/raw3270.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 251d2a1c3eef..053102d0fcd2 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -2185,6 +2185,7 @@ con3270_init(void) console_initcall(con3270_init); #endif +MODULE_DESCRIPTION("IBM/3270 Driver - tty functions"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(IBM_TTY3270_MAJOR); diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 4d824f86bbbb..61515781c5dd 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -559,6 +559,7 @@ static void __exit fs3270_exit(void) __unregister_chrdev(IBM_FS3270_MAJOR, 0, 1, "fs3270"); } +MODULE_DESCRIPTION("IBM/3270 Driver - fullscreen driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(IBM_FS3270_MAJOR); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index c57694be9bd3..ba3d7114b34f 100644 --- a/drivers/s390/char/raw3270.c +++ 
b/drivers/s390/char/raw3270.c @@ -1341,6 +1341,7 @@ static void raw3270_exit(void) class_unregister(&class3270); } +MODULE_DESCRIPTION("IBM/3270 Driver - core functions"); MODULE_LICENSE("GPL"); module_init(raw3270_init); From ecec74bbfc6fb1885355aa0337f3452146e8caff Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 20 Jun 2024 14:20:26 +0200 Subject: [PATCH 35/54] s390/sclp: Suppress unnecessary Store Data warning On systems that do not support Store Data events (such as when running as KVM guest) the following warning message appears during boot: sclp_sd: Store Data request failed (eq=2, di=3, response=0x40f0, flags=0x00, status=0, rc=-5) This warning does not add any useful information since the result is expected due to missing support for that event type. Suppress this message by checking the associated masks of supported events before issuing a Store Data event. Suggested-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp_sd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c index f9e164be7568..5aeb7e094a1c 100644 --- a/drivers/s390/char/sclp_sd.c +++ b/drivers/s390/char/sclp_sd.c @@ -194,6 +194,10 @@ static int sclp_sd_sync(unsigned long page, u8 eq, u8 di, u64 sat, u64 sa, struct sclp_sd_evbuf *evbuf; int rc; + if (!sclp_sd_register.sclp_send_mask || + !sclp_sd_register.sclp_receive_mask) + return -EIO; + sclp_sd_listener_init(&listener, __pa(sccb)); sclp_sd_listener_add(&listener); From bf365071ea92b9579d5a272679b74052a5643e35 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 20 Jun 2024 14:20:27 +0200 Subject: [PATCH 36/54] s390/sclp: Prevent release of buffer in I/O When a task waiting for completion of a Store Data operation is interrupted, an attempt is made to halt this operation. 
If this attempt fails due to a hardware or firmware problem, there is a chance that the SCLP facility might store data into buffers referenced by the original operation at a later time. Handle this situation by not releasing the referenced data buffers if the halt attempt fails. For current use cases, this might result in a leak of few pages of memory in case of a rare hardware/firmware malfunction. Reviewed-by: Heiko Carstens Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp_sd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c index 5aeb7e094a1c..322700b96207 100644 --- a/drivers/s390/char/sclp_sd.c +++ b/drivers/s390/char/sclp_sd.c @@ -324,8 +324,14 @@ static int sclp_sd_store_data(struct sclp_sd_data *result, u8 di) &esize); if (rc) { /* Cancel running request if interrupted */ - if (rc == -ERESTARTSYS) - sclp_sd_sync(page, SD_EQ_HALT, di, 0, 0, NULL, NULL); + if (rc == -ERESTARTSYS) { + if (sclp_sd_sync(page, SD_EQ_HALT, di, 0, 0, NULL, NULL)) { + pr_warn("Could not stop Store Data request - leaking at least %zu bytes\n", + (size_t)dsize * PAGE_SIZE); + data = NULL; + asce = 0; + } + } vfree(data); goto out; } From bb748badfdfb4c6e5c5cb40ded1721f99d32072f Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 20 Jun 2024 14:20:28 +0200 Subject: [PATCH 37/54] s390/sclp: Add timeout to Store Data requests Due to a bug in some firmware versions, Store Data requests might not get an event response in certain situations. As a result, the boot process will be blocked indefinitely. Fix this by introducing timeout handling for Store Data requests. In case a timeout occurs, the Store Data operation is halted and no data is retrieved from the SCLP facility. Note: A minority of installed systems rely on Store Data result for device auto-configuration. 
These systems will fail to boot in case of a Store Data timeout and will need to be switched to manual device configuration as workaround. Reviewed-by: Heiko Carstens Signed-off-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp_sd.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c index 322700b96207..c2dc9aadb7d2 100644 --- a/drivers/s390/char/sclp_sd.c +++ b/drivers/s390/char/sclp_sd.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/completion.h> +#include <linux/jiffies.h> #include <linux/kobject.h> #include <linux/list.h> #include <linux/printk.h> @@ -28,6 +29,8 @@ #define SD_DI_CONFIG 3 +#define SD_TIMEOUT msecs_to_jiffies(30000) + struct sclp_sd_evbuf { struct evbuf_header hdr; u8 eq; @@ -234,9 +237,12 @@ static int sclp_sd_sync(unsigned long page, u8 eq, u8 di, u64 sat, u64 sa, goto out; } if (!(evbuf->rflags & 0x80)) { - rc = wait_for_completion_interruptible(&listener.completion); - if (rc) + rc = wait_for_completion_interruptible_timeout(&listener.completion, SD_TIMEOUT); + if (rc == 0) + rc = -ETIME; + if (rc < 0) goto out; + rc = 0; evbuf = &listener.evbuf; } switch (evbuf->status) { @@ -323,8 +329,8 @@ static int sclp_sd_store_data(struct sclp_sd_data *result, u8 di) rc = sclp_sd_sync(page, SD_EQ_STORE_DATA, di, asce, (u64) data, &dsize, &esize); if (rc) { - /* Cancel running request if interrupted */ - if (rc == -ERESTARTSYS) { + /* Cancel running request if interrupted or timed out */ + if (rc == -ERESTARTSYS || rc == -ETIME) { if (sclp_sd_sync(page, SD_EQ_HALT, di, 0, 0, NULL, NULL)) { pr_warn("Could not stop Store Data request - leaking at least %zu bytes\n", (size_t)dsize * PAGE_SIZE); From d6d1aa519c94367e8cd95d6f17ee4cba354aeb30 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Wed, 26 Jun 2024 10:31:48 +0200 Subject: [PATCH 38/54] s390/topology: Remove CPU KOBJ_CHANGE uevents s390 generates KOBJ_CHANGE uevents on CPUs whenever a topology update occurs.
These uevents currently have no users and they are also not present on other architectures. As they are not necessary, remove these extra uevents. Suggested-by: Heiko Carstens Acked-by: Alexander Gordeev Signed-off-by: Mete Durlu Signed-off-by: Alexander Gordeev --- arch/s390/kernel/topology.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 89e91b8ce842..98ef6dc7916b 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -320,16 +320,10 @@ static int __arch_update_cpu_topology(void) int arch_update_cpu_topology(void) { - struct device *dev; - int cpu, rc; + int rc; rc = __arch_update_cpu_topology(); on_each_cpu(__arch_update_dedicated_flag, NULL, 0); - for_each_online_cpu(cpu) { - dev = get_cpu_device(cpu); - if (dev) - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - } return rc; } From 02ee149198b4e4ad1a5b9e4c7806f924609b5fb3 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 25 Jun 2024 17:13:30 +0200 Subject: [PATCH 39/54] s390/pgtable: Make crdte() and cspg() return a value Make the crdte() and cspg() wrappers return a boolean to indicate success, like the other already existing "compare and swap" type of wrappers. Add documentation for those functions as well. 
Acked-by: Alexander Gordeev Signed-off-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0fd5a9c7e901..b5632dbe5438 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -609,7 +609,15 @@ static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) : "cc"); } -static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) +/** + * cspg() - Compare and Swap and Purge (CSPG) + * @ptr: Pointer to the value to be exchanged + * @old: The expected old value + * @new: The new value + * + * Return: True if compare and swap was successful, otherwise false. + */ +static inline bool cspg(unsigned long *ptr, unsigned long old, unsigned long new) { union register_pair r1 = { .even = old, .odd = new, }; unsigned long address = (unsigned long)ptr | 1; @@ -619,6 +627,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new : [r1] "+&d" (r1.pair), "+m" (*ptr) : [address] "d" (address) : "cc"); + return old == r1.even; } #define CRDTE_DTT_PAGE 0x00UL @@ -627,7 +636,18 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new #define CRDTE_DTT_REGION2 0x18UL #define CRDTE_DTT_REGION1 0x1cUL -static inline void crdte(unsigned long old, unsigned long new, +/** + * crdte() - Compare and Replace DAT Table Entry + * @old: The expected old value + * @new: The new value + * @table: Pointer to the value to be exchanged + * @dtt: Table type of the table to be exchanged + * @address: The address mapped by the entry to be replaced + * @asce: The ASCE of this entry + * + * Return: True if compare and replace was successful, otherwise false. 
+ */ +static inline bool crdte(unsigned long old, unsigned long new, unsigned long *table, unsigned long dtt, unsigned long address, unsigned long asce) { @@ -638,6 +658,7 @@ static inline void crdte(unsigned long old, unsigned long new, : [r1] "+&d" (r1.pair) : [r2] "d" (r2.pair), [asce] "a" (asce) : "memory", "cc"); + return old == r1.even; } /* From 5f95843d51e7e0eb0d6ccea074f906214ef06fbc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Jun 2024 10:35:24 +0200 Subject: [PATCH 40/54] s390/hwcaps: Add documentation for HWCAP flags Describe that some HWCAP bits are reserved to avoid that they will be used by accident. Suggested-by: Stefan Liebler Reviewed-by: Stefan Liebler Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/elf.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 70a30ae258b7..8f2c23cc52b6 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -91,6 +91,14 @@ /* Keep this the last entry. */ #define R_390_NUM 61 +/* + * HWCAP flags - for AT_HWCAP + * + * Bits 32-63 are reserved for use by libc. + * Bit 31 is reserved and will be used by libc to determine if a second + * argument is passed to IFUNC resolvers. This will be implemented when + * there is a need for AT_HWCAP2. + */ enum { HWCAP_NR_ESAN3 = 0, HWCAP_NR_ZARCH = 1, From 4a39f12e753d35682a473b2edb4581b65b958d92 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 3 Jul 2024 10:11:59 +0200 Subject: [PATCH 41/54] s390/smp: Switch to GENERIC_CPU_DEVICES Instead of setting up non-boot CPUs early in architecture code, only setup the cpu present mask and let the generic code handle cpu bringup. 
Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/include/asm/smp.h | 2 +- arch/s390/kernel/smp.c | 55 ++++++++++++--------------------- drivers/s390/char/sclp_config.c | 2 +- 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9788ae5e930b..371c2bf88149 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -142,6 +142,7 @@ config S390 select FUNCTION_ALIGNMENT_8B if CC_IS_GCC select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC select GENERIC_ALLOCATOR + select GENERIC_CPU_DEVICES select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 0b1ed637bfd6..c13c79025348 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -59,7 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus) { } -extern int smp_rescan_cpus(void); +extern int smp_rescan_cpus(bool early); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4c2faab56eb6..c3c54adf67bc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,8 +74,6 @@ enum { CPU_STATE_CONFIGURED, }; -static DEFINE_PER_CPU(struct cpu *, cpu_device); - struct pcpu { unsigned long ec_mask; /* bit mask for ec_xxx functions */ unsigned long ec_clk; /* sigp timestamp for ec_xxx */ @@ -719,8 +717,6 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) } } -static int smp_add_present_cpu(int cpu); - static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, bool configured, bool early) { @@ -744,7 +740,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, pcpu->state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); set_cpu_present(cpu, true); - if (!early && smp_add_present_cpu(cpu) != 
0) + if (!early && arch_register_cpu(cpu)) set_cpu_present(cpu, false); else nr++; @@ -831,9 +827,6 @@ void __init smp_detect_cpus(void) s_cpus += smp_cpu_mtid + 1; } pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); - - /* Add CPUs present at boot */ - __smp_rescan_cpus(info, true); memblock_free(info, sizeof(*info)); } @@ -974,6 +967,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); system_ctl_set_bit(0, 13); + smp_rescan_cpus(true); } void __init smp_prepare_boot_cpu(void) @@ -1111,35 +1105,34 @@ static struct attribute_group cpu_online_attr_group = { static int smp_cpu_online(unsigned int cpu) { - struct device *s = &per_cpu(cpu_device, cpu)->dev; + struct cpu *c = &per_cpu(cpu_devices, cpu); - return sysfs_create_group(&s->kobj, &cpu_online_attr_group); + return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); } static int smp_cpu_pre_down(unsigned int cpu) { - struct device *s = &per_cpu(cpu_device, cpu)->dev; + struct cpu *c = &per_cpu(cpu_devices, cpu); - sysfs_remove_group(&s->kobj, &cpu_online_attr_group); + sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); return 0; } -static int smp_add_present_cpu(int cpu) +bool arch_cpu_is_hotpluggable(int cpu) { - struct device *s; - struct cpu *c; + return !!cpu; +} + +int arch_register_cpu(int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); int rc; - c = kzalloc(sizeof(*c), GFP_KERNEL); - if (!c) - return -ENOMEM; - per_cpu(cpu_device, cpu) = c; - s = &c->dev; - c->hotpluggable = !!cpu; + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); rc = register_cpu(c, cpu); if (rc) goto out; - rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); + rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group); if (rc) goto out_cpu; rc = topology_cpu_init(c); @@ -1148,14 +1141,14 @@ static int smp_add_present_cpu(int cpu) return 0; out_topology: - 
sysfs_remove_group(&s->kobj, &cpu_common_attr_group); + sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group); out_cpu: unregister_cpu(c); out: return rc; } -int __ref smp_rescan_cpus(void) +int __ref smp_rescan_cpus(bool early) { struct sclp_core_info *info; int nr; @@ -1164,7 +1157,7 @@ int __ref smp_rescan_cpus(void) if (!info) return -ENOMEM; smp_get_core_info(info, 0); - nr = __smp_rescan_cpus(info, false); + nr = __smp_rescan_cpus(info, early); kfree(info); if (nr) topology_schedule_update(); @@ -1181,7 +1174,7 @@ static ssize_t __ref rescan_store(struct device *dev, rc = lock_device_hotplug_sysfs(); if (rc) return rc; - rc = smp_rescan_cpus(); + rc = smp_rescan_cpus(false); unlock_device_hotplug(); return rc ? rc : count; } @@ -1190,7 +1183,7 @@ static DEVICE_ATTR_WO(rescan); static int __init s390_smp_init(void) { struct device *dev_root; - int cpu, rc = 0; + int rc; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { @@ -1199,17 +1192,9 @@ static int __init s390_smp_init(void) if (rc) return rc; } - - for_each_present_cpu(cpu) { - rc = smp_add_present_cpu(cpu); - if (rc) - goto out; - } - rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", smp_cpu_online, smp_cpu_pre_down); rc = rc <= 0 ? rc : 0; -out: return rc; } subsys_initcall(s390_smp_init); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 10383e936461..f56ea9b60e08 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -60,7 +60,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work) static void __ref sclp_cpu_change_notify(struct work_struct *work) { lock_device_hotplug(); - smp_rescan_cpus(); + smp_rescan_cpus(false); unlock_device_hotplug(); } From f2ed8367bfa55a2ad3adfe7a59b79b82905df740 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Jul 2024 17:04:56 +0200 Subject: [PATCH 42/54] s390/atomic_ops: Use symbolic names Consistently use symbolic names in all atomic ops inline assemblies. 
Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/atomic_ops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 7fa5f96a553a..7f47e2927804 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -13,16 +13,16 @@ static __always_inline int __atomic_read(const atomic_t *v) int c; asm volatile( - " l %0,%1\n" - : "=d" (c) : "R" (v->counter)); + " l %[c],%[counter]\n" + : [c] "=d" (c) : [counter] "R" (v->counter)); return c; } static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( - " st %1,%0\n" - : "=R" (v->counter) : "d" (i)); + " st %[i],%[counter]\n" + : [counter] "=R" (v->counter) : [i] "d" (i)); } static __always_inline s64 __atomic64_read(const atomic64_t *v) @@ -30,16 +30,16 @@ static __always_inline s64 __atomic64_read(const atomic64_t *v) s64 c; asm volatile( - " lg %0,%1\n" - : "=d" (c) : "RT" (v->counter)); + " lg %[c],%[counter]\n" + : [c] "=d" (c) : [counter] "RT" (v->counter)); return c; } static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( - " stg %1,%0\n" - : "=RT" (v->counter) : "d" (i)); + " stg %[i],%[counter]\n" + : [counter] "=RT" (v->counter) : [i] "d" (i)); } #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES From ee19370c92f6db4e92e060b5e0c2aa99e4f85408 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Jul 2024 17:04:57 +0200 Subject: [PATCH 43/54] s390/atomic_ops: Improve __atomic_set() for small values Use mvhi/mvghi for small constant values within the __atomic_set() inline assemblies. This avoids loading the specified value into a register. The size of the kernel image is reduced by ~1.2kb. 
Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/atomic_ops.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 7f47e2927804..b028c5309bef 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,6 +8,8 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ +#include <linux/limits.h> + static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -20,9 +22,15 @@ static __always_inline int __atomic_read(const atomic_t *v) static __always_inline void __atomic_set(atomic_t *v, int i) { - asm volatile( - " st %[i],%[counter]\n" - : [counter] "=R" (v->counter) : [i] "d" (i)); + if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + asm volatile( + " mvhi %[counter], %[i]\n" + : [counter] "=Q" (v->counter) : [i] "K" (i)); + } else { + asm volatile( + " st %[i],%[counter]\n" + : [counter] "=R" (v->counter) : [i] "d" (i)); + } } static __always_inline s64 __atomic64_read(const atomic64_t *v) @@ -37,9 +45,15 @@ static __always_inline s64 __atomic64_read(const atomic64_t *v) static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { - asm volatile( - " stg %[i],%[counter]\n" - : [counter] "=RT" (v->counter) : [i] "d" (i)); + if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + asm volatile( + " mvghi %[counter], %[i]\n" + : [counter] "=Q" (v->counter) : [i] "K" (i)); + } else { + asm volatile( + " stg %[i],%[counter]\n" + : [counter] "=RT" (v->counter) : [i] "d" (i)); + } } #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES From 279a0164e001a87aa7b9852969ed333c3c69b3aa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Jul 2024 17:04:58 +0200 Subject: [PATCH 44/54] s390/atomic_ops: Make use of flag output constraint With gcc 14.1.0 support for flag output constraint was added for s390. Use this for __atomic_cmpxchg_bool().
This allows for slightly better code, since the compiler can generate code depending on the condition code which is the result of an inline assembly. The size of the kernel image is reduced by ~12kb. Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/atomic_ops.h | 50 ++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index b028c5309bef..2b379d1d9046 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -178,6 +178,44 @@ static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) +{ + asm volatile( + " csg %[old],%[new],%[ptr]" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "cc", "memory"); + return old; +} + +#ifdef __GCC_ASM_FLAG_OUTPUTS__ + +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +{ + int cc; + + asm volatile( + " cs %[old],%[new],%[ptr]" + : [old] "+d" (old), [ptr] "+Q" (*ptr), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +{ + int cc; + + asm volatile( + " csg %[old],%[new],%[ptr]" + : [old] "+d" (old), [ptr] "+QS" (*ptr), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +#else /* __GCC_ASM_FLAG_OUTPUTS__ */ + static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -190,16 +228,6 @@ static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) -{ - asm volatile( - " csg %[old],%[new],%[ptr]" - : [old] "+d" (old), [ptr] "+QS" (*ptr) - : [new] "d" (new) - : "cc", "memory"); - return old; -} - 
static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; @@ -212,4 +240,6 @@ static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long ne return old == old_expected; } +#endif /* __GCC_ASM_FLAG_OUTPUTS__ */ + #endif /* __ARCH_S390_ATOMIC_OPS__ */ From 7455a33179e65267cd7c8910050b6f0be3ff6b83 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:04 +0200 Subject: [PATCH 45/54] s390/sclp: Diag204 busy indication facility detection Detect diag204 busy indication facility. Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/sclp.h | 1 + drivers/s390/char/sclp_early.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 5742d23bba13..da3dad18fe50 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -84,6 +84,7 @@ struct sclp_info { unsigned char has_ibs : 1; unsigned char has_skey : 1; unsigned char has_kss : 1; + unsigned char has_diag204_bif : 1; unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 42986284cc78..07df04af82f2 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -53,6 +53,7 @@ static void __init sclp_early_facilities_detect(void) get_lowcore()->machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_GUEST; + sclp.has_diag204_bif = !!(sccb->fac98 & 0x80); if (sccb->cpuoff > 134) { sclp.has_diag318 = !!(sccb->byte_134 & 0x80); sclp.has_diag320 = !!(sccb->byte_134 & 0x04); From bb9be93acb7e6a0fa78919d30e68410c401fe690 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:05 +0200 Subject: [PATCH 46/54] s390/diag: Return errno's from diag204 Return different errno's from diag204 to 
allow users to handle them accordingly. Instead of returning -1 regardless of the failing condition, return -EINVAL on invalid memory address and -EOPNOTSUPP when diag instruction fails. Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/kernel/diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 8dee9aa0ec95..9f33dcecbffa 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -215,16 +215,16 @@ int diag204(unsigned long subcode, unsigned long size, void *addr) { if (addr) { if (WARN_ON_ONCE(!is_vmalloc_addr(addr))) - return -1; + return -EINVAL; if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE))) - return -1; + return -EINVAL; } if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); diag_stat_inc(DIAG_STAT_X204); size = __diag204(&subcode, size, addr); if (subcode) - return -1; + return -EOPNOTSUPP; return size; } EXPORT_SYMBOL(diag204); From df7e714d6d6ca7921be2e5c7d599d9b4aa96d682 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:06 +0200 Subject: [PATCH 47/54] s390/diag: Diag204 add busy return errno When diag204-busy-indication facility is installed, diag204 can return '8' which means device is busy and no operation is done. Add check for return codes of diag204 call. Return error codes according to diag204 return codes. 
Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/kernel/diag.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 9f33dcecbffa..9b65f04c83de 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -185,6 +185,8 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) } EXPORT_SYMBOL(diag14); +#define DIAG204_BUSY_RC 8 + static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { union register_pair rp = { .even = *subcode, .odd = size }; @@ -223,7 +225,9 @@ int diag204(unsigned long subcode, unsigned long size, void *addr) addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); diag_stat_inc(DIAG_STAT_X204); size = __diag204(&subcode, size, addr); - if (subcode) + if (subcode == DIAG204_BUSY_RC) + return -EBUSY; + else if (subcode) return -EOPNOTSUPP; return size; } From 97999f8c62a43cf2af5d725b045b82f9b47d83ea Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:07 +0200 Subject: [PATCH 48/54] s390/diag: Add busy-indication-facility requirements To verify whether the busy indication facility is installed, the sclp bits have to be checked. Add a function that checks sclp to improve readability. Add a busy-indication-request bit mask for diag204 subcodes.
Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/diag.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 20b94220113b..6527d7e8e010 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -12,6 +12,7 @@ #include #include #include +#include #include enum diag_stat_enum { @@ -117,6 +118,7 @@ enum diag204_sc { }; #define DIAG204_SUBCODE_MASK 0xffff +#define DIAG204_BIF_BIT 0x80000000 /* The two available diag 204 data formats */ enum diag204_format { @@ -326,6 +328,11 @@ union diag318_info { }; }; +static inline bool diag204_has_bif(void) +{ + return sclp.has_diag204_bif; +} + int diag204(unsigned long subcode, unsigned long size, void *addr); int diag224(void *ptr); int diag26c(void *req, void *resp, enum diag26c_sc subcode); From f4493954215ceb8b22aca3ee6b10c6172f20a9fc Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:08 +0200 Subject: [PATCH 49/54] s390/hypfs_diag: Diag204 busy loop When the diag204 busy-indication facility is installed and diag204 is returning busy, the hypfs diag204 handler now does an interruptible busy wait until diag204 is no longer busy. If there is a signal pending, the call is restarted with -ERESTARTSYS, except for fatal signals.
Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/hypfs/hypfs_dbfs.c | 4 +++- arch/s390/hypfs/hypfs_diag.c | 17 ++++++++++++++--- arch/s390/include/asm/diag.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 4024599eb448..0e855c5e91c5 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -39,7 +39,9 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, return 0; df = file_inode(file)->i_private; - mutex_lock(&df->lock); + if (mutex_lock_interruptible(&df->lock)) + return -ERESTARTSYS; + data = hypfs_dbfs_data_alloc(df); if (!data) { mutex_unlock(&df->lock); diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 279b7bba4d43..26a009f9c49e 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -140,11 +140,22 @@ static int diag204_probe(void) int diag204_store(void *buf, int pages) { + unsigned long subcode; int rc; - rc = diag204((unsigned long)diag204_store_sc | - (unsigned long)diag204_get_info_type(), pages, buf); - return rc < 0 ? -EOPNOTSUPP : 0; + subcode = diag204_get_info_type(); + subcode |= diag204_store_sc; + if (diag204_has_bif()) + subcode |= DIAG204_BIF_BIT; + while (1) { + rc = diag204(subcode, pages, buf); + if (rc != -EBUSY) + break; + if (signal_pending(current)) + return -ERESTARTSYS; + schedule_timeout_interruptible(DIAG204_BUSY_WAIT); + } + return rc < 0 ? 
rc : 0; } struct dbfs_d204_hdr { diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 6527d7e8e010..c0d43512f4fc 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -119,6 +119,7 @@ enum diag204_sc { #define DIAG204_SUBCODE_MASK 0xffff #define DIAG204_BIF_BIT 0x80000000 +#define DIAG204_BUSY_WAIT (HZ / 10) /* The two available diag 204 data formats */ enum diag204_format { From 6fdf72c9a9f614115ac7f2889c1018bbff1af9b3 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:09 +0200 Subject: [PATCH 50/54] s390/sthyi: Move diag operations Move diag204 related operations to their own functions for better error handling and better readability. Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/kernel/sthyi.c | 46 +++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 30bb20461db4..9f9bac01f5f2 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -300,9 +300,37 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, return (struct diag204_x_part_block *)&block->cpus[i]; } +static void *diag204_get_data(void) +{ + unsigned long subcode; + void *diag204_buf; + int pages, rc; + + subcode = DIAG204_SUBC_RSI; + subcode |= DIAG204_INFO_EXT; + pages = diag204(subcode, 0, NULL); + if (pages < 0) + return ERR_PTR(pages); + if (pages == 0) + return ERR_PTR(-ENODATA); + diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + subcode = DIAG204_SUBC_STIB7; + subcode |= DIAG204_INFO_EXT; + rc = diag204(subcode, pages, diag204_buf); + if (rc < 0) { + vfree(diag204_buf); + return ERR_PTR(rc); + } + return diag204_buf; +} + static void fill_diag(struct sthyi_sctns *sctns) { - int 
i, r, pages; + int i; bool this_lpar; void *diag204_buf; void *diag224_buf = NULL; @@ -312,22 +340,10 @@ static void fill_diag(struct sthyi_sctns *sctns) struct lpar_cpu_inf lpar_inf = {}; /* Errors are handled through the validity bits in the response. */ - pages = diag204((unsigned long)DIAG204_SUBC_RSI | - (unsigned long)DIAG204_INFO_EXT, 0, NULL); - if (pages <= 0) + diag204_buf = diag204_get_data(); + if (IS_ERR(diag204_buf)) return; - diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), - PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, - __builtin_return_address(0)); - if (!diag204_buf) - return; - - r = diag204((unsigned long)DIAG204_SUBC_STIB7 | - (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); - if (r < 0) - goto out; - diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); if (!diag224_buf || diag224(diag224_buf)) goto out; From b7a5e5dfbd68050e8582e901f397dc75451bb877 Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Thu, 4 Jul 2024 14:10:10 +0200 Subject: [PATCH 51/54] s390/sthyi: Use cached data when diag is busy When sthyi is being emulated, data from diag204 is used. If diag204 returns busy, previously cached sthyi info block is returned to the caller and cache expiry is set to expired. 
Acked-by: Heiko Carstens Reviewed-by: Tobias Huschle Signed-off-by: Tobias Huschle Signed-off-by: Mete Durlu Signed-off-by: Vasily Gorbik --- arch/s390/kernel/sthyi.c | 55 +++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 9f9bac01f5f2..1cf2ad04f8e9 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -300,7 +300,7 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, return (struct diag204_x_part_block *)&block->cpus[i]; } -static void *diag204_get_data(void) +static void *diag204_get_data(bool diag204_allow_busy) { unsigned long subcode; void *diag204_buf; @@ -320,6 +320,8 @@ static void *diag204_get_data(void) return ERR_PTR(-ENOMEM); subcode = DIAG204_SUBC_STIB7; subcode |= DIAG204_INFO_EXT; + if (diag204_has_bif() && diag204_allow_busy) + subcode |= DIAG204_BIF_BIT; rc = diag204(subcode, pages, diag204_buf); if (rc < 0) { vfree(diag204_buf); @@ -328,22 +330,27 @@ static void *diag204_get_data(void) return diag204_buf; } -static void fill_diag(struct sthyi_sctns *sctns) +static bool is_diag204_cached(struct sthyi_sctns *sctns) +{ + /* + * Check if validity bits are set when diag204 data + * is gathered. + */ + if (sctns->par.infpval1) + return true; + return false; +} + +static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf) { int i; bool this_lpar; - void *diag204_buf; void *diag224_buf = NULL; struct diag204_x_info_blk_hdr *ti_hdr; struct diag204_x_part_block *part_block; struct diag204_x_phys_block *phys_block; struct lpar_cpu_inf lpar_inf = {}; - /* Errors are handled through the validity bits in the response. 
*/ - diag204_buf = diag204_get_data(); - if (IS_ERR(diag204_buf)) - return; - diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); if (!diag224_buf || diag224(diag224_buf)) goto out; @@ -408,7 +415,6 @@ static void fill_diag(struct sthyi_sctns *sctns) out: free_page((unsigned long)diag224_buf); - vfree(diag204_buf); } static int sthyi(u64 vaddr, u64 *rc) @@ -430,19 +436,31 @@ static int sthyi(u64 vaddr, u64 *rc) static int fill_dst(void *dst, u64 *rc) { + void *diag204_buf; + struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; /* * If the facility is on, we don't want to emulate the instruction. * We ask the hypervisor to provide the data. */ - if (test_facility(74)) + if (test_facility(74)) { + memset(dst, 0, PAGE_SIZE); return sthyi((u64)dst, rc); - + } + /* + * When emulating, if diag204 returns BUSY don't reset dst buffer + * and use cached data. + */ + *rc = 0; + diag204_buf = diag204_get_data(is_diag204_cached(sctns)); + if (IS_ERR(diag204_buf)) + return PTR_ERR(diag204_buf); + memset(dst, 0, PAGE_SIZE); fill_hdr(sctns); fill_stsi(sctns); - fill_diag(sctns); - *rc = 0; + fill_diag(sctns, diag204_buf); + vfree(diag204_buf); return 0; } @@ -461,11 +479,14 @@ static int sthyi_update_cache(u64 *rc) { int r; - memset(sthyi_cache.info, 0, PAGE_SIZE); r = fill_dst(sthyi_cache.info, rc); - if (r) - return r; - sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; + if (r == 0) { + sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; + } else if (r == -EBUSY) { + /* mark as expired and return 0 to keep using cached data */ + sthyi_cache.end = jiffies - 1; + r = 0; + } return r; } From 723ac2d6ba77be712b165b710c60adda6e657c9e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 3 Jul 2024 17:58:59 +0200 Subject: [PATCH 52/54] s390/entry: Pass the asce as parameter to sie64a() Pass the guest ASCE explicitly as parameter, instead of having sie64a() take it from lowcore. This removes hidden state from lowcore, and makes things look cleaner. 
Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20240703155900.103783-2-imbrenda@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kvm_host.h | 7 ++++--- arch/s390/include/asm/stacktrace.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 8 +++----- arch/s390/kvm/kvm-s390.c | 3 ++- arch/s390/kvm/vsie.c | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888f..2d4e3f50a823 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1029,11 +1029,12 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); -int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa); +int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa, + unsigned long gasce); -static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) +static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce) { - return __sie64a(virt_to_phys(sie_block), sie_block, rsa); + return __sie64a(virt_to_phys(sie_block), sie_block, rsa, gasce); } extern char sie_exit; diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 85b6738b826a..1d5ca13dc90f 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -65,6 +65,7 @@ struct stack_frame { unsigned long sie_reason; unsigned long sie_flags; unsigned long sie_control_block_phys; + unsigned long sie_guest_asce; }; }; unsigned long gprs[10]; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index f55979f64d49..26bb45d0e6f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -63,6 +63,7 @@ int 
main(void) OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); + OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce); DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 60cf917a7122..454b6b92c7f8 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -179,6 +179,7 @@ SYM_FUNC_END(__switch_to_asm) * %r2 pointer to sie control block phys * %r3 pointer to sie control block virt * %r4 guest register save area + * %r5 guest asce */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers @@ -186,15 +187,12 @@ SYM_FUNC_START(__sie64a) stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area + stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 - lg %r14,__LC_GMAP # get gmap pointer - ltgr %r14,%r14 - jz .Lsie_gmap oi __LC_CPU_FLAGS+7,_CIF_SIE - lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce -.Lsie_gmap: + lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now tm __SIE_PROG20+3(%r14),3 # last exit... 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 50b77b759042..7697c1fc1733 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4829,7 +4829,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) sizeof(sie_page->pv_grregs)); } exit_reason = sie64a(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs); + vcpu->run->s.regs.gprs, + gmap_get_enabled()->asce); if (kvm_s390_pv_cpu_is_protected(vcpu)) { memcpy(vcpu->run->s.regs.gprs, sie_page->pv_grregs, diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c9ecae830634..97a70c2b83ee 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1150,7 +1150,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; barrier(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) - rc = sie64a(scb_s, vcpu->run->s.regs.gprs); + rc = sie64a(scb_s, vcpu->run->s.regs.gprs, gmap_get_enabled()->asce); barrier(); vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE; From 275d05ce0680949085622bf38a365efcdd34fe11 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 3 Jul 2024 17:59:00 +0200 Subject: [PATCH 53/54] s390/kvm: Move bitfields for dat tables Move and improve the struct definitions for DAT tables from gaccess.c to a new header. Once in a separate header, the structs become available everywhere. One possible usecase is to merge them in the s390 pte_t and p?d_t definitions, which is left as an exercise for the reader. 
Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20240703155900.103783-3-imbrenda@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/dat-bits.h | 170 +++++++++++++++++++++++++++++++ arch/s390/kvm/gaccess.c | 163 +---------------------------- 2 files changed, 173 insertions(+), 160 deletions(-) create mode 100644 arch/s390/include/asm/dat-bits.h diff --git a/arch/s390/include/asm/dat-bits.h b/arch/s390/include/asm/dat-bits.h new file mode 100644 index 000000000000..8d65eec2f124 --- /dev/null +++ b/arch/s390/include/asm/dat-bits.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAT table and related structures + * + * Copyright IBM Corp. 2024 + * + */ + +#ifndef _S390_DAT_BITS_H +#define _S390_DAT_BITS_H + +union asce { + unsigned long val; + struct { + unsigned long rsto: 52;/* Region- or Segment-Table Origin */ + unsigned long : 2; + unsigned long g : 1; /* Subspace Group control */ + unsigned long p : 1; /* Private Space control */ + unsigned long s : 1; /* Storage-Alteration-Event control */ + unsigned long x : 1; /* Space-Switch-Event control */ + unsigned long r : 1; /* Real-Space control */ + unsigned long : 1; + unsigned long dt : 2; /* Designation-Type control */ + unsigned long tl : 2; /* Region- or Segment-Table Length */ + }; +}; + +enum { + ASCE_TYPE_SEGMENT = 0, + ASCE_TYPE_REGION3 = 1, + ASCE_TYPE_REGION2 = 2, + ASCE_TYPE_REGION1 = 3 +}; + +union region1_table_entry { + unsigned long val; + struct { + unsigned long rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Second-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Second-Table Length */ + }; +}; + +union region2_table_entry { + unsigned long val; + struct { + unsigned long 
rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Third-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Third-Table Length */ + }; +}; + +struct region3_table_entry_fc0 { + unsigned long sto: 52;/* Segment-Table Origin */ + unsigned long : 1; + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Segment-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Segment-Table Length */ +}; + +struct region3_table_entry_fc1 { + unsigned long rfaa: 33;/* Region-Frame Absolute Address */ + unsigned long : 14; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc : 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long iep : 1; /* Instruction-Execution-Protection */ + unsigned long : 2; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union region3_table_entry { + unsigned long val; + struct region3_table_entry_fc0 fc0; + struct region3_table_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc: 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr: 1; /* Common-Region Bit */ + unsigned long tt: 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +struct segment_table_entry_fc0 { + unsigned long pto: 53;/* Page-Table Origin */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* 
DAT-Protection Bit */ + unsigned long : 3; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +struct segment_table_entry_fc1 { + unsigned long sfaa: 44;/* Segment-Frame Absolute Address */ + unsigned long : 3; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc : 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long iep : 1; /* Instruction-Execution-Protection */ + unsigned long : 2; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union segment_table_entry { + unsigned long val; + struct segment_table_entry_fc0 fc0; + struct segment_table_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc: 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs: 1; /* Common-Segment Bit */ + unsigned long tt: 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +union page_table_entry { + unsigned long val; + struct { + unsigned long pfra: 52;/* Page-Frame Real Address */ + unsigned long z : 1; /* Zero Bit */ + unsigned long i : 1; /* Page-Invalid Bit */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long iep : 1; /* Instruction-Execution-Protection */ + unsigned long : 8; + }; +}; + +enum { + TABLE_TYPE_SEGMENT = 0, + TABLE_TYPE_REGION3 = 1, + TABLE_TYPE_REGION2 = 2, + TABLE_TYPE_REGION1 = 3 +}; + +#endif /* _S390_DAT_BITS_H */ diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 5bf3d94e9dda..e65f597e3044 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -14,167 +14,10 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" -union asce { - 
unsigned long val; - struct { - unsigned long origin : 52; /* Region- or Segment-Table Origin */ - unsigned long : 2; - unsigned long g : 1; /* Subspace Group Control */ - unsigned long p : 1; /* Private Space Control */ - unsigned long s : 1; /* Storage-Alteration-Event Control */ - unsigned long x : 1; /* Space-Switch-Event Control */ - unsigned long r : 1; /* Real-Space Control */ - unsigned long : 1; - unsigned long dt : 2; /* Designation-Type Control */ - unsigned long tl : 2; /* Region- or Segment-Table Length */ - }; -}; - -enum { - ASCE_TYPE_SEGMENT = 0, - ASCE_TYPE_REGION3 = 1, - ASCE_TYPE_REGION2 = 2, - ASCE_TYPE_REGION1 = 3 -}; - -union region1_table_entry { - unsigned long val; - struct { - unsigned long rto: 52;/* Region-Table Origin */ - unsigned long : 2; - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Region-Second-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long : 1; - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Region-Second-Table Length */ - }; -}; - -union region2_table_entry { - unsigned long val; - struct { - unsigned long rto: 52;/* Region-Table Origin */ - unsigned long : 2; - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Region-Third-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long : 1; - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Region-Third-Table Length */ - }; -}; - -struct region3_table_entry_fc0 { - unsigned long sto: 52;/* Segment-Table Origin */ - unsigned long : 1; - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Segment-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Segment-Table Length */ -}; - 
-struct region3_table_entry_fc1 { - unsigned long rfaa : 33; /* Region-Frame Absolute Address */ - unsigned long : 14; - unsigned long av : 1; /* ACCF-Validity Control */ - unsigned long acc: 4; /* Access-Control Bits */ - unsigned long f : 1; /* Fetch-Protection Bit */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 2; - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -union region3_table_entry { - unsigned long val; - struct region3_table_entry_fc0 fc0; - struct region3_table_entry_fc1 fc1; - struct { - unsigned long : 53; - unsigned long fc : 1; /* Format-Control */ - unsigned long : 4; - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; - }; -}; - -struct segment_entry_fc0 { - unsigned long pto: 53;/* Page-Table Origin */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 3; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -struct segment_entry_fc1 { - unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ - unsigned long : 3; - unsigned long av : 1; /* ACCF-Validity Control */ - unsigned long acc: 4; /* Access-Control Bits */ - unsigned long f : 1; /* Fetch-Protection Bit */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 2; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -union 
segment_table_entry { - unsigned long val; - struct segment_entry_fc0 fc0; - struct segment_entry_fc1 fc1; - struct { - unsigned long : 53; - unsigned long fc : 1; /* Format-Control */ - unsigned long : 4; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; - }; -}; - -enum { - TABLE_TYPE_SEGMENT = 0, - TABLE_TYPE_REGION3 = 1, - TABLE_TYPE_REGION2 = 2, - TABLE_TYPE_REGION1 = 3 -}; - -union page_table_entry { - unsigned long val; - struct { - unsigned long pfra : 52; /* Page-Frame Real Address */ - unsigned long z : 1; /* Zero Bit */ - unsigned long i : 1; /* Page-Invalid Bit */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 8; - }; -}; - /* * vaddress union in order to easily decode a virtual address into its * region first index, region second index etc. parts. @@ -632,7 +475,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); if (asce.r) goto real_address; - ptr = asce.origin * PAGE_SIZE; + ptr = asce.rsto * PAGE_SIZE; switch (asce.dt) { case ASCE_TYPE_REGION1: if (vaddr.rfx01 > asce.tl) @@ -1379,7 +1222,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; - ptr = asce.origin * PAGE_SIZE; + ptr = asce.rsto * PAGE_SIZE; if (asce.r) { *fake = 1; ptr = 0; From df39038cd89525d465c2c8827eb64116873f141a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 15 Jul 2024 20:04:16 +0200 Subject: [PATCH 54/54] s390/mm: Fix VM_FAULT_HWPOISON handling in do_exception() There is no support for HWPOISON, MEMORY_FAILURE, or ARCH_HAS_COPY_MC on s390. Therefore we do not expect to see VM_FAULT_HWPOISON in do_exception(). 
However, since commit af19487f00f3 ("mm: make PTE_MARKER_SWAPIN_ERROR more general"), it is possible to see VM_FAULT_HWPOISON in combination with PTE_MARKER_POISONED, even on architectures that do not support HWPOISON otherwise. In this case, we will end up on the BUG() in do_exception(). Fix this by treating VM_FAULT_HWPOISON the same as VM_FAULT_SIGBUS, similar to x86 when MEMORY_FAILURE is not configured. Also print unexpected fault flags, for easier debugging. Note that VM_FAULT_HWPOISON_LARGE is not expected, because s390 cannot support swap entries on other levels than PTE level. Cc: stable@vger.kernel.org # 6.6+ Fixes: af19487f00f3 ("mm: make PTE_MARKER_SWAPIN_ERROR more general") Reported-by: Yunseong Kim Tested-by: Yunseong Kim Acked-by: Alexander Gordeev Signed-off-by: Gerald Schaefer Message-ID: <20240715180416.3632453-1-gerald.schaefer@linux.ibm.com> Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6b19a33c49c2..8e149ef5e89b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -433,12 +433,13 @@ static void do_exception(struct pt_regs *regs, int access) handle_fault_error_nolock(regs, 0); else do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) { if (!user_mode(regs)) handle_fault_error_nolock(regs, 0); else do_sigbus(regs); } else { + pr_emerg("Unexpected fault flags: %08x\n", fault); BUG(); } }