From eaa7995c529b54d68d97a30f6344cc6ca2f214a7 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 7 Jan 2021 17:16:02 -0800 Subject: [PATCH 01/25] regulator: core: avoid regulator_resolve_supply() race condition The final step in regulator_register() is to call regulator_resolve_supply() for each registered regulator (including the one in the process of being registered). The regulator_resolve_supply() function first checks if rdev->supply is NULL, then it performs various steps to try to find the supply. If successful, rdev->supply is set inside of set_supply(). This procedure can encounter a race condition if two concurrent tasks call regulator_register() near to each other on separate CPUs and one of the regulators has rdev->supply_name specified. There is currently nothing guaranteeing atomicity between the rdev->supply check and set steps. Thus, both tasks can observe rdev->supply==NULL in their regulator_resolve_supply() calls. This then results in both creating a struct regulator for the supply. One ends up actually stored in rdev->supply and the other is lost (though still present in the supply's consumer_list). Here is a kernel log snippet showing the issue: [ 12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level' already present! Avoid this race condition by holding the rdev->mutex lock inside of regulator_resolve_supply() while checking and setting rdev->supply. Signed-off-by: David Collins Link: https://lore.kernel.org/r/1610068562-4410-1-git-send-email-collinsd@codeaurora.org Signed-off-by: Mark Brown --- drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ca03d8e70bd1..cfc3bc9df93a 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1813,23 +1813,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) { struct regulator_dev *r; struct device *dev = rdev->dev.parent; - int ret; + int ret = 0; /* No supply to resolve? */ if (!rdev->supply_name) return 0; - /* Supply already resolved? */ + /* Supply already resolved? (fast-path without locking contention) */ if (rdev->supply) return 0; + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) + goto out; + r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); /* Did the lookup explicitly defer for us? */ if (ret == -EPROBE_DEFER) - return ret; + goto out; if (have_full_constraints()) { r = dummy_regulator_rdev; @@ -1837,15 +1848,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } else { dev_err(dev, "Failed to resolve %s-supply for %s\n", rdev->supply_name, rdev->desc->name); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); - if (!have_full_constraints()) - return -EINVAL; + if (!have_full_constraints()) { + ret = -EINVAL; + goto out; + } r = dummy_regulator_rdev; get_device(&r->dev); } @@ -1859,7 +1873,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (r->dev.parent && r->dev.parent != rdev->dev.parent) { if (!device_is_bound(r->dev.parent)) { put_device(&r->dev); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } @@ -1867,13 +1882,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) ret = regulator_resolve_supply(r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } ret = set_supply(rdev, r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } /* @@ -1886,11 +1901,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; - return ret; + goto out; } } - return 0; +out: + regulator_unlock(rdev); + return ret; } /* Internal regulator request function */ From 11663111cd49b4c6dd27479774e420f139e4c447 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Jan 2021 17:22:27 +0000 Subject: [PATCH 02/25] KVM: arm64: Hide PMU registers from userspace when not available It appears that while we are now able to properly hide PMU registers from the guest when a PMU isn't available (either because none has been configured, the host doesn't have the PMU support compiled in, or that the HW doesn't have one at all), we are still exposing more than we should to userspace. Introduce a visibility callback gating all the PMU registers, which covers both usrespace and guest. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 68 +++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 42ccc27fb684..45f4ae71c8dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -590,6 +590,15 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); } +static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return 0; + + return REG_HIDDEN; +} + static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; @@ -936,15 +945,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } +#define PMU_SYS_REG(r) \ + SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility + /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ - { SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \ - access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } + { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ + .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ - { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ - access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } + { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \ + .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), } static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1486,8 +1498,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, - { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, - { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, + { PMU_SYS_REG(SYS_PMINTENSET_EL1), + .access = access_pminten, .reg = PMINTENSET_EL1 }, + { PMU_SYS_REG(SYS_PMINTENCLR_EL1), + .access = access_pminten, .reg = PMINTENSET_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1526,23 +1540,36 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, - { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, - { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, - { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, - { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, - { SYS_DESC(SYS_PMCEID1_EL0), access_pmceid }, - { SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, - { SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper }, - { SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr }, + { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr, + .reset = reset_pmcr, .reg = PMCR_EL0 }, + { PMU_SYS_REG(SYS_PMCNTENSET_EL0), + .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, + { PMU_SYS_REG(SYS_PMCNTENCLR_EL0), + .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, + { PMU_SYS_REG(SYS_PMOVSCLR_EL0), + .access = access_pmovs, .reg = PMOVSSET_EL0 }, + { PMU_SYS_REG(SYS_PMSWINC_EL0), + .access = access_pmswinc, .reg = PMSWINC_EL0 }, + { PMU_SYS_REG(SYS_PMSELR_EL0), + .access = access_pmselr, .reg = PMSELR_EL0 }, + { PMU_SYS_REG(SYS_PMCEID0_EL0), + .access = access_pmceid, .reset = NULL }, + { PMU_SYS_REG(SYS_PMCEID1_EL0), + .access = access_pmceid, .reset = NULL }, + { PMU_SYS_REG(SYS_PMCCNTR_EL0), + .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 }, + { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), + .access = access_pmu_evtyper, .reset = NULL }, + { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), + .access = access_pmu_evcntr, .reset = NULL }, /* * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. */ - { SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, - { SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, + { PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr, + .reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 }, + { PMU_SYS_REG(SYS_PMOVSSET_EL0), + .access = access_pmovs, .reg = PMOVSSET_EL0 }, { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, @@ -1694,7 +1721,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. */ - { SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, + { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper, + .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, From 7ded92e25cac9758a755b8f524b11b509c49afe1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Jan 2021 17:22:28 +0000 Subject: [PATCH 03/25] KVM: arm64: Simplify handling of absent PMU system registers Now that all PMU registers are gated behind a .visibility callback, remove the other checks against an absent PMU. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 45f4ae71c8dc..93f0a4a0789a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -622,9 +622,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = kvm_vcpu_has_pmu(vcpu); + bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); - enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -909,11 +908,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_vcpu_has_pmu(vcpu)) { - kvm_inject_undefined(vcpu); - return false; - } - if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { kvm_inject_undefined(vcpu); From 2c91ef39216149df6703c3fa6a47dd9a1e6091c1 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 29 Dec 2020 16:00:59 +0000 Subject: [PATCH 04/25] KVM: arm64: Allow PSCI SYSTEM_OFF/RESET to return The KVM/arm64 PSCI relay assumes that SYSTEM_OFF and SYSTEM_RESET should not return, as dictated by the PSCI spec. However, there is firmware out there which breaks this assumption, leading to a hyp panic. Make KVM more robust to broken firmware by allowing these to return. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201229160059.64135-1-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index e3947846ffcb..8e7128cb7667 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -77,12 +77,6 @@ static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3)); } -static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt) -{ - psci_forward(host_ctxt); - hyp_panic(); /* unreachable */ -} - static unsigned int find_cpu_id(u64 mpidr) { unsigned int i; @@ -251,10 +245,13 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: return psci_forward(host_ctxt); + /* + * SYSTEM_OFF/RESET should not return according to the spec. + * Allow it so as to stay robust to broken firmware. + */ case PSCI_0_2_FN_SYSTEM_OFF: case PSCI_0_2_FN_SYSTEM_RESET: - psci_forward_noreturn(host_ctxt); - unreachable(); + return psci_forward(host_ctxt); case PSCI_0_2_FN64_CPU_SUSPEND: return psci_cpu_suspend(func_id, host_ctxt); case PSCI_0_2_FN64_CPU_ON: From 7ba8b4380afbdbb29d53c50bee6563cd7457fc34 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Wed, 6 Jan 2021 14:42:18 +0000 Subject: [PATCH 05/25] KVM: arm64: Use the reg_to_encoding() macro instead of sys_reg() The reg_to_encoding() macro is a wrapper over sys_reg() and conveniently takes a sys_reg_desc or a sys_reg_params argument and returns the 32 bit register encoding. Use it instead of calling sys_reg() directly. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210106144218.110665-1-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 93f0a4a0789a..7c4f79532406 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -43,6 +43,10 @@ * 64bit interface. */ +#define reg_to_encoding(x) \ + sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) + static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -273,8 +277,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - u32 sr = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 sr = reg_to_encoding(r); if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) { kvm_inject_undefined(vcpu); @@ -924,10 +927,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } -#define reg_to_encoding(x) \ - sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) - /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ @@ -1026,8 +1025,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) { - u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 id = reg_to_encoding(r); u64 val = raz ? 0 : read_sanitised_ftr_reg(id); if (id == SYS_ID_AA64PFR0_EL1) { @@ -1068,8 +1066,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, static unsigned int id_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 id = reg_to_encoding(r); switch (id) { case SYS_ID_AA64ZFR0_EL1: From 55a8b42e8645a6dab88674a30cb6ed328e660680 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 00:26:35 -0800 Subject: [PATCH 06/25] spi: altera: Fix memory leak on error path Release master that have been previously allocated if the number of chipselect is invalid. Fixes: 8e04187c1bc7 ("spi: altera: add SPI core parameters support via platform data.") Signed-off-by: Pan Bian Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/20210120082635.49304-1-bianpan2016@163.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c index cbc4c28c1541..62ea0c9e321b 100644 --- a/drivers/spi/spi-altera.c +++ b/drivers/spi/spi-altera.c @@ -254,7 +254,8 @@ static int altera_spi_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Invalid number of chipselect: %hu\n", pdata->num_chipselect); - return -EINVAL; + err = -EINVAL; + goto exit; } master->num_chipselect = pdata->num_chipselect; From e1663372d5ffaa3fc79b7932878c5c860f735412 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 8 Jan 2021 16:12:54 +0000 Subject: [PATCH 07/25] KVM: arm64: Compute TPIDR_EL2 ignoring MTE tag KASAN in HW_TAGS mode will store MTE tags in the top byte of the pointer. When computing the offset for TPIDR_EL2 we don't want anything in the top byte, so remove the tag to ensure the computation is correct no matter what the tag. Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS") Signed-off-by: Steven Price [maz: added comment] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210108161254.53674-1-steven.price@arm.com --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 04c44853b103..fe60d25c000e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1396,8 +1396,9 @@ static void cpu_init_hyp_mode(void) * Calculate the raw per-cpu offset without a translation from the * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. + * Also drop the KASAN tag which gets in the way... */ - params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); params->mair_el2 = read_sysreg(mair_el1); From 9529aaa056edc76b3a41df616c71117ebe11e049 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jan 2021 10:56:36 +0000 Subject: [PATCH 08/25] KVM: arm64: Filter out v8.1+ events on v8.0 HW When running on v8.0 HW, make sure we don't try to advertise events in the 0x4000-0x403f range. Cc: stable@vger.kernel.org Fixes: 88865beca9062 ("KVM: arm64: Mask out filtered events in PCMEID{0,1}_EL1") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210121105636.1478491-1-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4ad66a532e38..247422ac78a9 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -788,7 +788,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) { unsigned long *bmap = vcpu->kvm->arch.pmu_filter; u64 val, mask = 0; - int base, i; + int base, i, nr_events; if (!pmceid1) { val = read_sysreg(pmceid0_el0); @@ -801,13 +801,17 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!bmap) return val; + nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + for (i = 0; i < 32; i += 8) { u64 byte; byte = bitmap_get_value8(bmap, base + i); mask |= byte << i; - byte = bitmap_get_value8(bmap, 0x4000 + base + i); - mask |= byte << (32 + i); + if (nr_events >= (0x4000 + base + 32)) { + byte = bitmap_get_value8(bmap, 0x4000 + base + i); + mask |= byte << (32 + i); + } } return val & mask; From 51dfb6ca3728bd0a0a3c23776a12d2a15a1d2457 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 20 Jan 2021 23:58:44 +0300 Subject: [PATCH 09/25] regulator: consumer: Add missing stubs to regulator/consumer.h Add missing stubs to regulator/consumer.h in order to fix COMPILE_TEST of the kernel. In particular this should fix compile-testing of OPP core because of a missing stub for regulator_sync_voltage(). Reported-by: kernel test robot Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210120205844.12658-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2024944fd2f7..20e84a84fb77 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -331,6 +331,12 @@ regulator_get_exclusive(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check +devm_regulator_get_exclusive(struct device *dev, const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id) { @@ -486,6 +492,11 @@ static inline int regulator_get_voltage(struct regulator *regulator) return -EINVAL; } +static inline int regulator_sync_voltage(struct regulator *regulator) +{ + return -EINVAL; +} + static inline int regulator_is_supported_voltage(struct regulator *regulator, int min_uV, int max_uV) { @@ -578,6 +589,25 @@ static inline int devm_regulator_unregister_notifier(struct regulator *regulator return 0; } +static inline int regulator_suspend_enable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_suspend_disable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_set_suspend_voltage(struct regulator *regulator, + int min_uV, int max_uV, + suspend_state_t state) +{ + return -EINVAL; +} + static inline void *regulator_get_drvdata(struct regulator *regulator) { return NULL; From 139bc8a6146d92822c866cf2fd410159c56b3648 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jan 2021 12:08:15 +0000 Subject: [PATCH 10/25] KVM: Forbid the use of tagged userspace addresses for memslots The use of a tagged address could be pretty confusing for the whole memslot infrastructure as well as the MMU notifiers. Forbid it altogether, as it never quite worked the first place. Cc: stable@vger.kernel.org Reported-by: Rick Edgecombe Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.rst | 3 +++ virt/kvm/kvm_main.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4e5316ed10e9..c347b7083abf 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1269,6 +1269,9 @@ field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including anonymous memory, ordinary files, and hugetlbfs. +On architectures that support a form of address tagging, userspace_addr must +be an untagged address. + It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr be identical. This allows large pages in the guest to be backed by large pages in the host. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c4..a9abaf5f8e53 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1290,6 +1290,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return -EINVAL; /* We can read the guest memory with __xxx_user() later on. */ if ((mem->userspace_addr & (PAGE_SIZE - 1)) || + (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, mem->memory_size)) return -EINVAL; From 14a71d509ac809dcf56d7e3ca376b15d17bd0ddd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Jan 2021 13:20:42 +0000 Subject: [PATCH 11/25] regulator: Fix lockdep warning resolving supplies With commit eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) we started holding the rdev lock while resolving supplies, an operation that requires holding the regulator_list_mutex. This results in lockdep warnings since in other places we take the list mutex then the mutex on an individual rdev. Since the goal is to make sure that we don't call set_supply() twice rather than a concern about the cost of resolution pull the rdev lock and check for duplicate resolution down to immediately before we do the set_supply() and drop it again once the allocation is done. Fixes: eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210122132042.10306-1-broonie@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/core.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cfc3bc9df93a..67a768fe5b2a 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1823,17 +1823,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (rdev->supply) return 0; - /* - * Recheck rdev->supply with rdev->mutex lock held to avoid a race - * between rdev->supply null check and setting rdev->supply in - * set_supply() from concurrent tasks. - */ - regulator_lock(rdev); - - /* Supply just resolved by a concurrent task? */ - if (rdev->supply) - goto out; - r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); @@ -1885,12 +1874,29 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) goto out; } - ret = set_supply(rdev, r); - if (ret < 0) { + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) { + regulator_unlock(rdev); put_device(&r->dev); goto out; } + ret = set_supply(rdev, r); + if (ret < 0) { + regulator_unlock(rdev); + put_device(&r->dev); + goto out; + } + + regulator_unlock(rdev); + /* * In set_machine_constraints() we may have turned this regulator on * but we couldn't propagate to the supply if it hadn't been resolved @@ -1906,7 +1912,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } out: - regulator_unlock(rdev); return ret; } From 396cf2a46adddbf51373e16225c1d25254310046 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Thu, 21 Jan 2021 15:12:36 -0800 Subject: [PATCH 12/25] spidev: Add cisco device compatible Add compatible string for Cisco device present on the Cisco Petra platform. Signed-off-by: Daniel Walker Cc: xe-linux-external@cisco.com Link: https://lore.kernel.org/r/20210121231237.30664-2-danielwa@cisco.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 859910ec8d9f..8cb4d923aeaa 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -682,6 +682,7 @@ static const struct of_device_id spidev_dt_ids[] = { { .compatible = "lwn,bk4" }, { .compatible = "dh,dhcom-board" }, { .compatible = "menlo,m53cpld" }, + { .compatible = "cisco,spi-petra" }, {}, }; MODULE_DEVICE_TABLE(of, spidev_dt_ids); From a10f373ad3c760dd40b41e2f69a800ee7b8da15e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 8 Jan 2021 16:53:49 +0000 Subject: [PATCH 13/25] KVM: Documentation: Fix spec for KVM_CAP_ENABLE_CAP_VM The documentation classifies KVM_ENABLE_CAP with KVM_CAP_ENABLE_CAP_VM as a vcpu ioctl, which is incorrect. Fix it by specifying it as a VM ioctl. Fixes: e5d83c74a580 ("kvm: make KVM_CAP_ENABLE_CAP_VM architecture agnostic") Signed-off-by: Quentin Perret Message-Id: <20210108165349.747359-1-qperret@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a9bf7f2ab76f..40b943a9bd1d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1336,7 +1336,7 @@ documentation when it pops into existence). :Capability: KVM_CAP_ENABLE_CAP_VM :Architectures: all -:Type: vcpu ioctl +:Type: vm ioctl :Parameters: struct kvm_enable_cap (in) :Returns: 0 on success; -1 on error From eb79cd00ce25974c21f34f1eeb92a580ff572971 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Jan 2021 12:45:15 -0800 Subject: [PATCH 14/25] KVM: x86: Add more protection against undefined behavior in rsvd_bits() Add compile-time asserts in rsvd_bits() to guard against KVM passing in garbage hardcoded values, and cap the upper bound at '63' for dynamic values to prevent generating a mask that would overflow a u64. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20210113204515.3473079-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 581925e476d6..261be1d2032b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -44,8 +44,15 @@ #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 -static inline u64 rsvd_bits(int s, int e) +static __always_inline u64 rsvd_bits(int s, int e) { + BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s); + + if (__builtin_constant_p(e)) + BUILD_BUG_ON(e > 63); + else + e &= 63; + if (e < s) return 0; From e61ab2a320c3dfd6209efe18a575979e07470597 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 18 Jan 2021 10:58:00 +0800 Subject: [PATCH 15/25] KVM: x86/pmu: Fix UBSAN shift-out-of-bounds warning in intel_pmu_refresh() Since we know vPMU will not work properly when (1) the guest bit_width(s) of the [gp|fixed] counters are greater than the host ones, or (2) guest requested architectural events exceeds the range supported by the host, so we can setup a smaller left shift value and refresh the guest cpuid entry, thus fixing the following UBSAN shift-out-of-bounds warning: shift exponent 197 is too large for 64-bit type 'long long unsigned int' Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 intel_pmu_refresh.cold+0x75/0x99 arch/x86/kvm/vmx/pmu_intel.c:348 kvm_vcpu_after_set_cpuid+0x65a/0xf80 arch/x86/kvm/cpuid.c:177 kvm_vcpu_ioctl_set_cpuid2+0x160/0x440 arch/x86/kvm/cpuid.c:308 kvm_arch_vcpu_ioctl+0x11b6/0x2d70 arch/x86/kvm/x86.c:4709 kvm_vcpu_ioctl+0x7b9/0xdb0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3386 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+ae488dc136a4cc6ba32b@syzkaller.appspotmail.com Signed-off-by: Like Xu Message-Id: <20210118025800.34620-1-like.xu@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a886a47daebd..d1584ae6625a 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, x86_pmu.num_counters_gp); + eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len); pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, x86_pmu.num_counters_fixed); + edx.split.bit_width_fixed = min_t(int, + edx.split.bit_width_fixed, x86_pmu.bit_width_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } From 98dd2f108e448988d91e296173e773b06fb978b8 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 30 Dec 2020 16:19:16 +0800 Subject: [PATCH 16/25] KVM: x86/pmu: Fix HW_REF_CPU_CYCLES event pseudo-encoding in intel_arch_events[] The HW_REF_CPU_CYCLES event on the fixed counter 2 is pseudo-encoded as 0x0300 in the intel_perfmon_event_map[]. Correct its usage. Fixes: 62079d8a4312 ("KVM: PMU: add proper support for fixed counter 2") Signed-off-by: Like Xu Message-Id: <20201230081916.63417-1-like.xu@linux.intel.com> Reviewed-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index d1584ae6625a..cdf5f34518f4 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = { [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, - [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, + [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES }, }; /* mapping between fixed pmc index and intel_arch_events array */ From 1f7becf1b7e21794fc9d460765fe09679bc9b9e0 Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Mon, 18 Jan 2021 16:47:20 +0800 Subject: [PATCH 17/25] KVM: x86: get smi pending status correctly The injection process of smi has two steps: Qemu KVM Step1: cpu->interrupt_request &= \ ~CPU_INTERRUPT_SMI; kvm_vcpu_ioctl(cpu, KVM_SMI) call kvm_vcpu_ioctl_smi() and kvm_make_request(KVM_REQ_SMI, vcpu); Step2: kvm_vcpu_ioctl(cpu, KVM_RUN, 0) call process_smi() if kvm_check_request(KVM_REQ_SMI, vcpu) is true, mark vcpu->arch.smi_pending = true; The vcpu->arch.smi_pending will be set true in step2, unfortunately if vcpu paused between step1 and step2, the kvm_run->immediate_exit will be set and vcpu has to exit to Qemu immediately during step2 before mark vcpu->arch.smi_pending true. During VM migration, Qemu will get the smi pending status from KVM using KVM_GET_VCPU_EVENTS ioctl at the downtime, then the smi pending status will be lost. Signed-off-by: Jay Zhou Signed-off-by: Shengen Zhuang Message-Id: <20210118084720.1585-1-jianjay.zhou@huawei.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a8969a6dd06..9025c7673af6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -105,6 +105,7 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void process_smi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); @@ -4230,6 +4231,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, { process_nmi(vcpu); + if (kvm_check_request(KVM_REQ_SMI, vcpu)) + process_smi(vcpu); + /* * In guest mode, payload delivery should be deferred, * so that the L1 hypervisor can intercept #PF before From 01ead84ccd23afadebe66aea0eda002ac29ca9be Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 8 Dec 2020 12:34:39 +0800 Subject: [PATCH 18/25] KVM: Documentation: Update description of KVM_{GET,CLEAR}_DIRTY_LOG Update various words, including the wrong parameter name and the vague description of the usage of "slot" field. Signed-off-by: Zenghui Yu Message-Id: <20201208043439.895-1-yuzenghui@huawei.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 40b943a9bd1d..99ceb978c8b0 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -360,10 +360,9 @@ since the last call to this ioctl. Bit 0 is the first page in the memory slot. Ensure the entire structure is cleared to avoid padding issues. -If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies -the address space for which you want to return the dirty bitmap. -They must be less than the value that KVM_CHECK_EXTENSION returns for -the KVM_CAP_MULTI_ADDRESS_SPACE capability. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies +the address space for which you want to return the dirty bitmap. See +KVM_SET_USER_MEMORY_REGION for details on the usage of slot field. The bits in the dirty bitmap are cleared before the ioctl returns, unless KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information, @@ -4435,7 +4434,7 @@ to I/O ports. :Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 :Architectures: x86, arm, arm64, mips :Type: vm ioctl -:Parameters: struct kvm_dirty_log (in) +:Parameters: struct kvm_clear_dirty_log (in) :Returns: 0 on success, -1 on error :: @@ -4462,10 +4461,9 @@ in KVM's dirty bitmap, and dirty tracking is re-enabled for that page (for example via write-protection, or by clearing the dirty bit in a page table entry). -If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies -the address space for which you want to return the dirty bitmap. -They must be less than the value that KVM_CHECK_EXTENSION returns for -the KVM_CAP_MULTI_ADDRESS_SPACE capability. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies +the address space for which you want to clear the dirty status. See +KVM_SET_USER_MEMORY_REGION for details on the usage of slot field. This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled; for more information, see the description of the capability. From d95df9510679757bdfc22376d351cdf367b3a604 Mon Sep 17 00:00:00 2001 From: Lorenzo Brescia Date: Wed, 23 Dec 2020 14:45:07 +0000 Subject: [PATCH 19/25] kvm: tracing: Fix unmatched kvm_entry and kvm_exit events On VMX, if we exit and then re-enter immediately without leaving the vmx_vcpu_run() function, the kvm_entry event is not logged. That means we will see one (or more) kvm_exit, without its (their) corresponding kvm_entry, as shown here: CPU-1979 [002] 89.871187: kvm_entry: vcpu 1 CPU-1979 [002] 89.871218: kvm_exit: reason MSR_WRITE CPU-1979 [002] 89.871259: kvm_exit: reason MSR_WRITE It also seems possible for a kvm_entry event to be logged, but then we leave vmx_vcpu_run() right away (if vmx->emulation_required is true). In this case, we will have a spurious kvm_entry event in the trace. Fix these situations by moving trace_kvm_entry() inside vmx_vcpu_run() (where trace_kvm_exit() already is). A trace obtained with this patch applied looks like this: CPU-14295 [000] 8388.395387: kvm_entry: vcpu 0 CPU-14295 [000] 8388.395392: kvm_exit: reason MSR_WRITE CPU-14295 [000] 8388.395393: kvm_entry: vcpu 0 CPU-14295 [000] 8388.395503: kvm_exit: reason EXTERNAL_INTERRUPT Of course, not calling trace_kvm_entry() in common x86 code any longer means that we need to adjust the SVM side of things too. Signed-off-by: Lorenzo Brescia Signed-off-by: Dario Faggioli Message-Id: <160873470698.11652.13483635328769030605.stgit@Wayrath> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7ef171790d02..f923e14e87df 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3739,6 +3739,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + trace_kvm_entry(vcpu); + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2af05d3b0590..cc60b1fc3ee7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6653,6 +6653,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return EXIT_FASTPATH_NONE; + trace_kvm_entry(vcpu); + if (vmx->ple_window_dirty) { vmx->ple_window_dirty = false; vmcs_write32(PLE_WINDOW, vmx->ple_window); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9025c7673af6..1f64e8b97605 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8992,8 +8992,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.request_immediate_exit(vcpu); } - trace_kvm_entry(vcpu); - fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_return(); @@ -11560,6 +11558,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); From d51e1d3f6b4236e0352407d8a63f5c5f71ce193d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 14 Jan 2021 22:54:47 +0200 Subject: [PATCH 20/25] KVM: nVMX: Sync unsync'd vmcs02 state to vmcs12 on migration Even when we are outside the nested guest, some vmcs02 fields may not be in sync vs vmcs12. This is intentional, even across nested VM-exit, because the sync can be delayed until the nested hypervisor performs a VMCLEAR or a VMREAD/VMWRITE that affects those rarely accessed fields. However, during KVM_GET_NESTED_STATE, the vmcs12 has to be up to date to be able to restore it. To fix that, call copy_vmcs02_to_vmcs12_rare() before the vmcs12 contents are copied to userspace. Fixes: 7952d769c29ca ("KVM: nVMX: Sync rarely accessed guest fields only when needed") Reviewed-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210114205449.8715-2-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0fbb46990dfc..776688f9d101 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6077,11 +6077,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { - if (vmx->nested.hv_evmcs) - copy_enlightened_to_vmcs12(vmx); - else if (enable_shadow_vmcs) - copy_shadow_to_vmcs12(vmx); + } else { + copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); + if (!vmx->nested.need_vmcs12_to_shadow_sync) { + if (vmx->nested.hv_evmcs) + copy_enlightened_to_vmcs12(vmx); + else if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + } } BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); From 250091409a4ac567581c1f929eb39139b57b56ec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Jan 2021 15:50:47 -0800 Subject: [PATCH 21/25] KVM: SVM: Unconditionally sync GPRs to GHCB on VMRUN of SEV-ES guest Drop the per-GPR dirty checks when synchronizing GPRs to the GHCB, the GRPs' dirty bits are set from time zero and never cleared, i.e. will always be seen as dirty. The obvious alternative would be to clear the dirty bits when appropriate, but removing the dirty checks is desirable as it allows reverting GPR dirty+available tracking, which adds overhead to all flavors of x86 VMs. Note, unconditionally writing the GPRs in the GHCB is tacitly allowed by the GHCB spec, which allows the hypervisor (or guest) to provide unnecessary info; it's the guest's responsibility to consume only what it needs (the hypervisor is untrusted after all). The guest and hypervisor can supply additional state if desired but must not rely on that additional state being provided. Cc: Brijesh Singh Cc: Tom Lendacky Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Signed-off-by: Sean Christopherson Message-Id: <20210122235049.3107620-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c8ffdbc81709..ac652bc476ae 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1415,16 +1415,13 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) * to be returned: * GPRs RAX, RBX, RCX, RDX * - * Copy their values to the GHCB if they are dirty. + * Copy their values, even if they may not have been written during the + * VM-Exit. It's the guest's responsibility to not consume random data. */ - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX)) - ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX)) - ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX)) - ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX)) - ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); + ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); + ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); + ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); + ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); } static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) From aed89418de9a881419516fa0a5643577f521efc9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Jan 2021 15:50:48 -0800 Subject: [PATCH 22/25] KVM: x86: Revert "KVM: x86: Mark GPRs dirty when written" Revert the dirty/available tracking of GPRs now that KVM copies the GPRs to the GHCB on any post-VMGEXIT VMRUN, even if a GPR is not dirty. Per commit de3cd117ed2f ("KVM: x86: Omit caching logic for always-available GPRs"), tracking for GPRs noticeably impacts KVM's code footprint. This reverts commit 1c04d8c986567c27c56c05205dceadc92efb14ff. Signed-off-by: Sean Christopherson Message-Id: <20210122235049.3107620-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 57 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f15bc16de07c..a889563ad02d 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,6 +9,34 @@ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) +#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ +static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ +{ \ + return vcpu->arch.regs[VCPU_REGS_##uname]; \ +} \ +static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ + unsigned long val) \ +{ \ + vcpu->arch.regs[VCPU_REGS_##uname] = val; \ +} +BUILD_KVM_GPR_ACCESSORS(rax, RAX) +BUILD_KVM_GPR_ACCESSORS(rbx, RBX) +BUILD_KVM_GPR_ACCESSORS(rcx, RCX) +BUILD_KVM_GPR_ACCESSORS(rdx, RDX) +BUILD_KVM_GPR_ACCESSORS(rbp, RBP) +BUILD_KVM_GPR_ACCESSORS(rsi, RSI) +BUILD_KVM_GPR_ACCESSORS(rdi, RDI) +#ifdef CONFIG_X86_64 +BUILD_KVM_GPR_ACCESSORS(r8, R8) +BUILD_KVM_GPR_ACCESSORS(r9, R9) +BUILD_KVM_GPR_ACCESSORS(r10, R10) +BUILD_KVM_GPR_ACCESSORS(r11, R11) +BUILD_KVM_GPR_ACCESSORS(r12, R12) +BUILD_KVM_GPR_ACCESSORS(r13, R13) +BUILD_KVM_GPR_ACCESSORS(r14, R14) +BUILD_KVM_GPR_ACCESSORS(r15, R15) +#endif + static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { @@ -34,35 +62,6 @@ static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } -#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ -static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ -{ \ - return vcpu->arch.regs[VCPU_REGS_##uname]; \ -} \ -static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ - unsigned long val) \ -{ \ - vcpu->arch.regs[VCPU_REGS_##uname] = val; \ - kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \ -} -BUILD_KVM_GPR_ACCESSORS(rax, RAX) -BUILD_KVM_GPR_ACCESSORS(rbx, RBX) -BUILD_KVM_GPR_ACCESSORS(rcx, RCX) -BUILD_KVM_GPR_ACCESSORS(rdx, RDX) -BUILD_KVM_GPR_ACCESSORS(rbp, RBP) -BUILD_KVM_GPR_ACCESSORS(rsi, RSI) -BUILD_KVM_GPR_ACCESSORS(rdi, RDI) -#ifdef CONFIG_X86_64 -BUILD_KVM_GPR_ACCESSORS(r8, R8) -BUILD_KVM_GPR_ACCESSORS(r9, R9) -BUILD_KVM_GPR_ACCESSORS(r10, R10) -BUILD_KVM_GPR_ACCESSORS(r11, R11) -BUILD_KVM_GPR_ACCESSORS(r12, R12) -BUILD_KVM_GPR_ACCESSORS(r13, R13) -BUILD_KVM_GPR_ACCESSORS(r14, R14) -BUILD_KVM_GPR_ACCESSORS(r15, R15) -#endif - static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) From 9a78e15802a87de2b08dfd1bd88e855201d2c8fa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Jan 2021 11:43:08 -0500 Subject: [PATCH 23/25] KVM: x86: allow KVM_REQ_GET_NESTED_STATE_PAGES outside guest mode for VMX VMX also uses KVM_REQ_GET_NESTED_STATE_PAGES for the Hyper-V eVMCS, which may need to be loaded outside guest mode. Therefore we cannot WARN in that case. However, that part of nested_get_vmcs12_pages is _not_ needed at vmentry time. Split it out of KVM_REQ_GET_NESTED_STATE_PAGES handling, so that both vmentry and migration (and in the latter case, independent of is_guest_mode) do the parts that are needed. Cc: # 5.10.x: f2c7ef3ba: KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES Cc: # 5.10.x Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ arch/x86/kvm/vmx/nested.c | 31 +++++++++++++++++++++++++------ arch/x86/kvm/x86.c | 4 +--- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cb4c6ee10029..7a605ad8254d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -200,6 +200,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (WARN_ON(!is_guest_mode(vcpu))) + return true; + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 776688f9d101..f2b9bfb58206 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3124,13 +3124,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_host_map *map; - struct page *page; - u64 hpa; /* * hv_evmcs may end up being not mapped after migration (when @@ -3153,6 +3149,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } } + return true; +} + +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_host_map *map; + struct page *page; + u64 hpa; + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { /* * Translate L1 physical address to host physical @@ -3221,6 +3228,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + + return true; +} + +static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) +{ + if (!nested_get_evmcs_page(vcpu)) + return false; + + if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) + return false; + return true; } @@ -6605,7 +6624,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = { .hv_timer_pending = nested_vmx_preemption_timer_pending, .get_state = vmx_get_nested_state, .set_state = vmx_set_nested_state, - .get_nested_state_pages = nested_get_vmcs12_pages, + .get_nested_state_pages = vmx_get_nested_state_pages, .write_log_dirty = nested_vmx_write_pml_buffer, .enable_evmcs = nested_enable_evmcs, .get_evmcs_version = nested_get_evmcs_version, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f64e8b97605..76bce832cade 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8806,9 +8806,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { - if (WARN_ON_ONCE(!is_guest_mode(vcpu))) - ; - else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { + if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; goto out; } From 377bf660d07a47269510435d11f3b65d53edca20 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 26 Jan 2021 10:39:46 -0800 Subject: [PATCH 24/25] Revert "mm: fix initialization of struct page for holes in memory layout" This reverts commit d3921cb8be29ce5668c64e23ffdaeec5f8c69399. Chris Wilson reports that it causes boot problems: "We have half a dozen or so different machines in CI that are silently failing to boot, that we believe is bisected to this patch" and the CI team confirmed that a revert fixed the issues. The cause is unknown for now, so let's revert it. Link: https://lore.kernel.org/lkml/161160687463.28991.354987542182281928@build.alporthouse.com/ Reported-and-tested-by: Chris Wilson Acked-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 84 ++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 50 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 783913e41f65..519a60d5b6f7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7080,26 +7080,23 @@ void __init free_area_init_memoryless_node(int nid) * Initialize all valid struct pages in the range [spfn, epfn) and mark them * PageReserved(). Return the number of struct pages that were initialized. */ -static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn, - int zone, int nid) +static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) { - unsigned long pfn, zone_spfn, zone_epfn; + unsigned long pfn; u64 pgcnt = 0; - zone_spfn = arch_zone_lowest_possible_pfn[zone]; - zone_epfn = arch_zone_highest_possible_pfn[zone]; - - spfn = clamp(spfn, zone_spfn, zone_epfn); - epfn = clamp(epfn, zone_spfn, zone_epfn); - for (pfn = spfn; pfn < epfn; pfn++) { if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + pageblock_nr_pages - 1; continue; } - - __init_single_page(pfn_to_page(pfn), pfn, zone, nid); + /* + * Use a fake node/zone (0) for now. Some of these pages + * (in memblock.reserved but not in memblock.memory) will + * get re-initialized via reserve_bootmem_region() later. + */ + __init_single_page(pfn_to_page(pfn), pfn, 0, 0); __SetPageReserved(pfn_to_page(pfn)); pgcnt++; } @@ -7108,64 +7105,51 @@ static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn, } /* - * Only struct pages that correspond to ranges defined by memblock.memory - * are zeroed and initialized by going through __init_single_page() during - * memmap_init(). + * Only struct pages that are backed by physical memory are zeroed and + * initialized by going through __init_single_page(). But, there are some + * struct pages which are reserved in memblock allocator and their fields + * may be accessed (for example page_to_pfn() on some configuration accesses + * flags). We must explicitly initialize those struct pages. * - * But, there could be struct pages that correspond to holes in - * memblock.memory. This can happen because of the following reasons: - * - phyiscal memory bank size is not necessarily the exact multiple of the - * arbitrary section size - * - early reserved memory may not be listed in memblock.memory - * - memory layouts defined with memmap= kernel parameter may not align - * nicely with memmap sections - * - * Explicitly initialize those struct pages so that: - * - PG_Reserved is set - * - zone link is set accorging to the architecture constrains - * - node is set to node id of the next populated region except for the - * trailing hole where last node id is used + * This function also addresses a similar issue where struct pages are left + * uninitialized because the physical address range is not covered by + * memblock.memory or memblock.reserved. That could happen when memblock + * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. */ -static void __init init_zone_unavailable_mem(int zone) +static void __init init_unavailable_mem(void) { - unsigned long start, end; - int i, nid; - u64 pgcnt; - unsigned long next = 0; + phys_addr_t start, end; + u64 i, pgcnt; + phys_addr_t next = 0; /* - * Loop through holes in memblock.memory and initialize struct - * pages corresponding to these holes + * Loop through unavailable ranges not covered by memblock.memory. */ pgcnt = 0; - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { + for_each_mem_range(i, &start, &end) { if (next < start) - pgcnt += init_unavailable_range(next, start, zone, nid); + pgcnt += init_unavailable_range(PFN_DOWN(next), + PFN_UP(start)); next = end; } /* - * Last section may surpass the actual end of memory (e.g. we can - * have 1Gb section and 512Mb of RAM pouplated). - * Make sure that memmap has a well defined state in this case. + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. */ - end = round_up(max_pfn, PAGES_PER_SECTION); - pgcnt += init_unavailable_range(next, end, zone, nid); + pgcnt += init_unavailable_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because * firmware is using some of this memory, or for some other reasons. */ if (pgcnt) - pr_info("Zone %s: zeroed struct page in unavailable ranges: %lld pages", zone_names[zone], pgcnt); -} - -static void __init init_unavailable_mem(void) -{ - int zone; - - for (zone = 0; zone < ZONE_MOVABLE; zone++) - init_zone_unavailable_mem(zone); + pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); } #else static inline void __init init_unavailable_mem(void) From 2ab38c17aac10bf55ab3efde4c4db3893d8691d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 26 Jan 2021 01:04:38 +0100 Subject: [PATCH 25/25] mailmap: remove the "repo-abbrev" comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the magical "repo-abbrev" comment added when this file was introduced in e0ab1ec9fcd3 ([PATCH] add .mailmap for proper git-shortlog output, 2007-02-14). It's been an undocumented feature of git-shortlog(1), originally added to git for Linus's use. Since then he's no longer using it[1], and I've removed the feature in git.git's 4e168333a87 (shortlog: remove unused(?) "repo-abbrev" feature, 2021-01-12). It's on the "master" branch, but not yet in a release version. Let's also remove it from linux.git, both as a heads-up to any potential users of it in linux.git whose use would be broken sooner than later by git itself, and because it'll eventually be entirely redundant. 1. https://lore.kernel.org/git/CAHk-=wixHyBKZVUcxq+NCWMbkrX0xnppb7UCopRWw1+oExYpYw@mail.gmail.com/ Acked-by: Linus Torvalds Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Linus Torvalds --- .mailmap | 3 --- 1 file changed, 3 deletions(-) diff --git a/.mailmap b/.mailmap index b1ab0129c7d6..cc4e91d3075e 100644 --- a/.mailmap +++ b/.mailmap @@ -9,9 +9,6 @@ # # Please keep this list dictionary sorted. # -# This comment is parsed by git-shortlog: -# repo-abbrev: /pub/scm/linux/kernel/git/ -# Aaron Durbin Adam Oldham Adam Radford