mirror of
https://github.com/torvalds/linux.git
synced 2026-06-07 22:14:04 +02:00
Linux 5.12-rc3
-----BEGIN PGP SIGNATURE----- iQFSBAABCAA8FiEEq68RxlopcLEwq+PEeb4+QwBBGIYFAmBOgu4eHHRvcnZhbGRz QGxpbnV4LWZvdW5kYXRpb24ub3JnAAoJEHm+PkMAQRiGUd0H/3Ey8aWjVAig9Pe+ VQVZKwG+LXWH6UmUx5qyaTxophhmGnWLvkigJMn63qIg4eQtfp2gNFHK+T4OJNIP ybnkjFZ337x4J9zD6m8mt4Wmelq9iW2wNOS+3YZAyYiGlXfMGM7SlYRCQRQznTED 2O/JCMsOoP+Z8tr5ah/bzs0dANsXmTZ3QqRP2uzb6irKTgFR3/weOhj+Ht1oJ4Aq V+bgdcwhtk20hJhlvVeqws+o74LR789tTDCknlz/YNMv9e6VPfyIQ5vJAcFmZATE Ezj9yzkZ4IU+Ux6ikAyaFyBU8d1a4Wqye3eHCZBsEo6tcSAhbTZ90eoU86vh6ajS LZjwkNw= =6y1u -----END PGP SIGNATURE----- Merge 5.12-rc3 into android-mainline Linux 5.12-rc3 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ibc683be6a28885f39d477e3f86ec2ecd1a9e7e21
This commit is contained in:
commit
6bb5c8b522
|
|
@ -23,6 +23,7 @@ properties:
|
|||
- enum:
|
||||
- ingenic,jz4775-intc
|
||||
- ingenic,jz4770-intc
|
||||
- ingenic,jz4760b-intc
|
||||
- const: ingenic,jz4760-intc
|
||||
- items:
|
||||
- const: ingenic,x1000-intc
|
||||
|
|
|
|||
|
|
@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can
|
|||
be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
|
||||
ioctl() at run-time.
|
||||
|
||||
Creation of the VM will fail if the requested IPA size (whether it is
|
||||
implicit or explicit) is unsupported on the host.
|
||||
|
||||
Please note that configuring the IPA size does not affect the capability
|
||||
exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
|
||||
size of the address translated by the stage2 level (guest physical to
|
||||
|
|
|
|||
|
|
@ -261,8 +261,8 @@ ABI/API
|
|||
L: linux-api@vger.kernel.org
|
||||
F: include/linux/syscalls.h
|
||||
F: kernel/sys_ni.c
|
||||
F: include/uapi/
|
||||
F: arch/*/include/uapi/
|
||||
X: include/uapi/
|
||||
X: arch/*/include/uapi/
|
||||
|
||||
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -2,7 +2,7 @@
|
|||
VERSION = 5
|
||||
PATCHLEVEL = 12
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc2
|
||||
EXTRAVERSION = -rc3
|
||||
NAME = Frozen Wasteland
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
|
|
|||
|
|
@ -347,6 +347,7 @@ config ARCH_EP93XX
|
|||
select ARM_AMBA
|
||||
imply ARM_PATCH_PHYS_VIRT
|
||||
select ARM_VIC
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select AUTO_ZRELADDR
|
||||
select CLKDEV_LOOKUP
|
||||
select CLKSRC_MMIO
|
||||
|
|
|
|||
|
|
@ -47,10 +47,10 @@
|
|||
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6
|
||||
#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11
|
||||
|
|
@ -183,16 +183,16 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
|
|||
#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
|
||||
|
||||
extern void __kvm_flush_vm_context(void);
|
||||
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
|
||||
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
|
||||
int level);
|
||||
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
|
||||
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
|
||||
|
||||
extern u64 __vgic_v3_get_ich_vtr_el2(void);
|
||||
extern u64 __vgic_v3_get_gic_config(void);
|
||||
extern u64 __vgic_v3_read_vmcr(void);
|
||||
extern void __vgic_v3_write_vmcr(u32 vmcr);
|
||||
extern void __vgic_v3_init_lrs(void);
|
||||
|
|
|
|||
|
|
@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
|
|||
void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
|
||||
void __debug_switch_to_host(struct kvm_vcpu *vcpu);
|
||||
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
|
||||
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
|
||||
#endif
|
||||
|
||||
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
|
||||
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
|
||||
|
||||
|
|
@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
|
|||
|
||||
void __noreturn hyp_panic(void);
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
|
||||
void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
|
||||
u64 elr, u64 par);
|
||||
#endif
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
|
|
|||
|
|
@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
|
|||
/* Array containing bases of nVHE per-CPU memory regions. */
|
||||
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
|
||||
|
||||
/* PMU available static key */
|
||||
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
|
||||
|
||||
#endif /* CONFIG_KVM */
|
||||
|
||||
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
|
||||
|
|
|
|||
|
|
@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
|
||||
|
||||
/*
|
||||
* We guarantee that both TLBs and I-cache are private to each
|
||||
* vcpu. If detecting that a vcpu from the same VM has
|
||||
* previously run on the same physical CPU, call into the
|
||||
* hypervisor code to nuke the relevant contexts.
|
||||
*
|
||||
* We might get preempted before the vCPU actually runs, but
|
||||
* over-invalidation doesn't affect correctness.
|
||||
*/
|
||||
if (*last_ran != vcpu->vcpu_id) {
|
||||
kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
|
||||
kvm_call_hyp(__kvm_flush_cpu_context, mmu);
|
||||
*last_ran = vcpu->vcpu_id;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
|
|||
|
||||
// If the hyp context is loaded, go straight to hyp_panic
|
||||
get_loaded_vcpu x0, x1
|
||||
cbz x0, hyp_panic
|
||||
cbnz x0, 1f
|
||||
b hyp_panic
|
||||
|
||||
1:
|
||||
// The hyp context is saved so make sure it is restored to allow
|
||||
// hyp_panic to run at hyp and, subsequently, panic to run in the host.
|
||||
// This makes use of __guest_exit to avoid duplication but sets the
|
||||
|
|
@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
|
|||
// current state is saved to the guest context but it will only be
|
||||
// accurate if the guest had been completely restored.
|
||||
adr_this_cpu x0, kvm_hyp_ctxt, x1
|
||||
adr x1, hyp_panic
|
||||
adr_l x1, hyp_panic
|
||||
str x1, [x0, #CPU_XREG_OFFSET(30)]
|
||||
|
||||
get_vcpu_ptr x1, x0
|
||||
|
|
@ -146,7 +148,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
|
|||
// Now restore the hyp regs
|
||||
restore_callee_saved_regs x2
|
||||
|
||||
set_loaded_vcpu xzr, x1, x2
|
||||
set_loaded_vcpu xzr, x2, x3
|
||||
|
||||
alternative_if ARM64_HAS_RAS_EXTN
|
||||
// If we have the RAS extensions we can consume a pending error
|
||||
|
|
|
|||
|
|
@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
|
|||
* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
|
||||
* EL1 instead of being trapped to EL2.
|
||||
*/
|
||||
write_sysreg(0, pmselr_el0);
|
||||
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
|
||||
if (kvm_arm_support_pmu_v3()) {
|
||||
write_sysreg(0, pmselr_el0);
|
||||
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
|
||||
}
|
||||
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
|
||||
}
|
||||
|
||||
static inline void __deactivate_traps_common(void)
|
||||
{
|
||||
write_sysreg(0, hstr_el2);
|
||||
write_sysreg(0, pmuserenr_el0);
|
||||
if (kvm_arm_support_pmu_v3())
|
||||
write_sysreg(0, pmuserenr_el0);
|
||||
}
|
||||
|
||||
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
|
|
|||
|
|
@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
|
|||
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
|
||||
}
|
||||
|
||||
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Disable and flush SPE data generation */
|
||||
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
|
||||
}
|
||||
|
||||
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__debug_switch_to_guest_common(vcpu);
|
||||
}
|
||||
|
||||
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
|
||||
}
|
||||
|
||||
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
|
||||
__debug_switch_to_host_common(vcpu);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter)
|
|||
SYM_FUNC_END(__host_enter)
|
||||
|
||||
/*
|
||||
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
|
||||
* void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
|
||||
* u64 elr, u64 par);
|
||||
*/
|
||||
SYM_FUNC_START(__hyp_do_panic)
|
||||
/* Prepare and exit to the host's panic function. */
|
||||
|
|
@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic)
|
|||
hyp_kimg_va lr, x6
|
||||
msr elr_el2, lr
|
||||
|
||||
/* Set the panic format string. Use the, now free, LR as scratch. */
|
||||
ldr lr, =__hyp_panic_string
|
||||
hyp_kimg_va lr, x6
|
||||
mov x29, x0
|
||||
|
||||
/* Load the format string into x0 and arguments into x1-7 */
|
||||
ldr x0, =__hyp_panic_string
|
||||
hyp_kimg_va x0, x6
|
||||
|
||||
/* Load the format arguments into x1-7. */
|
||||
mov x6, x3
|
||||
|
|
@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic)
|
|||
mrs x5, hpfar_el2
|
||||
|
||||
/* Enter the host, conditionally restoring the host context. */
|
||||
cmp x0, xzr
|
||||
mov x0, lr
|
||||
b.eq __host_enter_without_restoring
|
||||
cbz x29, __host_enter_without_restoring
|
||||
b __host_enter_for_panic
|
||||
SYM_FUNC_END(__hyp_do_panic)
|
||||
|
||||
|
|
|
|||
|
|
@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
|
|||
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
|
||||
}
|
||||
|
||||
static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
|
||||
|
||||
__kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
|
||||
__kvm_flush_cpu_context(kern_hyp_va(mmu));
|
||||
}
|
||||
|
||||
static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
|
|||
write_sysreg_el2(tmp, SYS_SCTLR);
|
||||
}
|
||||
|
||||
static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2();
|
||||
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
|
||||
}
|
||||
|
||||
static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -115,10 +115,10 @@ static const hcall_t host_hcall[] = {
|
|||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
|
||||
HANDLE_FUNC(__kvm_flush_cpu_context),
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
HANDLE_FUNC(__kvm_enable_ssbs),
|
||||
HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
|
||||
HANDLE_FUNC(__vgic_v3_get_gic_config),
|
||||
HANDLE_FUNC(__vgic_v3_read_vmcr),
|
||||
HANDLE_FUNC(__vgic_v3_write_vmcr),
|
||||
HANDLE_FUNC(__vgic_v3_init_lrs),
|
||||
|
|
|
|||
|
|
@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
|
||||
|
||||
__sysreg_save_state_nvhe(host_ctxt);
|
||||
/*
|
||||
* We must flush and disable the SPE buffer for nVHE, as
|
||||
* the translation regime(EL1&0) is going to be loaded with
|
||||
* that of the guest. And we must do this before we change the
|
||||
* translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
|
||||
* before we load guest Stage1.
|
||||
*/
|
||||
__debug_save_host_buffers_nvhe(vcpu);
|
||||
|
||||
__adjust_pc(vcpu);
|
||||
|
||||
|
|
@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
|
||||
__fpsimd_save_fpexc32(vcpu);
|
||||
|
||||
__debug_switch_to_host(vcpu);
|
||||
/*
|
||||
* This must come after restoring the host sysregs, since a non-VHE
|
||||
* system may enable SPE here and make use of the TTBRs.
|
||||
*/
|
||||
__debug_switch_to_host(vcpu);
|
||||
__debug_restore_host_buffers_nvhe(vcpu);
|
||||
|
||||
if (pmu_switch_needed)
|
||||
__pmu_switch_to_host(host_ctxt);
|
||||
|
|
@ -257,7 +266,6 @@ void __noreturn hyp_panic(void)
|
|||
u64 spsr = read_sysreg_el2(SYS_SPSR);
|
||||
u64 elr = read_sysreg_el2(SYS_ELR);
|
||||
u64 par = read_sysreg_par();
|
||||
bool restore_host = true;
|
||||
struct kvm_cpu_context *host_ctxt;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
|
|
@ -271,7 +279,7 @@ void __noreturn hyp_panic(void)
|
|||
__sysreg_restore_state_nvhe(host_ctxt);
|
||||
}
|
||||
|
||||
__hyp_do_panic(restore_host, spsr, elr, par);
|
||||
__hyp_do_panic(host_ctxt, spsr, elr, par);
|
||||
unreachable();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
|||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
|
||||
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
||||
|
|
@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
|
|||
__tlb_switch_to_guest(mmu, &cxt);
|
||||
|
||||
__tlbi(vmalle1);
|
||||
asm volatile("ic iallu");
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
|
|
|
|||
|
|
@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
|||
goto out;
|
||||
|
||||
if (!table) {
|
||||
data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
|
||||
data->addr += kvm_granule_size(level);
|
||||
goto out;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -405,9 +405,45 @@ void __vgic_v3_init_lrs(void)
|
|||
__gic_v3_set_lr(0, i);
|
||||
}
|
||||
|
||||
u64 __vgic_v3_get_ich_vtr_el2(void)
|
||||
/*
|
||||
* Return the GIC CPU configuration:
|
||||
* - [31:0] ICH_VTR_EL2
|
||||
* - [62:32] RES0
|
||||
* - [63] MMIO (GICv2) capable
|
||||
*/
|
||||
u64 __vgic_v3_get_gic_config(void)
|
||||
{
|
||||
return read_gicreg(ICH_VTR_EL2);
|
||||
u64 val, sre = read_gicreg(ICC_SRE_EL1);
|
||||
unsigned long flags = 0;
|
||||
|
||||
/*
|
||||
* To check whether we have a MMIO-based (GICv2 compatible)
|
||||
* CPU interface, we need to disable the system register
|
||||
* view. To do that safely, we have to prevent any interrupt
|
||||
* from firing (which would be deadly).
|
||||
*
|
||||
* Note that this only makes sense on VHE, as interrupts are
|
||||
* already masked for nVHE as part of the exception entry to
|
||||
* EL2.
|
||||
*/
|
||||
if (has_vhe())
|
||||
flags = local_daif_save();
|
||||
|
||||
write_gicreg(0, ICC_SRE_EL1);
|
||||
isb();
|
||||
|
||||
val = read_gicreg(ICC_SRE_EL1);
|
||||
|
||||
write_gicreg(sre, ICC_SRE_EL1);
|
||||
isb();
|
||||
|
||||
if (has_vhe())
|
||||
local_daif_restore(flags);
|
||||
|
||||
val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
|
||||
val |= read_gicreg(ICH_VTR_EL2);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
u64 __vgic_v3_read_vmcr(void)
|
||||
|
|
|
|||
|
|
@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
|
|||
__tlb_switch_to_host(&cxt);
|
||||
}
|
||||
|
||||
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
|
||||
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct tlb_inv_context cxt;
|
||||
|
||||
|
|
@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
|
|||
__tlb_switch_to_guest(mmu, &cxt);
|
||||
|
||||
__tlbi(vmalle1);
|
||||
asm volatile("ic iallu");
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
|
|
|
|||
|
|
@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
* Prevent userspace from creating a memory region outside of the IPA
|
||||
* space addressable by the KVM guest.
|
||||
*/
|
||||
if (memslot->base_gfn + memslot->npages >=
|
||||
(kvm_phys_size(kvm) >> PAGE_SHIFT))
|
||||
if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
|
||||
return -EFAULT;
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@
|
|||
|
||||
#include <asm/kvm_emulate.h>
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
|
||||
|
||||
static int kvm_is_in_guest(void)
|
||||
{
|
||||
return kvm_get_running_vcpu() != NULL;
|
||||
|
|
@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
|
|||
|
||||
int kvm_perf_init(void)
|
||||
{
|
||||
/*
|
||||
* Check if HW_PERF_EVENTS are supported by checking the number of
|
||||
* hardware performance counters. This could ensure the presence of
|
||||
* a physical PMU and CONFIG_PERF_EVENT is selected.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
|
||||
static_branch_enable(&kvm_arm_pmu_available);
|
||||
|
||||
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
|||
return val & mask;
|
||||
}
|
||||
|
||||
bool kvm_arm_support_pmu_v3(void)
|
||||
{
|
||||
/*
|
||||
* Check if HW_PERF_EVENTS are supported by checking the number of
|
||||
* hardware performance counters. This could ensure the presence of
|
||||
* a physical PMU and CONFIG_PERF_EVENT is selected.
|
||||
*/
|
||||
return (perf_num_counters() > 0);
|
||||
}
|
||||
|
||||
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_vcpu_has_pmu(vcpu))
|
||||
|
|
|
|||
|
|
@ -326,10 +326,9 @@ int kvm_set_ipa_limit(void)
|
|||
}
|
||||
|
||||
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
|
||||
WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
|
||||
"KVM IPA Size Limit (%d bits) is smaller than default size\n",
|
||||
kvm_ipa_limit);
|
||||
kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
|
||||
kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
|
||||
((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
|
||||
" (Reduced IPA size, limited VM/VMM compatibility)" : ""));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -358,6 +357,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
|||
return -EINVAL;
|
||||
} else {
|
||||
phys_shift = KVM_PHYS_SHIFT;
|
||||
if (phys_shift > kvm_ipa_limit) {
|
||||
pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
|
||||
current->comm);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
|
|
|
|||
|
|
@ -574,9 +574,13 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
|
|||
*/
|
||||
int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
{
|
||||
u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
|
||||
u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
|
||||
bool has_v2;
|
||||
int ret;
|
||||
|
||||
has_v2 = ich_vtr_el2 >> 63;
|
||||
ich_vtr_el2 = (u32)ich_vtr_el2;
|
||||
|
||||
/*
|
||||
* The ListRegs field is 5 bits, but there is an architectural
|
||||
* maximum of 16 list registers. Just ignore bit 4...
|
||||
|
|
@ -594,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
|||
gicv4_enable ? "en" : "dis");
|
||||
}
|
||||
|
||||
kvm_vgic_global_state.vcpu_base = 0;
|
||||
|
||||
if (!info->vcpu.start) {
|
||||
kvm_info("GICv3: no GICV resource entry\n");
|
||||
kvm_vgic_global_state.vcpu_base = 0;
|
||||
} else if (!has_v2) {
|
||||
pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
|
||||
} else if (!PAGE_ALIGNED(info->vcpu.start)) {
|
||||
pr_warn("GICV physical address 0x%llx not page aligned\n",
|
||||
(unsigned long long)info->vcpu.start);
|
||||
kvm_vgic_global_state.vcpu_base = 0;
|
||||
} else {
|
||||
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
|
||||
kvm_vgic_global_state.can_emulate_gicv2 = true;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task,
|
|||
static inline long syscall_get_error(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
return regs->r10 == -1 ? regs->r8:0;
|
||||
return regs->r10 == -1 ? -regs->r8:0;
|
||||
}
|
||||
|
||||
static inline long syscall_get_return_value(struct task_struct *task,
|
||||
|
|
|
|||
|
|
@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
|
|||
{
|
||||
struct syscall_get_set_args *args = data;
|
||||
struct pt_regs *pt = args->regs;
|
||||
unsigned long *krbs, cfm, ndirty;
|
||||
unsigned long *krbs, cfm, ndirty, nlocals, nouts;
|
||||
int i, count;
|
||||
|
||||
if (unw_unwind_to_user(info) < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We get here via a few paths:
|
||||
* - break instruction: cfm is shared with caller.
|
||||
* syscall args are in out= regs, locals are non-empty.
|
||||
* - epc instruction: cfm is set by br.call
|
||||
* locals don't exist.
|
||||
*
|
||||
* For both cases arguments are reachable in cfm.sof - cfm.sol.
|
||||
* CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ]
|
||||
*/
|
||||
cfm = pt->cr_ifs;
|
||||
nlocals = (cfm >> 7) & 0x7f; /* aka sol */
|
||||
nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */
|
||||
krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
|
||||
ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
|
||||
|
||||
count = 0;
|
||||
if (in_syscall(pt))
|
||||
count = min_t(int, args->n, cfm & 0x7f);
|
||||
count = min_t(int, args->n, nouts);
|
||||
|
||||
/* Iterate over outs. */
|
||||
for (i = 0; i < count; i++) {
|
||||
int j = ndirty + nlocals + i + args->i;
|
||||
if (args->rw)
|
||||
*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
|
||||
args->args[i];
|
||||
*ia64_rse_skip_regs(krbs, j) = args->args[i];
|
||||
else
|
||||
args->args[i] = *ia64_rse_skip_regs(krbs,
|
||||
ndirty + i + args->i);
|
||||
args->args[i] = *ia64_rse_skip_regs(krbs, j);
|
||||
}
|
||||
|
||||
if (!args->rw) {
|
||||
|
|
|
|||
|
|
@ -73,9 +73,10 @@ void __patch_exception(int exc, unsigned long addr);
|
|||
#endif
|
||||
|
||||
#define OP_RT_RA_MASK 0xffff0000UL
|
||||
#define LIS_R2 0x3c020000UL
|
||||
#define ADDIS_R2_R12 0x3c4c0000UL
|
||||
#define ADDI_R2_R2 0x38420000UL
|
||||
#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2))
|
||||
#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12))
|
||||
#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2))
|
||||
|
||||
|
||||
static inline unsigned long ppc_function_entry(void *func)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
|
|||
DECLARE_INTERRUPT_HANDLER(CacheLockingException);
|
||||
DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
|
||||
DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
|
||||
DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
|
||||
DECLARE_INTERRUPT_HANDLER(WatchdogException);
|
||||
DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
|
||||
|
||||
|
|
@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
|
|||
|
||||
DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
|
||||
|
||||
void unrecoverable_exception(struct pt_regs *regs);
|
||||
|
||||
void replay_system_reset(void);
|
||||
void replay_soft_interrupts(void);
|
||||
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
|
|||
#define TRAP_FLAGS_MASK 0x11
|
||||
#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK)
|
||||
#define FULL_REGS(regs) (((regs)->trap & 1) == 0)
|
||||
#define SET_FULL_REGS(regs) ((regs)->trap |= 1)
|
||||
#define SET_FULL_REGS(regs) ((regs)->trap &= ~1)
|
||||
#endif
|
||||
#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs))
|
||||
#define NV_REG_POISON 0xdeadbeefdeadbeefUL
|
||||
|
|
@ -210,7 +210,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
|
|||
#define TRAP_FLAGS_MASK 0x1F
|
||||
#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK)
|
||||
#define FULL_REGS(regs) (((regs)->trap & 1) == 0)
|
||||
#define SET_FULL_REGS(regs) ((regs)->trap |= 1)
|
||||
#define SET_FULL_REGS(regs) ((regs)->trap &= ~1)
|
||||
#define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0)
|
||||
#define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0)
|
||||
#define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0)
|
||||
|
|
|
|||
|
|
@ -71,6 +71,16 @@ static inline void disable_kernel_vsx(void)
|
|||
{
|
||||
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
|
||||
}
|
||||
#else
|
||||
static inline void enable_kernel_vsx(void)
|
||||
{
|
||||
BUILD_BUG();
|
||||
}
|
||||
|
||||
static inline void disable_kernel_vsx(void)
|
||||
{
|
||||
BUILD_BUG();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SPE
|
||||
|
|
|
|||
|
|
@ -466,7 +466,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
|
|||
|
||||
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
|
||||
/* MSR[RI] is clear iff using SRR regs */
|
||||
.if IHSRR == EXC_HV_OR_STD
|
||||
.if IHSRR_IF_HVMODE
|
||||
BEGIN_FTR_SECTION
|
||||
xori r10,r10,MSR_RI
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
|
||||
|
|
|
|||
|
|
@ -436,7 +436,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
|
|||
return ret;
|
||||
}
|
||||
|
||||
void unrecoverable_exception(struct pt_regs *regs);
|
||||
void preempt_schedule_irq(void);
|
||||
|
||||
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
|
||||
|
|
|
|||
|
|
@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
|
|||
* in the MSR is 0. This indicates that SRR0/1 are live, and that
|
||||
* we therefore lost state by taking this exception.
|
||||
*/
|
||||
DEFINE_INTERRUPT_HANDLER(unrecoverable_exception)
|
||||
void unrecoverable_exception(struct pt_regs *regs)
|
||||
{
|
||||
pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
|
||||
regs->trap, regs->nip, regs->msr);
|
||||
|
|
|
|||
|
|
@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
|||
regs->ax = -EFAULT;
|
||||
|
||||
instrumentation_end();
|
||||
syscall_exit_to_user_mode(regs);
|
||||
local_irq_disable();
|
||||
irqentry_exit_to_user_mode(regs);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
|
|||
/* Switch to the kernel stack */
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq $__USER32_DS /* pt_regs->ss */
|
||||
pushq %r8 /* pt_regs->sp */
|
||||
|
|
|
|||
|
|
@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
|
|||
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
|
||||
|
||||
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
|
||||
/*
|
||||
* This one is magic, it will get called even when PMU init fails (because
|
||||
* there is no PMU), in which case it should simply return NULL.
|
||||
*/
|
||||
DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
|
||||
|
||||
u64 __read_mostly hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
|
|
@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
|
|||
x86_perf_event_update(event);
|
||||
}
|
||||
|
||||
static inline struct perf_guest_switch_msr *
|
||||
perf_guest_get_msrs_nop(int *nr)
|
||||
{
|
||||
*nr = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int __init init_hw_perf_events(void)
|
||||
{
|
||||
struct x86_pmu_quirk *quirk;
|
||||
|
|
@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
|
|||
x86_pmu.read = _x86_pmu_read;
|
||||
|
||||
if (!x86_pmu.guest_get_msrs)
|
||||
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
|
||||
x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
|
||||
|
||||
x86_pmu_static_call_update();
|
||||
|
||||
|
|
|
|||
|
|
@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
|||
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
|
||||
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
|
||||
if (!(event->attr.sample_type &
|
||||
~intel_pmu_large_pebs_flags(event)))
|
||||
~intel_pmu_large_pebs_flags(event))) {
|
||||
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
|
||||
event->attach_state |= PERF_ATTACH_SCHED_CB;
|
||||
}
|
||||
}
|
||||
if (x86_pmu.pebs_aliases)
|
||||
x86_pmu.pebs_aliases(event);
|
||||
|
|
@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
|||
ret = intel_pmu_setup_lbr_filter(event);
|
||||
if (ret)
|
||||
return ret;
|
||||
event->attach_state |= PERF_ATTACH_SCHED_CB;
|
||||
|
||||
/*
|
||||
* BTS is set up earlier in this path, so don't account twice
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
|
|||
int insn_get_code_seg_params(struct pt_regs *regs);
|
||||
int insn_fetch_from_user(struct pt_regs *regs,
|
||||
unsigned char buf[MAX_INSN_SIZE]);
|
||||
int insn_fetch_from_user_inatomic(struct pt_regs *regs,
|
||||
unsigned char buf[MAX_INSN_SIZE]);
|
||||
bool insn_decode(struct insn *insn, struct pt_regs *regs,
|
||||
unsigned char buf[MAX_INSN_SIZE], int buf_size);
|
||||
|
||||
|
|
|
|||
|
|
@ -963,7 +963,7 @@ struct kvm_arch {
|
|||
struct kvm_pit *vpit;
|
||||
atomic_t vapics_in_nmi_mode;
|
||||
struct mutex apic_map_lock;
|
||||
struct kvm_apic_map *apic_map;
|
||||
struct kvm_apic_map __rcu *apic_map;
|
||||
atomic_t apic_map_dirty;
|
||||
|
||||
bool apic_access_page_done;
|
||||
|
|
@ -1036,7 +1036,7 @@ struct kvm_arch {
|
|||
|
||||
bool bus_lock_detection_enabled;
|
||||
|
||||
struct kvm_pmu_event_filter *pmu_event_filter;
|
||||
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
|
||||
struct task_struct *nx_lpage_recovery_thread;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
|
|||
void entry_SYSENTER_compat(void);
|
||||
void __end_entry_SYSENTER_compat(void);
|
||||
void entry_SYSCALL_compat(void);
|
||||
void entry_SYSCALL_compat_safe_stack(void);
|
||||
void entry_INT80_compat(void);
|
||||
#ifdef CONFIG_XEN_PV
|
||||
void xen_entry_INT80_compat(void);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,8 @@ struct pt_regs {
|
|||
#include <asm/paravirt_types.h>
|
||||
#endif
|
||||
|
||||
#include <asm/proto.h>
|
||||
|
||||
struct cpuinfo_x86;
|
||||
struct task_struct;
|
||||
|
||||
|
|
@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
|
|||
#ifdef CONFIG_X86_64
|
||||
#define current_user_stack_pointer() current_pt_regs()->sp
|
||||
#define compat_user_stack_pointer() current_pt_regs()->sp
|
||||
|
||||
static inline bool ip_within_syscall_gap(struct pt_regs *regs)
|
||||
{
|
||||
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
|
||||
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
|
||||
regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
|
||||
|
|
|
|||
|
|
@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void)
|
|||
unsigned long flags;
|
||||
|
||||
asm volatile ("# smap_save\n\t"
|
||||
ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
|
||||
"pushf; pop %0; " __ASM_CLAC "\n\t"
|
||||
"1:"
|
||||
ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
|
||||
X86_FEATURE_SMAP)
|
||||
: "=rm" (flags) : : "memory", "cc");
|
||||
|
||||
return flags;
|
||||
|
|
@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void)
|
|||
static __always_inline void smap_restore(unsigned long flags)
|
||||
{
|
||||
asm volatile ("# smap_restore\n\t"
|
||||
ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
|
||||
"push %0; popf\n\t"
|
||||
"1:"
|
||||
ALTERNATIVE("", "push %0; popf\n\t",
|
||||
X86_FEATURE_SMAP)
|
||||
: : "g" (flags) : "memory", "cc");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void)
|
|||
|
||||
static int __init kvm_setup_vsyscall_timeinfo(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
u8 flags;
|
||||
|
||||
if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
|
||||
return 0;
|
||||
|
||||
flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
|
||||
if (!(flags & PVCLOCK_TSC_STABLE_BIT))
|
||||
return 0;
|
||||
|
||||
kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
|
||||
#endif
|
||||
|
||||
kvmclock_init_mem();
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
|
||||
u8 flags;
|
||||
|
||||
flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
|
||||
if (!(flags & PVCLOCK_TSC_STABLE_BIT))
|
||||
return 0;
|
||||
|
||||
kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_initcall(kvm_setup_vsyscall_timeinfo);
|
||||
|
|
|
|||
|
|
@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
|
|||
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
static __always_inline bool on_vc_stack(unsigned long sp)
|
||||
static __always_inline bool on_vc_stack(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long sp = regs->sp;
|
||||
|
||||
/* User-mode RSP is not trusted */
|
||||
if (user_mode(regs))
|
||||
return false;
|
||||
|
||||
/* SYSCALL gap still has user-mode RSP */
|
||||
if (ip_within_syscall_gap(regs))
|
||||
return false;
|
||||
|
||||
return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
|
||||
}
|
||||
|
||||
|
|
@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
|
|||
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
|
||||
|
||||
/* Make room on the IST stack */
|
||||
if (on_vc_stack(regs->sp))
|
||||
if (on_vc_stack(regs))
|
||||
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
|
||||
else
|
||||
new_ist = old_ist - sizeof(old_ist);
|
||||
|
|
@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
|
|||
int res;
|
||||
|
||||
if (user_mode(ctxt->regs)) {
|
||||
res = insn_fetch_from_user(ctxt->regs, buffer);
|
||||
res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
|
||||
if (!res) {
|
||||
ctxt->fi.vector = X86_TRAP_PF;
|
||||
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
|
||||
|
|
@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
|
|||
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
||||
{
|
||||
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
|
||||
irqentry_state_t irq_state;
|
||||
struct ghcb_state state;
|
||||
struct es_em_ctxt ctxt;
|
||||
enum es_result result;
|
||||
struct ghcb *ghcb;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/*
|
||||
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||
*/
|
||||
|
|
@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|||
return;
|
||||
}
|
||||
|
||||
irq_state = irqentry_nmi_enter(regs);
|
||||
lockdep_assert_irqs_disabled();
|
||||
instrumentation_begin();
|
||||
|
||||
/*
|
||||
|
|
@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|||
|
||||
out:
|
||||
instrumentation_end();
|
||||
irqentry_nmi_exit(regs, irq_state);
|
||||
|
||||
return;
|
||||
|
||||
|
|
|
|||
|
|
@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
|
|||
* In the SYSCALL entry path the RSP value comes from user-space - don't
|
||||
* trust it and switch to the current kernel stack
|
||||
*/
|
||||
if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
|
||||
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
|
||||
if (ip_within_syscall_gap(regs)) {
|
||||
sp = this_cpu_read(cpu_current_top_of_stack);
|
||||
goto sync;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#define orc_warn_current(args...) \
|
||||
({ \
|
||||
if (state->task == current) \
|
||||
if (state->task == current && !state->error) \
|
||||
orc_warn(args); \
|
||||
})
|
||||
|
||||
|
|
@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
|
|||
if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
|
||||
return false;
|
||||
|
||||
*ip = regs->ip;
|
||||
*sp = regs->sp;
|
||||
*ip = READ_ONCE_NOCHECK(regs->ip);
|
||||
*sp = READ_ONCE_NOCHECK(regs->sp);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
|
|||
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
|
||||
return false;
|
||||
|
||||
*ip = regs->ip;
|
||||
*sp = regs->sp;
|
||||
*ip = READ_ONCE_NOCHECK(regs->ip);
|
||||
*sp = READ_ONCE_NOCHECK(regs->sp);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
|
|||
return false;
|
||||
|
||||
if (state->full_regs) {
|
||||
*val = ((unsigned long *)state->regs)[reg];
|
||||
*val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (state->prev_regs) {
|
||||
*val = ((unsigned long *)state->prev_regs)[reg];
|
||||
*val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
|
|||
}
|
||||
|
||||
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
|
||||
kvm_wait_lapic_expire(vcpu);
|
||||
/*
|
||||
* Ensure the guest's timer has truly expired before posting an
|
||||
* interrupt. Open code the relevant checks to avoid querying
|
||||
* lapic_timer_int_injected(), which will be false since the
|
||||
* interrupt isn't yet injected. Waiting until after injecting
|
||||
* is not an option since that won't help a posted interrupt.
|
||||
*/
|
||||
if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
|
||||
vcpu->arch.apic->lapic_timer.timer_advance_ns)
|
||||
__kvm_wait_lapic_expire(vcpu);
|
||||
kvm_apic_inject_pending_timer_irqs(apic);
|
||||
return;
|
||||
}
|
||||
|
|
@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
|||
|
||||
apic_update_ppr(apic);
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
apic->lapic_timer.expired_tscdeadline = 0;
|
||||
apic_update_lvtt(apic);
|
||||
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
|
||||
update_divide_count(apic);
|
||||
|
|
|
|||
|
|
@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
|
|||
cpu_relax();
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If the SPTE is not MMU-present, there is no backing
|
||||
* page associated with the SPTE and so no side effects
|
||||
* that need to be recorded, and exclusive ownership of
|
||||
* mmu_lock ensures the SPTE can't be made present.
|
||||
* Note, zapping MMIO SPTEs is also unnecessary as they
|
||||
* are guarded by the memslots generation, not by being
|
||||
* unreachable.
|
||||
*/
|
||||
old_child_spte = READ_ONCE(*sptep);
|
||||
if (!is_shadow_present_pte(old_child_spte))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Marking the SPTE as a removed SPTE is not
|
||||
|
|
|
|||
|
|
@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs {
|
|||
{ .index = MSR_INVALID, .always = false },
|
||||
};
|
||||
|
||||
/* enable NPT for AMD64 and X86 with PAE */
|
||||
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
||||
bool npt_enabled = true;
|
||||
#else
|
||||
bool npt_enabled;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
|
||||
* pause_filter_count: On processors that support Pause filtering(indicated
|
||||
|
|
@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444);
|
|||
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
|
||||
module_param(pause_filter_count_max, ushort, 0444);
|
||||
|
||||
/* allow nested paging (virtualized MMU) for all guests */
|
||||
static int npt = true;
|
||||
module_param(npt, int, S_IRUGO);
|
||||
/*
|
||||
* Use nested page tables by default. Note, NPT may get forced off by
|
||||
* svm_hardware_setup() if it's unsupported by hardware or the host kernel.
|
||||
*/
|
||||
bool npt_enabled = true;
|
||||
module_param_named(npt, npt_enabled, bool, 0444);
|
||||
|
||||
/* allow nested virtualization in KVM/SVM */
|
||||
static int nested = true;
|
||||
|
|
@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void)
|
|||
goto err;
|
||||
}
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_NPT))
|
||||
/*
|
||||
* KVM's MMU doesn't support using 2-level paging for itself, and thus
|
||||
* NPT isn't supported if the host is using 2-level paging since host
|
||||
* CR4 is unchanged on VMRUN.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
|
||||
npt_enabled = false;
|
||||
|
||||
if (npt_enabled && !npt)
|
||||
if (!boot_cpu_has(X86_FEATURE_NPT))
|
||||
npt_enabled = false;
|
||||
|
||||
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
|
||||
|
|
|
|||
|
|
@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
|
|||
int i, nr_msrs;
|
||||
struct perf_guest_switch_msr *msrs;
|
||||
|
||||
/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
|
||||
msrs = perf_guest_get_msrs(&nr_msrs);
|
||||
|
||||
if (!msrs)
|
||||
return;
|
||||
|
||||
|
|
|
|||
|
|
@ -10601,7 +10601,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
|
|||
return (void __user *)hva;
|
||||
} else {
|
||||
if (!slot || !slot->npages)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
old_npages = slot->npages;
|
||||
hva = slot->userspace_addr;
|
||||
|
|
|
|||
|
|
@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned long insn_get_effective_ip(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long seg_base = 0;
|
||||
|
||||
/*
|
||||
* If not in user-space long mode, a custom code segment could be in
|
||||
* use. This is true in protected mode (if the process defined a local
|
||||
* descriptor table), or virtual-8086 mode. In most of the cases
|
||||
* seg_base will be zero as in USER_CS.
|
||||
*/
|
||||
if (!user_64bit_mode(regs)) {
|
||||
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
|
||||
if (seg_base == -1L)
|
||||
return 0;
|
||||
}
|
||||
|
||||
return seg_base + regs->ip;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_fetch_from_user() - Copy instruction bytes from user-space memory
|
||||
* @regs: Structure with register values as seen when entering kernel mode
|
||||
|
|
@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
|
|||
*/
|
||||
int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
|
||||
{
|
||||
unsigned long seg_base = 0;
|
||||
unsigned long ip;
|
||||
int not_copied;
|
||||
|
||||
/*
|
||||
* If not in user-space long mode, a custom code segment could be in
|
||||
* use. This is true in protected mode (if the process defined a local
|
||||
* descriptor table), or virtual-8086 mode. In most of the cases
|
||||
* seg_base will be zero as in USER_CS.
|
||||
*/
|
||||
if (!user_64bit_mode(regs)) {
|
||||
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
|
||||
if (seg_base == -1L)
|
||||
return 0;
|
||||
}
|
||||
ip = insn_get_effective_ip(regs);
|
||||
if (!ip)
|
||||
return 0;
|
||||
|
||||
not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
|
||||
|
||||
not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
|
||||
MAX_INSN_SIZE);
|
||||
return MAX_INSN_SIZE - not_copied;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
|
||||
* while in atomic code
|
||||
* @regs: Structure with register values as seen when entering kernel mode
|
||||
* @buf: Array to store the fetched instruction
|
||||
*
|
||||
* Gets the linear address of the instruction and copies the instruction bytes
|
||||
* to the buf. This function must be used in atomic context.
|
||||
*
|
||||
* Returns:
|
||||
*
|
||||
* Number of instruction bytes copied.
|
||||
*
|
||||
* 0 if nothing was copied.
|
||||
*/
|
||||
int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
|
||||
{
|
||||
unsigned long ip;
|
||||
int not_copied;
|
||||
|
||||
ip = insn_get_effective_ip(regs);
|
||||
if (!ip)
|
||||
return 0;
|
||||
|
||||
not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
|
||||
|
||||
return MAX_INSN_SIZE - not_copied;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev,
|
|||
struct bio_vec bio_vec;
|
||||
struct page *page;
|
||||
ssize_t ret = len;
|
||||
int mode;
|
||||
int mode, err;
|
||||
unsigned long blk_idx = 0;
|
||||
|
||||
if (sysfs_streq(buf, "idle"))
|
||||
|
|
@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev,
|
|||
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
|
||||
if (ret || index >= nr_pages)
|
||||
if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
|
||||
index >= nr_pages)
|
||||
return -EINVAL;
|
||||
|
||||
nr_pages = 1;
|
||||
|
|
@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev,
|
|||
goto release_init_lock;
|
||||
}
|
||||
|
||||
while (nr_pages--) {
|
||||
for (; nr_pages != 0; index++, nr_pages--) {
|
||||
struct bio_vec bvec;
|
||||
|
||||
bvec.bv_page = page;
|
||||
|
|
@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev,
|
|||
* XXX: A single page IO would be inefficient for write
|
||||
* but it would be not bad as starter.
|
||||
*/
|
||||
ret = submit_bio_wait(&bio);
|
||||
if (ret) {
|
||||
err = submit_bio_wait(&bio);
|
||||
if (err) {
|
||||
zram_slot_lock(zram, index);
|
||||
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
|
||||
zram_clear_flag(zram, index, ZRAM_IDLE);
|
||||
zram_slot_unlock(zram, index);
|
||||
/*
|
||||
* Return last IO error unless every IO were
|
||||
* not succeeded.
|
||||
*/
|
||||
ret = err;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -96,6 +96,18 @@ static void install_memreserve_table(void)
|
|||
efi_err("Failed to install memreserve config table!\n");
|
||||
}
|
||||
|
||||
static u32 get_supported_rt_services(void)
|
||||
{
|
||||
const efi_rt_properties_table_t *rt_prop_table;
|
||||
u32 supported = EFI_RT_SUPPORTED_ALL;
|
||||
|
||||
rt_prop_table = get_efi_config_table(EFI_RT_PROPERTIES_TABLE_GUID);
|
||||
if (rt_prop_table)
|
||||
supported &= rt_prop_table->runtime_services_supported;
|
||||
|
||||
return supported;
|
||||
}
|
||||
|
||||
/*
|
||||
* EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint
|
||||
* that is described in the PE/COFF header. Most of the code is the same
|
||||
|
|
@ -250,6 +262,10 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
|
|||
(prop_tbl->memory_protection_attribute &
|
||||
EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA);
|
||||
|
||||
/* force efi_novamap if SetVirtualAddressMap() is unsupported */
|
||||
efi_novamap |= !(get_supported_rt_services() &
|
||||
EFI_RT_SUPPORTED_SET_VIRTUAL_ADDRESS_MAP);
|
||||
|
||||
/* hibernation expects the runtime regions to stay in the same place */
|
||||
if (!IS_ENABLED(CONFIG_HIBERNATION) && !efi_nokaslr && !flat_va_mapping) {
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
|
|||
vm_fault_t ret;
|
||||
pgoff_t fault_page_size;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool is_cow_mapping =
|
||||
(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PMD:
|
||||
|
|
@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
|
|||
}
|
||||
|
||||
/* Always do write dirty-tracking and COW on PTE level. */
|
||||
if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
|
||||
if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
|
||||
return VM_FAULT_FALLBACK;
|
||||
|
||||
ret = ttm_bo_vm_reserve(bo, vmf);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
|
|||
vma->vm_ops = &vmw_vm_ops;
|
||||
|
||||
/* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
|
||||
if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
|
||||
if (!is_cow_mapping(vma->vm_flags))
|
||||
vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ config IRQCHIP
|
|||
config ARM_GIC
|
||||
bool
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select GENERIC_IRQ_EFFECTIVE_AFF_MASK
|
||||
|
||||
config ARM_GIC_PM
|
||||
|
|
@ -33,7 +32,6 @@ config GIC_NON_BANKED
|
|||
|
||||
config ARM_GIC_V3
|
||||
bool
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select IRQ_DOMAIN_HIERARCHY
|
||||
select PARTITION_PERCPU
|
||||
select GENERIC_IRQ_EFFECTIVE_AFF_MASK
|
||||
|
|
@ -64,7 +62,6 @@ config ARM_NVIC
|
|||
config ARM_VIC
|
||||
bool
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
|
||||
config ARM_VIC_NR
|
||||
int
|
||||
|
|
@ -99,14 +96,12 @@ config ATMEL_AIC_IRQ
|
|||
bool
|
||||
select GENERIC_IRQ_CHIP
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select SPARSE_IRQ
|
||||
|
||||
config ATMEL_AIC5_IRQ
|
||||
bool
|
||||
select GENERIC_IRQ_CHIP
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select SPARSE_IRQ
|
||||
|
||||
config I8259
|
||||
|
|
@ -153,7 +148,6 @@ config DW_APB_ICTL
|
|||
config FARADAY_FTINTC010
|
||||
bool
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select SPARSE_IRQ
|
||||
|
||||
config HISILICON_IRQ_MBIGEN
|
||||
|
|
@ -169,7 +163,6 @@ config IMGPDC_IRQ
|
|||
config IXP4XX_IRQ
|
||||
bool
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select SPARSE_IRQ
|
||||
|
||||
config MADERA_IRQ
|
||||
|
|
@ -186,7 +179,6 @@ config CLPS711X_IRQCHIP
|
|||
bool
|
||||
depends on ARCH_CLPS711X
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select SPARSE_IRQ
|
||||
default y
|
||||
|
||||
|
|
@ -205,7 +197,6 @@ config OMAP_IRQCHIP
|
|||
config ORION_IRQCHIP
|
||||
bool
|
||||
select IRQ_DOMAIN
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
|
||||
config PIC32_EVIC
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -179,5 +179,6 @@ static int __init ingenic_tcu_irq_init(struct device_node *np,
|
|||
}
|
||||
IRQCHIP_DECLARE(jz4740_tcu_irq, "ingenic,jz4740-tcu", ingenic_tcu_irq_init);
|
||||
IRQCHIP_DECLARE(jz4725b_tcu_irq, "ingenic,jz4725b-tcu", ingenic_tcu_irq_init);
|
||||
IRQCHIP_DECLARE(jz4760_tcu_irq, "ingenic,jz4760-tcu", ingenic_tcu_irq_init);
|
||||
IRQCHIP_DECLARE(jz4770_tcu_irq, "ingenic,jz4770-tcu", ingenic_tcu_irq_init);
|
||||
IRQCHIP_DECLARE(x1000_tcu_irq, "ingenic,x1000-tcu", ingenic_tcu_irq_init);
|
||||
|
|
|
|||
|
|
@ -155,6 +155,7 @@ static int __init intc_2chip_of_init(struct device_node *node,
|
|||
{
|
||||
return ingenic_intc_of_init(node, 2);
|
||||
}
|
||||
IRQCHIP_DECLARE(jz4760_intc, "ingenic,jz4760-intc", intc_2chip_of_init);
|
||||
IRQCHIP_DECLARE(jz4770_intc, "ingenic,jz4770-intc", intc_2chip_of_init);
|
||||
IRQCHIP_DECLARE(jz4775_intc, "ingenic,jz4775-intc", intc_2chip_of_init);
|
||||
IRQCHIP_DECLARE(jz4780_intc, "ingenic,jz4780-intc", intc_2chip_of_init);
|
||||
|
|
|
|||
|
|
@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
|
|||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct dentry *root = sb->s_root, *dentry;
|
||||
int err = 0;
|
||||
struct file *f = NULL;
|
||||
|
||||
e = create_entry(buffer, count);
|
||||
|
||||
if (IS_ERR(e))
|
||||
return PTR_ERR(e);
|
||||
|
||||
if (e->flags & MISC_FMT_OPEN_FILE) {
|
||||
f = open_exec(e->interpreter);
|
||||
if (IS_ERR(f)) {
|
||||
pr_notice("register: failed to install interpreter file %s\n",
|
||||
e->interpreter);
|
||||
kfree(e);
|
||||
return PTR_ERR(f);
|
||||
}
|
||||
e->interp_file = f;
|
||||
}
|
||||
|
||||
inode_lock(d_inode(root));
|
||||
dentry = lookup_one_len(e->name, root, strlen(e->name));
|
||||
err = PTR_ERR(dentry);
|
||||
|
|
@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
|
|||
goto out2;
|
||||
}
|
||||
|
||||
if (e->flags & MISC_FMT_OPEN_FILE) {
|
||||
struct file *f;
|
||||
|
||||
f = open_exec(e->interpreter);
|
||||
if (IS_ERR(f)) {
|
||||
err = PTR_ERR(f);
|
||||
pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
|
||||
simple_release_fs(&bm_mnt, &entry_count);
|
||||
iput(inode);
|
||||
inode = NULL;
|
||||
goto out2;
|
||||
}
|
||||
e->interp_file = f;
|
||||
}
|
||||
|
||||
e->dentry = dget(dentry);
|
||||
inode->i_private = e;
|
||||
inode->i_fop = &bm_entry_operations;
|
||||
|
|
@ -709,6 +706,8 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
|
|||
inode_unlock(d_inode(root));
|
||||
|
||||
if (err) {
|
||||
if (f)
|
||||
filp_close(f, NULL);
|
||||
kfree(e);
|
||||
return err;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1098,8 +1098,6 @@ struct clear_refs_private {
|
|||
|
||||
#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||
|
||||
#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
|
||||
|
||||
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
|
||||
{
|
||||
struct page *page;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,13 @@
|
|||
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
|
||||
#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
|
||||
|
||||
/* Report whether the kvm_arm_pmu_available static key is enabled. */
static __always_inline bool kvm_arm_support_pmu_v3(void)
{
	if (static_branch_likely(&kvm_arm_pmu_available))
		return true;

	return false;
}
|
||||
|
||||
#ifdef CONFIG_HW_PERF_EVENTS
|
||||
|
||||
struct kvm_pmc {
|
||||
|
|
@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
|
|||
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
|
||||
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
||||
u64 select_idx);
|
||||
bool kvm_arm_support_pmu_v3(void);
|
||||
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
|
||||
|
|
@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
|
|||
static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
|
||||
static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
|
||||
u64 data, u64 select_idx) {}
|
||||
static inline bool kvm_arm_support_pmu_v3(void) { return false; }
|
||||
static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -31,6 +31,12 @@
|
|||
#define __no_sanitize_thread
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
|
||||
#define __HAVE_BUILTIN_BSWAP32__
|
||||
#define __HAVE_BUILTIN_BSWAP64__
|
||||
#define __HAVE_BUILTIN_BSWAP16__
|
||||
#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
|
||||
|
||||
#if __has_feature(undefined_behavior_sanitizer)
|
||||
/* GCC does not have __SANITIZE_UNDEFINED__ */
|
||||
#define __no_sanitize_undefined \
|
||||
|
|
|
|||
|
|
@ -150,7 +150,6 @@ struct irq_domain_chip_generic;
|
|||
* setting up one or more generic chips for interrupt controllers
|
||||
* drivers using the generic chip library which uses this pointer.
|
||||
* @parent: Pointer to parent irq_domain to support hierarchy irq_domains
|
||||
* @debugfs_file: dentry for the domain debugfs file
|
||||
*
|
||||
* Revmap data, used internally by irq_domain
|
||||
* @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
|
||||
|
|
@ -174,9 +173,6 @@ struct irq_domain {
|
|||
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
|
||||
struct irq_domain *parent;
|
||||
#endif
|
||||
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
|
||||
struct dentry *debugfs_file;
|
||||
#endif
|
||||
|
||||
/* reverse map data. The linear map gets appended to the irq_domain */
|
||||
irq_hw_number_t hwirq_max;
|
||||
|
|
|
|||
|
|
@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
|
|||
/*
|
||||
* Set the allocation direction to bottom-up or top-down.
|
||||
*/
|
||||
static inline void memblock_set_bottom_up(bool enable)
|
||||
static inline __init void memblock_set_bottom_up(bool enable)
|
||||
{
|
||||
memblock.bottom_up = enable;
|
||||
}
|
||||
|
|
@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable)
|
|||
* if this is true, that said, memblock will allocate memory
|
||||
* in bottom-up direction.
|
||||
*/
|
||||
static inline bool memblock_bottom_up(void)
|
||||
static inline __init bool memblock_bottom_up(void)
|
||||
{
|
||||
return memblock.bottom_up;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
void mem_cgroup_split_huge_fixup(struct page *head);
|
||||
#endif
|
||||
void split_page_memcg(struct page *head, unsigned int nr);
|
||||
|
||||
#else /* CONFIG_MEMCG */
|
||||
|
||||
|
|
@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
static inline void split_page_memcg(struct page *head, unsigned int nr)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page)
|
|||
GUP_PIN_COUNTING_BIAS;
|
||||
}
|
||||
|
||||
/*
 * A mapping counts as copy-on-write when VM_MAYWRITE is set and VM_SHARED
 * is clear in @flags.
 */
static inline bool is_cow_mapping(vm_flags_t flags)
{
	const vm_flags_t cow_mask = VM_SHARED | VM_MAYWRITE;

	return (flags & cow_mask) == VM_MAYWRITE;
}
|
||||
|
||||
/*
|
||||
* This should most likely only be called during fork() to see whether we
|
||||
* should break the cow immediately for a page on the src mm.
|
||||
*/
|
||||
static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
|
||||
struct page *page)
|
||||
{
|
||||
if (!is_cow_mapping(vma->vm_flags))
|
||||
return false;
|
||||
|
||||
if (!atomic_read(&vma->vm_mm->has_pinned))
|
||||
return false;
|
||||
|
||||
return page_maybe_dma_pinned(page);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
#define SECTION_IN_PAGE_FLAGS
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#endif
|
||||
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
|
||||
|
||||
#define INIT_PASID 0
|
||||
|
||||
struct address_space;
|
||||
struct mem_cgroup;
|
||||
|
|
|
|||
|
|
@ -606,6 +606,7 @@ struct swevent_hlist {
|
|||
#define PERF_ATTACH_TASK 0x04
|
||||
#define PERF_ATTACH_TASK_DATA 0x08
|
||||
#define PERF_ATTACH_ITRACE 0x10
|
||||
#define PERF_ATTACH_SCHED_CB 0x20
|
||||
|
||||
struct perf_cgroup;
|
||||
struct perf_buffer;
|
||||
|
|
@ -872,6 +873,7 @@ struct perf_cpu_context {
|
|||
struct list_head cgrp_cpuctx_entry;
|
||||
#endif
|
||||
|
||||
struct list_head sched_cb_entry;
|
||||
int sched_cb_usage;
|
||||
|
||||
int online;
|
||||
|
|
|
|||
|
|
@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk)
|
|||
* another oom-unkillable task does this it should blame itself.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
|
||||
ret = tsk->vfork_done &&
|
||||
rcu_dereference(tsk->real_parent)->mm == tsk->mm;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -664,10 +664,7 @@ typedef struct {
|
|||
* seqcount_latch_init() - runtime initializer for seqcount_latch_t
|
||||
* @s: Pointer to the seqcount_latch_t instance
|
||||
*/
|
||||
static inline void seqcount_latch_init(seqcount_latch_t *s)
|
||||
{
|
||||
seqcount_init(&s->seqcount);
|
||||
}
|
||||
#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)
|
||||
|
||||
/**
|
||||
* raw_read_seqcount_latch() - pick even/odd latch data copy
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
|||
const struct cpumask *cpus);
|
||||
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
||||
static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
|
@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
static __always_inline int
|
||||
stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
|
||||
{
|
||||
return stop_machine_cpuslocked(fn, data, cpus);
|
||||
}
|
||||
|
||||
static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
static __always_inline int
|
||||
stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
return stop_machine(fn, data, cpus);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
|
||||
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
|
||||
#else
|
||||
static inline void u64_stats_init(struct u64_stats_sync *syncp)
|
||||
{
|
||||
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
|
||||
seqcount_init(&syncp->seq);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -119,8 +119,7 @@ config INIT_ENV_ARG_LIMIT
|
|||
|
||||
config COMPILE_TEST
|
||||
bool "Compile also drivers which will not load"
|
||||
depends on !UML && !S390
|
||||
default n
|
||||
depends on HAS_IOMEM
|
||||
help
|
||||
Some drivers can be compiled on a different platform than they are
|
||||
intended to be run on. Despite they cannot be loaded there (or even
|
||||
|
|
|
|||
|
|
@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
|
|||
static atomic_t perf_sched_count;
|
||||
|
||||
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
|
||||
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
|
||||
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
|
|
@ -3461,11 +3462,16 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|||
}
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
|
||||
|
||||
void perf_sched_cb_dec(struct pmu *pmu)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
|
||||
|
||||
--cpuctx->sched_cb_usage;
|
||||
this_cpu_dec(perf_sched_cb_usages);
|
||||
|
||||
if (!--cpuctx->sched_cb_usage)
|
||||
list_del(&cpuctx->sched_cb_entry);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
|
|||
{
|
||||
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
|
||||
|
||||
cpuctx->sched_cb_usage++;
|
||||
if (!cpuctx->sched_cb_usage++)
|
||||
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
|
||||
|
||||
this_cpu_inc(perf_sched_cb_usages);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
|
|||
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
|
||||
}
|
||||
|
||||
static void perf_pmu_sched_task(struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
bool sched_in)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
|
||||
if (prev == next)
|
||||
return;
|
||||
|
||||
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
|
||||
/* will be handled in perf_event_context_sched_in/out */
|
||||
if (cpuctx->task_ctx)
|
||||
continue;
|
||||
|
||||
__perf_pmu_sched_task(cpuctx, sched_in);
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_event_switch(struct task_struct *task,
|
||||
struct task_struct *next_prev, bool sched_in);
|
||||
|
||||
|
|
@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
|
|||
{
|
||||
int ctxn;
|
||||
|
||||
if (__this_cpu_read(perf_sched_cb_usages))
|
||||
perf_pmu_sched_task(task, next, false);
|
||||
|
||||
if (atomic_read(&nr_switch_events))
|
||||
perf_event_switch(task, next, false);
|
||||
|
||||
|
|
@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
|||
|
||||
if (atomic_read(&nr_switch_events))
|
||||
perf_event_switch(task, prev, true);
|
||||
|
||||
if (__this_cpu_read(perf_sched_cb_usages))
|
||||
perf_pmu_sched_task(prev, task, true);
|
||||
}
|
||||
|
||||
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
|
||||
|
|
@ -4657,7 +4690,7 @@ static void unaccount_event(struct perf_event *event)
|
|||
if (event->parent)
|
||||
return;
|
||||
|
||||
if (event->attach_state & PERF_ATTACH_TASK)
|
||||
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
|
||||
dec = true;
|
||||
if (event->attr.mmap || event->attr.mmap_data)
|
||||
atomic_dec(&nr_mmap_events);
|
||||
|
|
@ -11176,7 +11209,7 @@ static void account_event(struct perf_event *event)
|
|||
if (event->parent)
|
||||
return;
|
||||
|
||||
if (event->attach_state & PERF_ATTACH_TASK)
|
||||
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
|
||||
inc = true;
|
||||
if (event->attr.mmap || event->attr.mmap_data)
|
||||
atomic_inc(&nr_mmap_events);
|
||||
|
|
@ -12973,6 +13006,7 @@ static void __init perf_event_init_all_cpus(void)
|
|||
#ifdef CONFIG_CGROUP_PERF
|
||||
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
|
||||
#endif
|
||||
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -997,6 +997,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
|
|||
#endif
|
||||
}
|
||||
|
||||
static void mm_init_pasid(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_IOMMU_SUPPORT
|
||||
mm->pasid = INIT_PASID;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mm_init_uprobes_state(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_UPROBES
|
||||
|
|
@ -1027,6 +1034,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
|||
mm_init_cpumask(mm);
|
||||
mm_init_aio(mm);
|
||||
mm_init_owner(mm, p);
|
||||
mm_init_pasid(mm);
|
||||
RCU_INIT_POINTER(mm->exe_file, NULL);
|
||||
mmu_notifier_subscriptions_init(mm);
|
||||
init_tlb_flush_pending(mm);
|
||||
|
|
|
|||
|
|
@ -1898,16 +1898,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
|
|||
|
||||
static void debugfs_add_domain_dir(struct irq_domain *d)
|
||||
{
|
||||
if (!d->name || !domain_dir || d->debugfs_file)
|
||||
if (!d->name || !domain_dir)
|
||||
return;
|
||||
d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
|
||||
&irq_domain_debug_fops);
|
||||
debugfs_create_file(d->name, 0444, domain_dir, d,
|
||||
&irq_domain_debug_fops);
|
||||
}
|
||||
|
||||
static void debugfs_remove_domain_dir(struct irq_domain *d)
|
||||
{
|
||||
debugfs_remove(d->debugfs_file);
|
||||
d->debugfs_file = NULL;
|
||||
debugfs_remove(debugfs_lookup(d->name, domain_dir));
|
||||
}
|
||||
|
||||
void __init irq_domain_debugfs_init(struct dentry *root)
|
||||
|
|
|
|||
|
|
@ -1893,8 +1893,13 @@ struct migration_arg {
|
|||
struct set_affinity_pending *pending;
|
||||
};
|
||||
|
||||
/*
|
||||
* @refs: number of wait_for_completion()
|
||||
* @stop_pending: is @stop_work in use
|
||||
*/
|
||||
struct set_affinity_pending {
|
||||
refcount_t refs;
|
||||
unsigned int stop_pending;
|
||||
struct completion done;
|
||||
struct cpu_stop_work stop_work;
|
||||
struct migration_arg arg;
|
||||
|
|
@ -1929,8 +1934,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
|
|||
*/
|
||||
static int migration_cpu_stop(void *data)
|
||||
{
|
||||
struct set_affinity_pending *pending;
|
||||
struct migration_arg *arg = data;
|
||||
struct set_affinity_pending *pending = arg->pending;
|
||||
struct task_struct *p = arg->task;
|
||||
int dest_cpu = arg->dest_cpu;
|
||||
struct rq *rq = this_rq();
|
||||
|
|
@ -1952,7 +1957,6 @@ static int migration_cpu_stop(void *data)
|
|||
raw_spin_lock(&p->pi_lock);
|
||||
rq_lock(rq, &rf);
|
||||
|
||||
pending = p->migration_pending;
|
||||
/*
|
||||
* If task_rq(p) != rq, it cannot be migrated here, because we're
|
||||
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
|
||||
|
|
@ -1963,21 +1967,14 @@ static int migration_cpu_stop(void *data)
|
|||
goto out;
|
||||
|
||||
if (pending) {
|
||||
p->migration_pending = NULL;
|
||||
if (p->migration_pending == pending)
|
||||
p->migration_pending = NULL;
|
||||
complete = true;
|
||||
}
|
||||
|
||||
/* migrate_enable() -- we must not race against SCA */
|
||||
if (dest_cpu < 0) {
|
||||
/*
|
||||
* When this was migrate_enable() but we no longer
|
||||
* have a @pending, a concurrent SCA 'fixed' things
|
||||
* and we should be valid again. Nothing to do.
|
||||
*/
|
||||
if (!pending) {
|
||||
WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
|
||||
if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
|
||||
goto out;
|
||||
}
|
||||
|
||||
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
|
||||
}
|
||||
|
|
@ -1987,7 +1984,14 @@ static int migration_cpu_stop(void *data)
|
|||
else
|
||||
p->wake_cpu = dest_cpu;
|
||||
|
||||
} else if (dest_cpu < 0 || pending) {
|
||||
/*
|
||||
* XXX __migrate_task() can fail, at which point we might end
|
||||
* up running on a dodgy CPU, AFAICT this can only happen
|
||||
* during CPU hotplug, at which point we'll get pushed out
|
||||
* anyway, so it's probably not a big deal.
|
||||
*/
|
||||
|
||||
} else if (pending) {
|
||||
/*
|
||||
* This happens when we get migrated between migrate_enable()'s
|
||||
* preempt_enable() and scheduling the stopper task. At that
|
||||
|
|
@ -2002,43 +2006,32 @@ static int migration_cpu_stop(void *data)
|
|||
* ->pi_lock, so the allowed mask is stable - if it got
|
||||
* somewhere allowed, we're done.
|
||||
*/
|
||||
if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
|
||||
p->migration_pending = NULL;
|
||||
if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
|
||||
if (p->migration_pending == pending)
|
||||
p->migration_pending = NULL;
|
||||
complete = true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* When this was migrate_enable() but we no longer have an
|
||||
* @pending, a concurrent SCA 'fixed' things and we should be
|
||||
* valid again. Nothing to do.
|
||||
*/
|
||||
if (!pending) {
|
||||
WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* When migrate_enable() hits a rq mis-match we can't reliably
|
||||
* determine is_migration_disabled() and so have to chase after
|
||||
* it.
|
||||
*/
|
||||
WARN_ON_ONCE(!pending->stop_pending);
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
|
||||
&pending->arg, &pending->stop_work);
|
||||
return 0;
|
||||
}
|
||||
out:
|
||||
if (pending)
|
||||
pending->stop_pending = false;
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
|
||||
if (complete)
|
||||
complete_all(&pending->done);
|
||||
|
||||
/* For pending->{arg,stop_work} */
|
||||
pending = arg->pending;
|
||||
if (pending && refcount_dec_and_test(&pending->refs))
|
||||
wake_up_var(&pending->refs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -2225,11 +2218,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
int dest_cpu, unsigned int flags)
|
||||
{
|
||||
struct set_affinity_pending my_pending = { }, *pending = NULL;
|
||||
struct migration_arg arg = {
|
||||
.task = p,
|
||||
.dest_cpu = dest_cpu,
|
||||
};
|
||||
bool complete = false;
|
||||
bool stop_pending, complete = false;
|
||||
|
||||
/* Can the task run on the task's current CPU? If so, we're done */
|
||||
if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
|
||||
|
|
@ -2241,12 +2230,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
push_task = get_task_struct(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are pending waiters, but no pending stop_work,
|
||||
* then complete now.
|
||||
*/
|
||||
pending = p->migration_pending;
|
||||
if (pending) {
|
||||
refcount_inc(&pending->refs);
|
||||
if (pending && !pending->stop_pending) {
|
||||
p->migration_pending = NULL;
|
||||
complete = true;
|
||||
}
|
||||
|
||||
task_rq_unlock(rq, p, rf);
|
||||
|
||||
if (push_task) {
|
||||
|
|
@ -2255,7 +2248,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
}
|
||||
|
||||
if (complete)
|
||||
goto do_complete;
|
||||
complete_all(&pending->done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2266,6 +2259,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
/* Install the request */
|
||||
refcount_set(&my_pending.refs, 1);
|
||||
init_completion(&my_pending.done);
|
||||
my_pending.arg = (struct migration_arg) {
|
||||
.task = p,
|
||||
.dest_cpu = -1, /* any */
|
||||
.pending = &my_pending,
|
||||
};
|
||||
|
||||
p->migration_pending = &my_pending;
|
||||
} else {
|
||||
pending = p->migration_pending;
|
||||
|
|
@ -2290,45 +2289,41 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (flags & SCA_MIGRATE_ENABLE) {
|
||||
|
||||
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
|
||||
p->migration_flags &= ~MDF_PUSH;
|
||||
task_rq_unlock(rq, p, rf);
|
||||
|
||||
pending->arg = (struct migration_arg) {
|
||||
.task = p,
|
||||
.dest_cpu = -1,
|
||||
.pending = pending,
|
||||
};
|
||||
|
||||
stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
|
||||
&pending->arg, &pending->stop_work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (task_running(rq, p) || p->state == TASK_WAKING) {
|
||||
/*
|
||||
* Lessen races (and headaches) by delegating
|
||||
* is_migration_disabled(p) checks to the stopper, which will
|
||||
* run on the same CPU as said p.
|
||||
* MIGRATE_ENABLE gets here because 'p == current', but for
|
||||
* anything else we cannot do is_migration_disabled(), punt
|
||||
* and have the stopper function handle it all race-free.
|
||||
*/
|
||||
task_rq_unlock(rq, p, rf);
|
||||
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
|
||||
stop_pending = pending->stop_pending;
|
||||
if (!stop_pending)
|
||||
pending->stop_pending = true;
|
||||
|
||||
if (flags & SCA_MIGRATE_ENABLE)
|
||||
p->migration_flags &= ~MDF_PUSH;
|
||||
|
||||
task_rq_unlock(rq, p, rf);
|
||||
|
||||
if (!stop_pending) {
|
||||
stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
|
||||
&pending->arg, &pending->stop_work);
|
||||
}
|
||||
|
||||
if (flags & SCA_MIGRATE_ENABLE)
|
||||
return 0;
|
||||
} else {
|
||||
|
||||
if (!is_migration_disabled(p)) {
|
||||
if (task_on_rq_queued(p))
|
||||
rq = move_queued_task(rq, rf, p, dest_cpu);
|
||||
|
||||
p->migration_pending = NULL;
|
||||
complete = true;
|
||||
if (!pending->stop_pending) {
|
||||
p->migration_pending = NULL;
|
||||
complete = true;
|
||||
}
|
||||
}
|
||||
task_rq_unlock(rq, p, rf);
|
||||
|
||||
do_complete:
|
||||
if (complete)
|
||||
complete_all(&pending->done);
|
||||
}
|
||||
|
|
@ -2336,7 +2331,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
wait_for_completion(&pending->done);
|
||||
|
||||
if (refcount_dec_and_test(&pending->refs))
|
||||
wake_up_var(&pending->refs);
|
||||
wake_up_var(&pending->refs); /* No UaF, just an address */
|
||||
|
||||
/*
|
||||
* Block the original owner of &pending until all subsequent callers
|
||||
|
|
@ -2344,6 +2339,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
|||
*/
|
||||
wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
|
||||
|
||||
/* ARGH */
|
||||
WARN_ON_ONCE(my_pending.stop_pending);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -471,9 +471,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
|
|||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
preempt_disable();
|
||||
smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
|
||||
preempt_enable();
|
||||
on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
|
||||
|
||||
free_cpumask_var(tmpmask);
|
||||
cpus_read_unlock();
|
||||
|
|
|
|||
|
|
@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod)
|
|||
struct static_call_site *site;
|
||||
|
||||
for (site = start; site != stop; site++) {
|
||||
unsigned long addr = (unsigned long)static_call_key(site);
|
||||
unsigned long s_key = (long)site->key + (long)&site->key;
|
||||
unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
|
||||
unsigned long key;
|
||||
|
||||
/*
|
||||
|
|
@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
site->key = (key - (long)&site->key) |
|
||||
(site->key & STATIC_CALL_SITE_FLAGS);
|
||||
key |= s_key & STATIC_CALL_SITE_FLAGS;
|
||||
site->key = key - (long)&site->key;
|
||||
}
|
||||
|
||||
return __static_call_init(mod, start, stop);
|
||||
|
|
|
|||
|
|
@ -2083,7 +2083,7 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
|
|||
* up to the caller to provide sane values here, otherwise userspace
|
||||
* tools which use this vector might be unhappy.
|
||||
*/
|
||||
unsigned long user_auxv[AT_VECTOR_SIZE];
|
||||
unsigned long user_auxv[AT_VECTOR_SIZE] = {};
|
||||
|
||||
if (len > sizeof(user_auxv))
|
||||
return -EINVAL;
|
||||
|
|
|
|||
|
|
@ -546,8 +546,11 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
|
|||
}
|
||||
|
||||
/*
|
||||
* Recomputes cpu_base::*next_timer and returns the earliest expires_next but
|
||||
* does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
|
||||
* Recomputes cpu_base::*next_timer and returns the earliest expires_next
|
||||
* but does not set cpu_base::*expires_next, that is done by
|
||||
* hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
|
||||
* cpu_base::*expires_next right away, reprogramming logic would no longer
|
||||
* work.
|
||||
*
|
||||
* When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
|
||||
* those timers will get run whenever the softirq gets handled, at the end of
|
||||
|
|
@ -588,6 +591,37 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
|
|||
return expires_next;
|
||||
}
|
||||
|
||||
static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
|
||||
{
|
||||
ktime_t expires_next, soft = KTIME_MAX;
|
||||
|
||||
/*
|
||||
* If the soft interrupt has already been activated, ignore the
|
||||
* soft bases. They will be handled in the already raised soft
|
||||
* interrupt.
|
||||
*/
|
||||
if (!cpu_base->softirq_activated) {
|
||||
soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
|
||||
/*
|
||||
* Update the soft expiry time. clock_settime() might have
|
||||
* affected it.
|
||||
*/
|
||||
cpu_base->softirq_expires_next = soft;
|
||||
}
|
||||
|
||||
expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
|
||||
/*
|
||||
* If a softirq timer is expiring first, update cpu_base->next_timer
|
||||
* and program the hardware with the soft expiry time.
|
||||
*/
|
||||
if (expires_next > soft) {
|
||||
cpu_base->next_timer = cpu_base->softirq_next_timer;
|
||||
expires_next = soft;
|
||||
}
|
||||
|
||||
return expires_next;
|
||||
}
|
||||
|
||||
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
|
||||
{
|
||||
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
|
||||
|
|
@ -628,23 +662,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
|
|||
{
|
||||
ktime_t expires_next;
|
||||
|
||||
/*
|
||||
* Find the current next expiration time.
|
||||
*/
|
||||
expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
|
||||
|
||||
if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
|
||||
/*
|
||||
* When the softirq is activated, hrtimer has to be
|
||||
* programmed with the first hard hrtimer because soft
|
||||
* timer interrupt could occur too late.
|
||||
*/
|
||||
if (cpu_base->softirq_activated)
|
||||
expires_next = __hrtimer_get_next_event(cpu_base,
|
||||
HRTIMER_ACTIVE_HARD);
|
||||
else
|
||||
cpu_base->softirq_expires_next = expires_next;
|
||||
}
|
||||
expires_next = hrtimer_update_next_event(cpu_base);
|
||||
|
||||
if (skip_equal && expires_next == cpu_base->expires_next)
|
||||
return;
|
||||
|
|
@ -1644,8 +1662,8 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
|||
|
||||
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
|
||||
|
||||
/* Reevaluate the clock bases for the next expiry */
|
||||
expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
|
||||
/* Reevaluate the clock bases for the [soft] next expiry */
|
||||
expires_next = hrtimer_update_next_event(cpu_base);
|
||||
/*
|
||||
* Store the new expiry value so the migration code can verify
|
||||
* against it.
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
|
|||
|
||||
config KASAN_STACK
|
||||
int
|
||||
depends on KASAN_GENERIC || KASAN_SW_TAGS
|
||||
default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
|
||||
default 0
|
||||
|
||||
|
|
|
|||
17
mm/highmem.c
17
mm/highmem.c
|
|
@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
|
|||
|
||||
BUG_ON(end1 > page_size(page) || end2 > page_size(page));
|
||||
|
||||
if (start1 >= end1)
|
||||
start1 = end1 = 0;
|
||||
if (start2 >= end2)
|
||||
start2 = end2 = 0;
|
||||
|
||||
for (i = 0; i < compound_nr(page); i++) {
|
||||
void *kaddr = NULL;
|
||||
|
||||
if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
|
||||
kaddr = kmap_atomic(page + i);
|
||||
|
||||
if (start1 >= PAGE_SIZE) {
|
||||
start1 -= PAGE_SIZE;
|
||||
end1 -= PAGE_SIZE;
|
||||
} else {
|
||||
unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
|
||||
|
||||
if (end1 > start1)
|
||||
if (end1 > start1) {
|
||||
kaddr = kmap_atomic(page + i);
|
||||
memset(kaddr + start1, 0, this_end - start1);
|
||||
}
|
||||
end1 -= this_end;
|
||||
start1 = 0;
|
||||
}
|
||||
|
|
@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
|
|||
} else {
|
||||
unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
|
||||
|
||||
if (end2 > start2)
|
||||
if (end2 > start2) {
|
||||
if (!kaddr)
|
||||
kaddr = kmap_atomic(page + i);
|
||||
memset(kaddr + start2, 0, this_end - start2);
|
||||
}
|
||||
end2 -= this_end;
|
||||
start2 = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
|||
* best effort that the pinned pages won't be replaced by another
|
||||
* random page during the coming copy-on-write.
|
||||
*/
|
||||
if (unlikely(is_cow_mapping(vma->vm_flags) &&
|
||||
atomic_read(&src_mm->has_pinned) &&
|
||||
page_maybe_dma_pinned(src_page))) {
|
||||
if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
|
||||
pte_free(dst_mm, pgtable);
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
|
|
@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
|||
}
|
||||
|
||||
/* Please refer to comments in copy_huge_pmd() */
|
||||
if (unlikely(is_cow_mapping(vma->vm_flags) &&
|
||||
atomic_read(&src_mm->has_pinned) &&
|
||||
page_maybe_dma_pinned(pud_page(pud)))) {
|
||||
if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) {
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
__split_huge_pud(vma, src_pud, addr);
|
||||
|
|
@ -2471,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
|||
int i;
|
||||
|
||||
/* complete memcg works before add pages to LRU */
|
||||
mem_cgroup_split_huge_fixup(head);
|
||||
split_page_memcg(head, nr);
|
||||
|
||||
if (PageAnon(head) && PageSwapCache(head)) {
|
||||
swp_entry_t entry = { .val = page_private(head) };
|
||||
|
|
|
|||
123
mm/hugetlb.c
123
mm/hugetlb.c
|
|
@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
|
|||
}
|
||||
}
|
||||
|
||||
static inline long
|
||||
hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
|
||||
long to, struct hstate *h, struct hugetlb_cgroup *cg,
|
||||
long *regions_needed)
|
||||
{
|
||||
struct file_region *nrg;
|
||||
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(map, from, to);
|
||||
record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(map, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
|
||||
return to - from;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called with resv->lock held.
|
||||
*
|
||||
|
|
@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
|||
long add = 0;
|
||||
struct list_head *head = &resv->regions;
|
||||
long last_accounted_offset = f;
|
||||
struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
|
||||
struct file_region *rg = NULL, *trg = NULL;
|
||||
|
||||
if (regions_needed)
|
||||
*regions_needed = 0;
|
||||
|
|
@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
|||
/* When we find a region that starts beyond our range, we've
|
||||
* finished.
|
||||
*/
|
||||
if (rg->from > t)
|
||||
if (rg->from >= t)
|
||||
break;
|
||||
|
||||
/* Add an entry for last_accounted_offset -> rg->from, and
|
||||
* update last_accounted_offset.
|
||||
*/
|
||||
if (rg->from > last_accounted_offset) {
|
||||
add += rg->from - last_accounted_offset;
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(
|
||||
resv, last_accounted_offset, rg->from);
|
||||
record_hugetlb_cgroup_uncharge_info(h_cg, h,
|
||||
resv, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(resv, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
}
|
||||
if (rg->from > last_accounted_offset)
|
||||
add += hugetlb_resv_map_add(resv, rg,
|
||||
last_accounted_offset,
|
||||
rg->from, h, h_cg,
|
||||
regions_needed);
|
||||
|
||||
last_accounted_offset = rg->to;
|
||||
}
|
||||
|
|
@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
|||
/* Handle the case where our range extends beyond
|
||||
* last_accounted_offset.
|
||||
*/
|
||||
if (last_accounted_offset < t) {
|
||||
add += t - last_accounted_offset;
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(
|
||||
resv, last_accounted_offset, t);
|
||||
record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(resv, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
}
|
||||
if (last_accounted_offset < t)
|
||||
add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
|
||||
t, h, h_cg, regions_needed);
|
||||
|
||||
VM_BUG_ON(add < 0);
|
||||
return add;
|
||||
|
|
@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
|
||||
struct page *new_page)
|
||||
{
|
||||
__SetPageUptodate(new_page);
|
||||
set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
|
||||
hugepage_add_new_anon_rmap(new_page, vma, addr);
|
||||
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
|
||||
ClearHPageRestoreReserve(new_page);
|
||||
SetHPageMigratable(new_page);
|
||||
}
|
||||
|
||||
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
pte_t *src_pte, *dst_pte, entry, dst_entry;
|
||||
struct page *ptepage;
|
||||
unsigned long addr;
|
||||
int cow;
|
||||
bool cow = is_cow_mapping(vma->vm_flags);
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
unsigned long sz = huge_page_size(h);
|
||||
unsigned long npages = pages_per_huge_page(h);
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
struct mmu_notifier_range range;
|
||||
int ret = 0;
|
||||
|
||||
cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
|
||||
if (cow) {
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
|
||||
vma->vm_start,
|
||||
|
|
@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
entry = huge_ptep_get(src_pte);
|
||||
dst_entry = huge_ptep_get(dst_pte);
|
||||
again:
|
||||
if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
|
||||
/*
|
||||
* Skip if src entry none. Also, skip in the
|
||||
|
|
@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||
}
|
||||
set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
|
||||
} else {
|
||||
entry = huge_ptep_get(src_pte);
|
||||
ptepage = pte_page(entry);
|
||||
get_page(ptepage);
|
||||
|
||||
/*
|
||||
* This is a rare case where we see pinned hugetlb
|
||||
* pages while they're prone to COW. We need to do the
|
||||
* COW earlier during fork.
|
||||
*
|
||||
* When pre-allocating the page or copying data, we
|
||||
* need to be without the pgtable locks since we could
|
||||
* sleep during the process.
|
||||
*/
|
||||
if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
|
||||
pte_t src_pte_old = entry;
|
||||
struct page *new;
|
||||
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
/* Do not use reserve as it's private owned */
|
||||
new = alloc_huge_page(vma, addr, 1);
|
||||
if (IS_ERR(new)) {
|
||||
put_page(ptepage);
|
||||
ret = PTR_ERR(new);
|
||||
break;
|
||||
}
|
||||
copy_user_huge_page(new, ptepage, addr, vma,
|
||||
npages);
|
||||
put_page(ptepage);
|
||||
|
||||
/* Install the new huge page if src pte stable */
|
||||
dst_ptl = huge_pte_lock(h, dst, dst_pte);
|
||||
src_ptl = huge_pte_lockptr(h, src, src_pte);
|
||||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
entry = huge_ptep_get(src_pte);
|
||||
if (!pte_same(src_pte_old, entry)) {
|
||||
put_page(new);
|
||||
/* dst_entry won't change as in child */
|
||||
goto again;
|
||||
}
|
||||
hugetlb_install_page(vma, dst_pte, addr, new);
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cow) {
|
||||
/*
|
||||
* No need to notify as we are downgrading page
|
||||
|
|
@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
|||
*/
|
||||
huge_ptep_set_wrprotect(src, addr, src_pte);
|
||||
}
|
||||
entry = huge_ptep_get(src_pte);
|
||||
ptepage = pte_page(entry);
|
||||
get_page(ptepage);
|
||||
|
||||
page_dup_rmap(ptepage, true);
|
||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||
hugetlb_count_add(pages_per_huge_page(h), dst);
|
||||
hugetlb_count_add(npages, dst);
|
||||
}
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
|
|
|
|||
|
|
@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page)
|
|||
*/
|
||||
#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
|
||||
|
||||
static inline bool is_cow_mapping(vm_flags_t flags)
|
||||
{
|
||||
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
}
|
||||
|
||||
/*
|
||||
* These three helpers classifies VMAs for virtual memory accounting.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -20,6 +20,11 @@
|
|||
|
||||
#include "kfence.h"
|
||||
|
||||
/* May be overridden by <asm/kfence.h>. */
|
||||
#ifndef ARCH_FUNC_PREFIX
|
||||
#define ARCH_FUNC_PREFIX ""
|
||||
#endif
|
||||
|
||||
extern bool no_hash_pointers;
|
||||
|
||||
/* Helper function to either print to a seq_file or to console. */
|
||||
|
|
@ -67,8 +72,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
|
|||
for (skipnr = 0; skipnr < num_entries; skipnr++) {
|
||||
int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
|
||||
|
||||
if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
|
||||
!strncmp(buf, "__slab_free", len)) {
|
||||
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
|
||||
!strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
|
||||
/*
|
||||
* In case of tail calls from any of the below
|
||||
* to any of the above.
|
||||
|
|
@ -77,10 +83,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
|
|||
}
|
||||
|
||||
/* Also the *_bulk() variants by only checking prefixes. */
|
||||
if (str_has_prefix(buf, "kfree") ||
|
||||
str_has_prefix(buf, "kmem_cache_free") ||
|
||||
str_has_prefix(buf, "__kmalloc") ||
|
||||
str_has_prefix(buf, "kmem_cache_alloc"))
|
||||
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
|
||||
goto found;
|
||||
}
|
||||
if (fallback < num_entries)
|
||||
|
|
@ -116,12 +122,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
|
|||
lockdep_assert_held(&meta->lock);
|
||||
|
||||
if (meta->state == KFENCE_OBJECT_UNUSED) {
|
||||
seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata);
|
||||
seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
|
||||
return;
|
||||
}
|
||||
|
||||
seq_con_printf(seq,
|
||||
"kfence-#%zd [0x%p-0x%p"
|
||||
"kfence-#%td [0x%p-0x%p"
|
||||
", size=%d, cache=%s] allocated by task %d:\n",
|
||||
meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
|
||||
(cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
|
||||
|
|
@ -204,7 +210,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
|||
|
||||
pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
|
||||
(void *)stack_entries[skipnr]);
|
||||
pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n",
|
||||
pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
|
||||
get_access_type(is_write), (void *)address,
|
||||
left_of_object ? meta->addr - address : address - meta->addr,
|
||||
left_of_object ? "left" : "right", object_index);
|
||||
|
|
@ -213,14 +219,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
|||
case KFENCE_ERROR_UAF:
|
||||
pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
|
||||
(void *)stack_entries[skipnr]);
|
||||
pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n",
|
||||
pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
|
||||
get_access_type(is_write), (void *)address, object_index);
|
||||
break;
|
||||
case KFENCE_ERROR_CORRUPTION:
|
||||
pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
|
||||
pr_err("Corrupted memory at 0x%p ", (void *)address);
|
||||
print_diff_canary(address, 16, meta);
|
||||
pr_cont(" (in kfence-#%zd):\n", object_index);
|
||||
pr_cont(" (in kfence-#%td):\n", object_index);
|
||||
break;
|
||||
case KFENCE_ERROR_INVALID:
|
||||
pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
|
||||
|
|
@ -230,7 +236,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
|||
break;
|
||||
case KFENCE_ERROR_INVALID_FREE:
|
||||
pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
|
||||
pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address,
|
||||
pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
|
||||
object_index);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
13
mm/madvise.c
13
mm/madvise.c
|
|
@ -1198,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||
goto release_task;
|
||||
}
|
||||
|
||||
mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
|
||||
/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
|
||||
mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
|
||||
if (IS_ERR_OR_NULL(mm)) {
|
||||
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
|
||||
goto release_task;
|
||||
}
|
||||
|
||||
/*
|
||||
* Require CAP_SYS_NICE for influencing process performance. Note that
|
||||
* only non-destructive hints are currently supported.
|
||||
*/
|
||||
if (!capable(CAP_SYS_NICE)) {
|
||||
ret = -EPERM;
|
||||
goto release_mm;
|
||||
}
|
||||
|
||||
total_len = iov_iter_count(&iter);
|
||||
|
||||
while (iov_iter_count(&iter)) {
|
||||
|
|
@ -1218,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
|||
if (ret == 0)
|
||||
ret = total_len - iov_iter_count(&iter);
|
||||
|
||||
release_mm:
|
||||
mmput(mm);
|
||||
release_task:
|
||||
put_task_struct(task);
|
||||
|
|
|
|||
|
|
@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
|
|||
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/*
|
||||
* Because page_memcg(head) is not set on compound tails, set it now.
|
||||
* Because page_memcg(head) is not set on tails, set it now.
|
||||
*/
|
||||
void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
void split_page_memcg(struct page *head, unsigned int nr)
|
||||
{
|
||||
struct mem_cgroup *memcg = page_memcg(head);
|
||||
int i;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
if (mem_cgroup_disabled() || !memcg)
|
||||
return;
|
||||
|
||||
for (i = 1; i < HPAGE_PMD_NR; i++) {
|
||||
css_get(&memcg->css);
|
||||
head[i].memcg_data = (unsigned long)memcg;
|
||||
}
|
||||
for (i = 1; i < nr; i++)
|
||||
head[i].memcg_data = head->memcg_data;
|
||||
css_get_many(&memcg->css, nr - 1);
|
||||
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
#ifdef CONFIG_MEMCG_SWAP
|
||||
/**
|
||||
|
|
|
|||
16
mm/memory.c
16
mm/memory.c
|
|
@ -823,12 +823,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
|
|||
pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
|
||||
struct page **prealloc, pte_t pte, struct page *page)
|
||||
{
|
||||
struct mm_struct *src_mm = src_vma->vm_mm;
|
||||
struct page *new_page;
|
||||
|
||||
if (!is_cow_mapping(src_vma->vm_flags))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* What we want to do is to check whether this page may
|
||||
* have been pinned by the parent process. If so,
|
||||
|
|
@ -842,9 +838,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
|
|||
* the page count. That might give false positives for
|
||||
* for pinning, but it will work correctly.
|
||||
*/
|
||||
if (likely(!atomic_read(&src_mm->has_pinned)))
|
||||
return 1;
|
||||
if (likely(!page_maybe_dma_pinned(page)))
|
||||
if (likely(!page_needs_cow_for_dma(src_vma, page)))
|
||||
return 1;
|
||||
|
||||
new_page = *prealloc;
|
||||
|
|
@ -3117,6 +3111,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
|
|||
return handle_userfault(vmf, VM_UFFD_WP);
|
||||
}
|
||||
|
||||
/*
|
||||
* Userfaultfd write-protect can defer flushes. Ensure the TLB
|
||||
* is flushed in this case before copying.
|
||||
*/
|
||||
if (unlikely(userfaultfd_wp(vmf->vma) &&
|
||||
mm_tlb_flush_pending(vmf->vma->vm_mm)))
|
||||
flush_tlb_page(vmf->vma, vmf->address);
|
||||
|
||||
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
|
||||
if (!vmf->page) {
|
||||
/*
|
||||
|
|
|
|||
167
mm/page_alloc.c
167
mm/page_alloc.c
|
|
@ -1293,6 +1293,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
|||
|
||||
kernel_poison_pages(page, 1 << order);
|
||||
|
||||
/*
|
||||
* With hardware tag-based KASAN, memory tags must be set before the
|
||||
* page becomes unavailable via debug_pagealloc or arch_free_page.
|
||||
*/
|
||||
kasan_free_nondeferred_pages(page, order);
|
||||
|
||||
/*
|
||||
* arch_free_page() can make the page's contents inaccessible. s390
|
||||
* does this. So nothing which can access the page's contents should
|
||||
|
|
@ -1302,8 +1308,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
|||
|
||||
debug_pagealloc_unmap_pages(page, 1 << order);
|
||||
|
||||
kasan_free_nondeferred_pages(page, order);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -3322,6 +3326,7 @@ void split_page(struct page *page, unsigned int order)
|
|||
for (i = 1; i < (1 << order); i++)
|
||||
set_page_refcounted(page + i);
|
||||
split_page_owner(page, 1 << order);
|
||||
split_page_memcg(page, 1 << order);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(split_page);
|
||||
|
||||
|
|
@ -6271,12 +6276,65 @@ static void __meminit zone_init_free_lists(struct zone *zone)
|
|||
}
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
|
||||
/*
|
||||
* Only struct pages that correspond to ranges defined by memblock.memory
|
||||
* are zeroed and initialized by going through __init_single_page() during
|
||||
* memmap_init_zone().
|
||||
*
|
||||
* But, there could be struct pages that correspond to holes in
|
||||
* memblock.memory. This can happen because of the following reasons:
|
||||
* - physical memory bank size is not necessarily the exact multiple of the
|
||||
* arbitrary section size
|
||||
* - early reserved memory may not be listed in memblock.memory
|
||||
* - memory layouts defined with memmap= kernel parameter may not align
|
||||
* nicely with memmap sections
|
||||
*
|
||||
* Explicitly initialize those struct pages so that:
|
||||
* - PG_Reserved is set
|
||||
* - zone and node links point to zone and node that span the page if the
|
||||
* hole is in the middle of a zone
|
||||
* - zone and node links point to adjacent zone/node if the hole falls on
|
||||
* the zone boundary; the pages in such holes will be prepended to the
|
||||
* zone/node above the hole except for the trailing pages in the last
|
||||
* section that will be appended to the zone/node below.
|
||||
*/
|
||||
static u64 __meminit init_unavailable_range(unsigned long spfn,
|
||||
unsigned long epfn,
|
||||
int zone, int node)
|
||||
{
|
||||
unsigned long pfn;
|
||||
u64 pgcnt = 0;
|
||||
|
||||
for (pfn = spfn; pfn < epfn; pfn++) {
|
||||
if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
|
||||
pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
|
||||
+ pageblock_nr_pages - 1;
|
||||
continue;
|
||||
}
|
||||
__init_single_page(pfn_to_page(pfn), pfn, zone, node);
|
||||
__SetPageReserved(pfn_to_page(pfn));
|
||||
pgcnt++;
|
||||
}
|
||||
|
||||
return pgcnt;
|
||||
}
|
||||
#else
|
||||
static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
|
||||
int zone, int node)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void __meminit __weak memmap_init_zone(struct zone *zone)
|
||||
{
|
||||
unsigned long zone_start_pfn = zone->zone_start_pfn;
|
||||
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
|
||||
int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
|
||||
static unsigned long hole_pfn;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
u64 pgcnt = 0;
|
||||
|
||||
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
|
||||
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
|
||||
|
|
@ -6286,7 +6344,29 @@ void __meminit __weak memmap_init_zone(struct zone *zone)
|
|||
memmap_init_range(end_pfn - start_pfn, nid,
|
||||
zone_id, start_pfn, zone_end_pfn,
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
|
||||
if (hole_pfn < start_pfn)
|
||||
pgcnt += init_unavailable_range(hole_pfn, start_pfn,
|
||||
zone_id, nid);
|
||||
hole_pfn = end_pfn;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM
|
||||
/*
|
||||
* Initialize the hole in the range [zone_end_pfn, section_end].
|
||||
* If zone boundary falls in the middle of a section, this hole
|
||||
* will be re-initialized during the call to this function for the
|
||||
* higher zone.
|
||||
*/
|
||||
end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
|
||||
if (hole_pfn < end_pfn)
|
||||
pgcnt += init_unavailable_range(hole_pfn, end_pfn,
|
||||
zone_id, nid);
|
||||
#endif
|
||||
|
||||
if (pgcnt)
|
||||
pr_info(" %s zone: %llu pages in unavailable ranges\n",
|
||||
zone->name, pgcnt);
|
||||
}
|
||||
|
||||
static int zone_batchsize(struct zone *zone)
|
||||
|
|
@ -7083,88 +7163,6 @@ void __init free_area_init_memoryless_node(int nid)
|
|||
free_area_init_node(nid);
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
|
||||
/*
|
||||
* Initialize all valid struct pages in the range [spfn, epfn) and mark them
|
||||
* PageReserved(). Return the number of struct pages that were initialized.
|
||||
*/
|
||||
static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
|
||||
{
|
||||
unsigned long pfn;
|
||||
u64 pgcnt = 0;
|
||||
|
||||
for (pfn = spfn; pfn < epfn; pfn++) {
|
||||
if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
|
||||
pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
|
||||
+ pageblock_nr_pages - 1;
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Use a fake node/zone (0) for now. Some of these pages
|
||||
* (in memblock.reserved but not in memblock.memory) will
|
||||
* get re-initialized via reserve_bootmem_region() later.
|
||||
*/
|
||||
__init_single_page(pfn_to_page(pfn), pfn, 0, 0);
|
||||
__SetPageReserved(pfn_to_page(pfn));
|
||||
pgcnt++;
|
||||
}
|
||||
|
||||
return pgcnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only struct pages that are backed by physical memory are zeroed and
|
||||
* initialized by going through __init_single_page(). But, there are some
|
||||
* struct pages which are reserved in memblock allocator and their fields
|
||||
* may be accessed (for example page_to_pfn() on some configuration accesses
|
||||
* flags). We must explicitly initialize those struct pages.
|
||||
*
|
||||
* This function also addresses a similar issue where struct pages are left
|
||||
* uninitialized because the physical address range is not covered by
|
||||
* memblock.memory or memblock.reserved. That could happen when memblock
|
||||
* layout is manually configured via memmap=, or when the highest physical
|
||||
* address (max_pfn) does not end on a section boundary.
|
||||
*/
|
||||
static void __init init_unavailable_mem(void)
|
||||
{
|
||||
phys_addr_t start, end;
|
||||
u64 i, pgcnt;
|
||||
phys_addr_t next = 0;
|
||||
|
||||
/*
|
||||
* Loop through unavailable ranges not covered by memblock.memory.
|
||||
*/
|
||||
pgcnt = 0;
|
||||
for_each_mem_range(i, &start, &end) {
|
||||
if (next < start)
|
||||
pgcnt += init_unavailable_range(PFN_DOWN(next),
|
||||
PFN_UP(start));
|
||||
next = end;
|
||||
}
|
||||
|
||||
/*
|
||||
* Early sections always have a fully populated memmap for the whole
|
||||
* section - see pfn_valid(). If the last section has holes at the
|
||||
* end and that section is marked "online", the memmap will be
|
||||
* considered initialized. Make sure that memmap has a well defined
|
||||
* state.
|
||||
*/
|
||||
pgcnt += init_unavailable_range(PFN_DOWN(next),
|
||||
round_up(max_pfn, PAGES_PER_SECTION));
|
||||
|
||||
/*
|
||||
* Struct pages that do not have backing memory. This could be because
|
||||
* firmware is using some of this memory, or for some other reasons.
|
||||
*/
|
||||
if (pgcnt)
|
||||
pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
|
||||
}
|
||||
#else
|
||||
static inline void __init init_unavailable_mem(void)
|
||||
{
|
||||
}
|
||||
#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
|
||||
|
||||
#if MAX_NUMNODES > 1
|
||||
/*
|
||||
* Figure out the number of possible node ids.
|
||||
|
|
@ -7588,7 +7586,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
|||
/* Initialise every node */
|
||||
mminit_verify_pageflags_layout();
|
||||
setup_nr_node_ids();
|
||||
init_unavailable_mem();
|
||||
for_each_online_node(nid) {
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
free_area_init_node(nid);
|
||||
|
|
|
|||
|
|
@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
|||
gfp_t flags, void *objp, unsigned long caller)
|
||||
{
|
||||
WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
|
||||
if (!objp)
|
||||
if (!objp || is_kfence_address(objp))
|
||||
return objp;
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
check_poison_obj(cachep, objp);
|
||||
|
|
|
|||
|
|
@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
|
|||
if (update_cfi_state(insn, &state->cfi, op))
|
||||
return 1;
|
||||
|
||||
if (!insn->alt_group)
|
||||
continue;
|
||||
|
||||
if (op->dest.type == OP_DEST_PUSHF) {
|
||||
if (!state->uaccess_stack) {
|
||||
state->uaccess_stack = 1;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user