diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c8e292e9a24d..70850e591350 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1297,12 +1297,25 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, int rc; struct read_cache *mc = &ctxt->mem_read; + /* + * If the read gets a cache hit, simply copy the value from the cache. + * A "hit" here means that there is unused data in the cache, i.e. when + * re-emulating an instruction to complete a userspace exit, KVM relies + * on "no decode" to ensure the instruction is re-emulated in the same + * sequence, so that multiple reads are fulfilled in the correct order. + */ if (mc->pos < mc->end) goto read_cached; if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt)) return X86EMUL_UNHANDLEABLE; + /* + * Route all reads to the cache. This allows @dest to be an on-stack + * variable without triggering use-after-free if KVM needs to exit to + * userspace to handle an MMIO read (the MMIO fragment will point at + * the current location in the cache). + */ rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9bb9d7f078fc..b0bb9cd9df8a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8108,8 +8108,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, } struct read_write_emulator_ops { - int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, - int bytes); int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes); int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -8119,18 +8117,6 @@ struct read_write_emulator_ops { bool write; }; -static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) -{ - if (vcpu->mmio_read_completed) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, val); - vcpu->mmio_read_completed = 0; - return 1; - } - - return 0; -} - static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { @@ -8166,7 +8152,6 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, } static const struct read_write_emulator_ops read_emultor = { - .read_write_prepare = read_prepare, .read_write_emulate = read_emulate, .read_write_mmio = vcpu_mmio_read, .read_write_exit_mmio = read_exit_mmio, @@ -8249,9 +8234,23 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt, if (WARN_ON_ONCE((bytes > 8u || !ops->write) && object_is_on_stack(val))) return X86EMUL_UNHANDLEABLE; - if (ops->read_write_prepare && - ops->read_write_prepare(vcpu, val, bytes)) + /* + * If the read was already completed via a userspace MMIO exit, there's + * nothing left to do except trace the MMIO read. When completing MMIO + * reads, KVM re-emulates the instruction to propagate the value into + * the correct destination, e.g. into the correct register, but the + * value itself has already been copied to the read cache. + * + * Note! This is *tightly* coupled to read_emulated() satisfying reads + * from the emulator's mem_read cache, so that the MMIO fragment data + * is copied to the correct chunk of the correct operand. + */ + if (!ops->write && vcpu->mmio_read_completed) { + trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, + vcpu->mmio_fragments[0].gpa, val); + vcpu->mmio_read_completed = 0; return X86EMUL_CONTINUE; + } vcpu->mmio_nr_fragments = 0;