mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 23:22:31 +02:00
After a kexec the logical processors and virtual processors already exist in the hypervisor because they were created by the previous kernel. Attempting to add them again causes either a BUG_ON or corrupted VP state leading to MCEs in the new kernel. Add hv_lp_exists() to probe whether an LP is already present by calling HVCALL_GET_LOGICAL_PROCESSOR_RUN_TIME. When it succeeds the LP exists and we skip the add-LP and create-VP loops entirely. Also add hv_call_notify_all_processors_started() which informs the hypervisor that all processors are online. This is required after adding LPs (fresh boot) and is a no-op on kexec since we skip that path. Co-developed-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com> Signed-off-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com> Co-developed-by: Stanislav Kinsburskii <stanislav.kinsburskii@gmail.com> Signed-off-by: Stanislav Kinsburskii <stanislav.kinsburskii@gmail.com> Co-developed-by: Mukesh Rathor <mrathor@linux.microsoft.com> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com> Signed-off-by: Jork Loeser <jloeser@linux.microsoft.com> Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> Signed-off-by: Wei Liu <wei.liu@kernel.org>
289 lines
7.0 KiB
C
289 lines
7.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/types.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/clockchips.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/cpuhotplug.h>
|
|
#include <linux/minmax.h>
|
|
#include <linux/export.h>
|
|
#include <asm/mshyperv.h>
|
|
|
|
/*
 * Upper bound on the number of pages one deposit request can carry.
 *
 * See struct hv_deposit_memory: the first u64 holds the partition ID and
 * every remaining u64 is a GPA, so one hypervisor page worth of u64 slots
 * minus the partition ID slot is available for GPAs.
 */
#define HV_DEPOSIT_MAX ((HV_HYP_PAGE_SIZE / sizeof(u64)) - 1)
|
|
|
|
/* Deposits exact number of pages. Must be called with interrupts enabled. */
|
|
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
|
|
{
|
|
struct page **pages, *page;
|
|
int *counts;
|
|
int num_allocations;
|
|
int i, j, page_count;
|
|
int order;
|
|
u64 status;
|
|
int ret;
|
|
u64 base_pfn;
|
|
struct hv_deposit_memory *input_page;
|
|
unsigned long flags;
|
|
|
|
if (num_pages > HV_DEPOSIT_MAX)
|
|
return -E2BIG;
|
|
if (!num_pages)
|
|
return 0;
|
|
|
|
/* One buffer for page pointers and counts */
|
|
page = alloc_page(GFP_KERNEL);
|
|
if (!page)
|
|
return -ENOMEM;
|
|
pages = page_address(page);
|
|
|
|
counts = kzalloc_objs(int, HV_DEPOSIT_MAX);
|
|
if (!counts) {
|
|
free_page((unsigned long)pages);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* Allocate all the pages before disabling interrupts */
|
|
i = 0;
|
|
|
|
while (num_pages) {
|
|
/* Find highest order we can actually allocate */
|
|
order = 31 - __builtin_clz(num_pages);
|
|
|
|
while (1) {
|
|
pages[i] = alloc_pages_node(node, GFP_KERNEL, order);
|
|
if (pages[i])
|
|
break;
|
|
if (!order) {
|
|
ret = -ENOMEM;
|
|
num_allocations = i;
|
|
goto err_free_allocations;
|
|
}
|
|
--order;
|
|
}
|
|
|
|
split_page(pages[i], order);
|
|
counts[i] = 1 << order;
|
|
num_pages -= counts[i];
|
|
i++;
|
|
}
|
|
num_allocations = i;
|
|
|
|
local_irq_save(flags);
|
|
|
|
input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
|
|
|
input_page->partition_id = partition_id;
|
|
|
|
/* Populate gpa_page_list - these will fit on the input page */
|
|
for (i = 0, page_count = 0; i < num_allocations; ++i) {
|
|
base_pfn = page_to_pfn(pages[i]);
|
|
for (j = 0; j < counts[i]; ++j, ++page_count)
|
|
input_page->gpa_page_list[page_count] = base_pfn + j;
|
|
}
|
|
status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY,
|
|
page_count, 0, input_page, NULL);
|
|
local_irq_restore(flags);
|
|
if (!hv_result_success(status)) {
|
|
hv_status_err(status, "\n");
|
|
ret = hv_result_to_errno(status);
|
|
goto err_free_allocations;
|
|
}
|
|
|
|
ret = 0;
|
|
goto free_buf;
|
|
|
|
err_free_allocations:
|
|
for (i = 0; i < num_allocations; ++i) {
|
|
base_pfn = page_to_pfn(pages[i]);
|
|
for (j = 0; j < counts[i]; ++j)
|
|
__free_page(pfn_to_page(base_pfn + j));
|
|
}
|
|
|
|
free_buf:
|
|
free_page((unsigned long)pages);
|
|
kfree(counts);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
|
|
|
|
/*
 * Deposit memory in response to an "insufficient memory" hypercall status.
 *
 * The result code in @hv_status selects how much to deposit and to whom:
 * one page for the plain variants, HV_MAX_CONTIGUOUS_ALLOCATION_PAGES for
 * the contiguous variants.  The root-memory variants deposit to
 * HV_PARTITION_ID_SELF instead of @partition_id and are only accepted when
 * hv_root_partition() is true.
 *
 * Returns the result of hv_call_deposit_pages(), or -ENOMEM when the status
 * is not one of the handled codes or a root deposit is requested outside
 * the root partition.
 */
int hv_deposit_memory_node(int node, u64 partition_id,
			   u64 hv_status)
{
	bool root_deposit = false;
	u32 num_pages;

	switch (hv_result(hv_status)) {
	case HV_STATUS_INSUFFICIENT_MEMORY:
		num_pages = 1;
		break;
	case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
		num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
		break;
	case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
		num_pages = 1;
		root_deposit = true;
		break;
	case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
		num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
		root_deposit = true;
		break;
	default:
		hv_status_err(hv_status, "Unexpected!\n");
		return -ENOMEM;
	}

	if (root_deposit) {
		/* Root memory can only be topped up from the root partition */
		if (!hv_root_partition()) {
			hv_status_err(hv_status, "Unexpected root memory deposit\n");
			return -ENOMEM;
		}
		partition_id = HV_PARTITION_ID_SELF;
	}

	return hv_call_deposit_pages(node, partition_id, num_pages);
}
|
|
EXPORT_SYMBOL_GPL(hv_deposit_memory_node);
|
|
|
|
/*
 * Tell whether the result code in @status is one of the four
 * "insufficient memory" codes (plain or contiguous, partition or root).
 */
bool hv_result_needs_memory(u64 status)
{
	const unsigned int code = hv_result(status);

	return code == HV_STATUS_INSUFFICIENT_MEMORY ||
	       code == HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY ||
	       code == HV_STATUS_INSUFFICIENT_ROOT_MEMORY ||
	       code == HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY;
}
|
|
EXPORT_SYMBOL_GPL(hv_result_needs_memory);
|
|
|
|
/*
 * Issue HVCALL_ADD_LOGICAL_PROCESSOR for @lp_index / @apic_id, with the
 * proximity domain derived from @node.
 *
 * Whenever the hypervisor reports that it needs more memory, memory is
 * deposited for hv_current_partition_id and the hypercall is retried.
 *
 * Returns 0 on success, the value of hv_result_to_errno() when the
 * hypercall fails for another reason, or the error from
 * hv_deposit_memory_node() when a deposit fails.
 */
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
	struct hv_input_add_logical_processor *input;
	struct hv_output_add_logical_processor *output;
	unsigned long flags;
	u64 status;
	int ret;

	for (;;) {
		local_irq_save(flags);

		input = *this_cpu_ptr(hyperv_pcpu_input_arg);
		/* We don't do anything with the output right now */
		output = *this_cpu_ptr(hyperv_pcpu_output_arg);

		input->lp_index = lp_index;
		input->apic_id = apic_id;
		input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
		status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
					 input, output);
		local_irq_restore(flags);

		if (hv_result_needs_memory(status)) {
			/* Deposit more pages, then retry the hypercall */
			ret = hv_deposit_memory_node(node,
						     hv_current_partition_id,
						     status);
			if (ret)
				return ret;
			continue;
		}

		if (hv_result_success(status))
			return 0;

		hv_status_err(status, "cpu %u apic ID: %u\n",
			      lp_index, apic_id);
		return hv_result_to_errno(status);
	}
}
|
|
|
|
/*
 * Issue HVCALL_CREATE_VP to create virtual processor @vp_index in
 * @partition_id, with the proximity domain derived from @node.
 *
 * For partitions other than hv_current_partition_id an initial batch of
 * pages is deposited first.  Whenever the hypervisor reports that it needs
 * more memory, memory is deposited and the hypercall is retried.
 *
 * Returns 0 on success, the value of hv_result_to_errno() when the
 * hypercall fails for another reason, or the error from the deposit helpers
 * when a deposit fails.
 */
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
{
	struct hv_create_vp *input;
	u64 status;
	unsigned long irq_flags;
	int ret = 0;

	/* Root VPs don't seem to need pages deposited */
	if (partition_id != hv_current_partition_id) {
		/* The value 90 is empirically determined. It may change. */
		ret = hv_call_deposit_pages(node, partition_id, 90);
		if (ret)
			return ret;
	}

	do {
		local_irq_save(irq_flags);

		input = *this_cpu_ptr(hyperv_pcpu_input_arg);

		/*
		 * The per-CPU input page is shared by every hypercall issued
		 * from this CPU.  Clear the structure first so that any member
		 * not assigned below (the layout is defined elsewhere) is
		 * passed as zero rather than as leftovers from an earlier
		 * hypercall, as hv_call_notify_all_processors_started() does.
		 */
		memset(input, 0, sizeof(*input));

		input->partition_id = partition_id;
		input->vp_index = vp_index;
		input->flags = flags;
		input->subnode_type = HV_SUBNODE_ANY;
		input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
		status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
		local_irq_restore(irq_flags);

		if (!hv_result_needs_memory(status)) {
			if (!hv_result_success(status)) {
				/*
				 * NOTE(review): the value printed under "lp"
				 * is @flags; confirm against the callers that
				 * this label is intended.
				 */
				hv_status_err(status, "vcpu: %u, lp: %u\n",
					      vp_index, flags);
				ret = hv_result_to_errno(status);
			}
			break;
		}

		/* Out of memory in the hypervisor: deposit and retry */
		ret = hv_deposit_memory_node(node, partition_id, status);

	} while (!ret);

	return ret;
}
|
|
EXPORT_SYMBOL_GPL(hv_call_create_vp);
|
|
|
|
int hv_call_notify_all_processors_started(void)
|
|
{
|
|
struct hv_input_notify_partition_event *input;
|
|
u64 status;
|
|
unsigned long irq_flags;
|
|
int ret = 0;
|
|
|
|
local_irq_save(irq_flags);
|
|
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
|
memset(input, 0, sizeof(*input));
|
|
input->event = HV_PARTITION_ALL_LOGICAL_PROCESSORS_STARTED;
|
|
status = hv_do_hypercall(HVCALL_NOTIFY_PARTITION_EVENT,
|
|
input, NULL);
|
|
local_irq_restore(irq_flags);
|
|
|
|
if (!hv_result_success(status)) {
|
|
hv_status_err(status, "\n");
|
|
ret = hv_result_to_errno(status);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Probe whether logical processor @lp_index is already known to the
 * hypervisor by issuing HVCALL_GET_LOGICAL_PROCESSOR_RUN_TIME for it.
 *
 * Returns true when the hypercall succeeds and false when it reports
 * HV_STATUS_INVALID_LP_INDEX.  Any other failure is logged and hits BUG().
 */
bool hv_lp_exists(u32 lp_index)
{
	struct hv_input_get_logical_processor_run_time *query;
	struct hv_output_get_logical_processor_run_time *run_time;
	unsigned long irq_flags;
	u64 hv_ret;

	local_irq_save(irq_flags);
	query = *this_cpu_ptr(hyperv_pcpu_input_arg);
	/* The returned run time is not used; only the status matters */
	run_time = *this_cpu_ptr(hyperv_pcpu_output_arg);

	query->lp_index = lp_index;
	hv_ret = hv_do_hypercall(HVCALL_GET_LOGICAL_PROCESSOR_RUN_TIME,
				 query, run_time);
	local_irq_restore(irq_flags);

	if (hv_result_success(hv_ret))
		return true;

	/* Only "no such LP" is an expected way for the probe to fail */
	if (hv_result(hv_ret) != HV_STATUS_INVALID_LP_INDEX) {
		hv_status_err(hv_ret, "\n");
		BUG();
	}

	return false;
}
|