scsi: storvsc: Prefer returning channel with the same CPU as on the I/O issuing CPU

When selecting an outgoing channel for I/O, storvsc tries to select a
channel with a returning CPU that is not the same as the issuing CPU.
This worked well in the past; however, it doesn't work well when Hyper-V
exposes a large number of channels (up to the number of all CPUs). Using
a different CPU for the returning channel is not efficient on Hyper-V.

Change this behavior by preferring the channel with the same CPU as the
current I/O issuing CPU whenever possible.

Tests have shown improvements in newer Hyper-V/Azure environments, and
no regression with older Hyper-V/Azure environments.

Tested-by: Raheel Abdul Faizy <rabdulfaizy@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
Message-Id: <1759381530-7414-1-git-send-email-longli@linux.microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Long Li 2025-10-01 22:05:30 -07:00 committed by Martin K. Petersen
parent 558ae45798
commit b69ffeaa0a

View File

@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
}
/*
* Our channel array is sparsley populated and we
* Our channel array could be sparsley populated and we
* initiated I/O on a processor/hw-q that does not
* currently have a designated channel. Fix this.
* The strategy is simple:
* I. Ensure NUMA locality
* II. Distribute evenly (best effort)
* I. Prefer the channel associated with the current CPU
* II. Ensure NUMA locality
* III. Distribute evenly (best effort)
*/
/* Prefer the channel on the I/O issuing processor/hw-q */
if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
return stor_device->stor_chns[q_num];
node_mask = cpumask_of_node(cpu_to_node(q_num));
num_channels = 0;
@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
/* See storvsc_change_target_cpu(). */
outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
if (outgoing_channel != NULL) {
if (outgoing_channel->target_cpu == q_num) {
/*
* Ideally, we want to pick a different channel if
* available on the same NUMA node.
*/
node_mask = cpumask_of_node(cpu_to_node(q_num));
for_each_cpu_wrap(tgt_cpu,
&stor_device->alloced_cpus, q_num + 1) {
if (!cpumask_test_cpu(tgt_cpu, node_mask))
continue;
if (tgt_cpu == q_num)
continue;
channel = READ_ONCE(
stor_device->stor_chns[tgt_cpu]);
if (channel == NULL)
continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
}
}
if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
> ring_avail_percent_lowater)
goto found_channel;
/*
* All the other channels on the same NUMA node are
* busy. Try to use the channel on the current CPU
*/
if (hv_get_avail_to_write_percent(
&outgoing_channel->outbound)
> ring_avail_percent_lowater)
/*
* Channel is busy, try to find a channel on the same NUMA node
*/
node_mask = cpumask_of_node(cpu_to_node(q_num));
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
q_num + 1) {
if (!cpumask_test_cpu(tgt_cpu, node_mask))
continue;
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
if (!channel)
continue;
if (hv_get_avail_to_write_percent(&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
/*
* If we reach here, all the channels on the current
* NUMA node are busy. Try to find a channel in
* other NUMA nodes
*/
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
if (cpumask_test_cpu(tgt_cpu, node_mask))
continue;
channel = READ_ONCE(
stor_device->stor_chns[tgt_cpu]);
if (channel == NULL)
continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
}
}
}
/*
* If we reach here, all the channels on the current
* NUMA node are busy. Try to find a channel in
* all NUMA nodes
*/
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
q_num + 1) {
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
if (!channel)
continue;
if (hv_get_avail_to_write_percent(&channel->outbound)
> ring_avail_percent_lowater) {
outgoing_channel = channel;
goto found_channel;
}
}
/*
* If we reach here, all the channels are busy. Use the
* original channel found.
*/
} else {
spin_lock_irqsave(&stor_device->lock, flags);
outgoing_channel = stor_device->stor_chns[q_num];