mirror of
https://github.com/torvalds/linux.git
synced 2026-06-04 20:46:48 +02:00
scsi: storvsc: Prefer returning channel with the same CPU as on the I/O issuing CPU
When selecting an outgoing channel for I/O, storvsc tries to select a channel with a returning CPU that is not the same as the issuing CPU. This worked well in the past, but it does not work well when Hyper-V exposes a large number of channels (up to the number of all CPUs). Using a different CPU for the returning channel is not efficient on Hyper-V. Change this behavior by preferring the channel with the same CPU as the current I/O issuing CPU whenever possible. Tests have shown improvements in newer Hyper-V/Azure environments, and no regression with older Hyper-V/Azure environments. Tested-by: Raheel Abdul Faizy <rabdulfaizy@microsoft.com> Signed-off-by: Long Li <longli@microsoft.com> Message-Id: <1759381530-7414-1-git-send-email-longli@linux.microsoft.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
558ae45798
commit
b69ffeaa0a
|
|
@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
|
|||
}
|
||||
|
||||
/*
|
||||
* Our channel array is sparsely populated and we
|
||||
* Our channel array could be sparsely populated and we
|
||||
* initiated I/O on a processor/hw-q that does not
|
||||
* currently have a designated channel. Fix this.
|
||||
* The strategy is simple:
|
||||
* I. Ensure NUMA locality
|
||||
* II. Distribute evenly (best effort)
|
||||
* I. Prefer the channel associated with the current CPU
|
||||
* II. Ensure NUMA locality
|
||||
* III. Distribute evenly (best effort)
|
||||
*/
|
||||
|
||||
/* Prefer the channel on the I/O issuing processor/hw-q */
|
||||
if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
|
||||
return stor_device->stor_chns[q_num];
|
||||
|
||||
node_mask = cpumask_of_node(cpu_to_node(q_num));
|
||||
|
||||
num_channels = 0;
|
||||
|
|
@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
|
|||
/* See storvsc_change_target_cpu(). */
|
||||
outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
|
||||
if (outgoing_channel != NULL) {
|
||||
if (outgoing_channel->target_cpu == q_num) {
|
||||
/*
|
||||
* Ideally, we want to pick a different channel if
|
||||
* available on the same NUMA node.
|
||||
*/
|
||||
node_mask = cpumask_of_node(cpu_to_node(q_num));
|
||||
for_each_cpu_wrap(tgt_cpu,
|
||||
&stor_device->alloced_cpus, q_num + 1) {
|
||||
if (!cpumask_test_cpu(tgt_cpu, node_mask))
|
||||
continue;
|
||||
if (tgt_cpu == q_num)
|
||||
continue;
|
||||
channel = READ_ONCE(
|
||||
stor_device->stor_chns[tgt_cpu]);
|
||||
if (channel == NULL)
|
||||
continue;
|
||||
if (hv_get_avail_to_write_percent(
|
||||
&channel->outbound)
|
||||
> ring_avail_percent_lowater) {
|
||||
outgoing_channel = channel;
|
||||
goto found_channel;
|
||||
}
|
||||
}
|
||||
if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
|
||||
> ring_avail_percent_lowater)
|
||||
goto found_channel;
|
||||
|
||||
/*
|
||||
* All the other channels on the same NUMA node are
|
||||
* busy. Try to use the channel on the current CPU
|
||||
*/
|
||||
if (hv_get_avail_to_write_percent(
|
||||
&outgoing_channel->outbound)
|
||||
> ring_avail_percent_lowater)
|
||||
/*
|
||||
* Channel is busy, try to find a channel on the same NUMA node
|
||||
*/
|
||||
node_mask = cpumask_of_node(cpu_to_node(q_num));
|
||||
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
|
||||
q_num + 1) {
|
||||
if (!cpumask_test_cpu(tgt_cpu, node_mask))
|
||||
continue;
|
||||
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
|
||||
if (!channel)
|
||||
continue;
|
||||
if (hv_get_avail_to_write_percent(&channel->outbound)
|
||||
> ring_avail_percent_lowater) {
|
||||
outgoing_channel = channel;
|
||||
goto found_channel;
|
||||
|
||||
/*
|
||||
* If we reach here, all the channels on the current
|
||||
* NUMA node are busy. Try to find a channel in
|
||||
* other NUMA nodes
|
||||
*/
|
||||
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
|
||||
if (cpumask_test_cpu(tgt_cpu, node_mask))
|
||||
continue;
|
||||
channel = READ_ONCE(
|
||||
stor_device->stor_chns[tgt_cpu]);
|
||||
if (channel == NULL)
|
||||
continue;
|
||||
if (hv_get_avail_to_write_percent(
|
||||
&channel->outbound)
|
||||
> ring_avail_percent_lowater) {
|
||||
outgoing_channel = channel;
|
||||
goto found_channel;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we reach here, all the channels on the current
|
||||
* NUMA node are busy. Try to find a channel in
|
||||
* all NUMA nodes
|
||||
*/
|
||||
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
|
||||
q_num + 1) {
|
||||
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
|
||||
if (!channel)
|
||||
continue;
|
||||
if (hv_get_avail_to_write_percent(&channel->outbound)
|
||||
> ring_avail_percent_lowater) {
|
||||
outgoing_channel = channel;
|
||||
goto found_channel;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* If we reach here, all the channels are busy. Use the
|
||||
* original channel found.
|
||||
*/
|
||||
} else {
|
||||
spin_lock_irqsave(&stor_device->lock, flags);
|
||||
outgoing_channel = stor_device->stor_chns[q_num];
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user