mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
btrfs: introduce RAID1 round-robin read balancing
Add a round-robin read policy that balances reads over the available devices (all RAID1 block group profiles). Switching to the next device is done after a number of blocks has been read, which is 256K by default and is configurable in sysfs. The format is "round-robin:<min-contig-read>" and can be set in the file /sys/fs/btrfs/FSID/read_policy. Signed-off-by: Anand Jain <anand.jain@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
22fb0d99c9
commit
6d7a915495
|
|
@ -1305,7 +1305,12 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
|
|||
}
|
||||
BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
|
||||
|
||||
static const char * const btrfs_read_policy_name[] = { "pid" };
|
||||
/*
 * Names of the read policies, indexed by enum btrfs_read_policy. Entries
 * must therefore stay in the same order as the enumerators.
 *
 * Both the strings and the array itself are const: the table is only ever
 * read (name lookup and printing), so nothing should be able to repoint an
 * entry at runtime.
 */
static const char * const btrfs_read_policy_name[] = {
	"pid",
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	"round-robin",
#endif
};
|
||||
|
||||
static int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
|
||||
{
|
||||
|
|
@ -1355,6 +1360,12 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
|
|||
|
||||
ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
if (i == BTRFS_READ_POLICY_RR)
|
||||
ret += sysfs_emit_at(buf, ret, ":%u",
|
||||
READ_ONCE(fs_devices->rr_min_contig_read));
|
||||
#endif
|
||||
|
||||
if (i == policy)
|
||||
ret += sysfs_emit_at(buf, ret, "]");
|
||||
}
|
||||
|
|
@ -1376,6 +1387,41 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
|
|||
if (index < 0)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/* If moving from RR then disable collecting fs stats. */
|
||||
if (fs_devices->read_policy == BTRFS_READ_POLICY_RR && index != BTRFS_READ_POLICY_RR)
|
||||
fs_devices->collect_fs_stats = false;
|
||||
|
||||
if (index == BTRFS_READ_POLICY_RR) {
|
||||
if (value != -1) {
|
||||
const u32 sectorsize = fs_devices->fs_info->sectorsize;
|
||||
|
||||
if (!IS_ALIGNED(value, sectorsize)) {
|
||||
u64 temp_value = round_up(value, sectorsize);
|
||||
|
||||
btrfs_debug(fs_devices->fs_info,
|
||||
"read_policy: min contig read %lld should be multiple of sectorsize %u, rounded to %llu",
|
||||
value, sectorsize, temp_value);
|
||||
value = temp_value;
|
||||
}
|
||||
} else {
|
||||
value = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
|
||||
}
|
||||
|
||||
if (index != READ_ONCE(fs_devices->read_policy) ||
|
||||
value != READ_ONCE(fs_devices->rr_min_contig_read)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, index);
|
||||
WRITE_ONCE(fs_devices->rr_min_contig_read, value);
|
||||
|
||||
btrfs_info(fs_devices->fs_info, "read policy set to '%s:%lld'",
|
||||
btrfs_read_policy_name[index], value);
|
||||
}
|
||||
|
||||
fs_devices->collect_fs_stats = true;
|
||||
|
||||
return len;
|
||||
}
|
||||
#endif
|
||||
if (index != READ_ONCE(fs_devices->read_policy)) {
|
||||
WRITE_ONCE(fs_devices->read_policy, index);
|
||||
btrfs_info(fs_devices->fs_info, "read policy set to '%s'",
|
||||
|
|
|
|||
|
|
@ -1329,6 +1329,9 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
|
|||
fs_devices->total_rw_bytes = 0;
|
||||
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
|
||||
fs_devices->read_policy = BTRFS_READ_POLICY_PID;
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
fs_devices->rr_min_contig_read = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -5953,6 +5956,63 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
|
|||
return len;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/*
 * One candidate stripe for round-robin read selection. It pairs the devid of
 * the device backing the stripe with the stripe's index in the chunk map, so
 * that candidates can be sorted by devid without losing the original index.
 */
struct stripe_mirror {
	/* devid of the device backing this stripe; sort key of btrfs_cmp_devid(). */
	u64 devid;
	/* Index of this stripe in map->stripes[]. */
	int num;
};
|
||||
|
||||
static int btrfs_cmp_devid(const void *a, const void *b)
|
||||
{
|
||||
const struct stripe_mirror *s1 = (const struct stripe_mirror *)a;
|
||||
const struct stripe_mirror *s2 = (const struct stripe_mirror *)b;
|
||||
|
||||
if (s1->devid < s2->devid)
|
||||
return -1;
|
||||
if (s1->devid > s2->devid)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Select a stripe for reading using the round-robin algorithm.
|
||||
*
|
||||
* 1. Compute the read cycle as the total sectors read divided by the minimum
|
||||
* sectors per device.
|
||||
* 2. Determine the stripe number for the current read by taking the modulus
|
||||
* of the read cycle with the total number of stripes:
|
||||
*
|
||||
* stripe index = (total sectors / min sectors per dev) % num stripes
|
||||
*
|
||||
* The calculated stripe index is then used to select the corresponding device
|
||||
* from the list of devices, which is ordered by devid.
|
||||
*/
|
||||
static int btrfs_read_rr(const struct btrfs_chunk_map *map, int first, int num_stripes)
|
||||
{
|
||||
struct stripe_mirror stripes[BTRFS_RAID1_MAX_MIRRORS] = { 0 };
|
||||
struct btrfs_device *device = map->stripes[first].dev;
|
||||
struct btrfs_fs_info *fs_info = device->fs_devices->fs_info;
|
||||
unsigned int read_cycle;
|
||||
unsigned int total_reads;
|
||||
unsigned int min_reads_per_dev;
|
||||
|
||||
total_reads = percpu_counter_sum(&fs_info->stats_read_blocks);
|
||||
min_reads_per_dev = READ_ONCE(fs_info->fs_devices->rr_min_contig_read) >>
|
||||
fs_info->sectorsize_bits;
|
||||
|
||||
for (int index = 0, i = first; i < first + num_stripes; i++) {
|
||||
stripes[index].devid = map->stripes[i].dev->devid;
|
||||
stripes[index].num = i;
|
||||
index++;
|
||||
}
|
||||
sort(stripes, num_stripes, sizeof(struct stripe_mirror),
|
||||
btrfs_cmp_devid, NULL);
|
||||
|
||||
read_cycle = total_reads / min_reads_per_dev;
|
||||
return stripes[read_cycle % num_stripes].num;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int find_live_mirror(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_chunk_map *map, int first,
|
||||
int dev_replace_is_ongoing)
|
||||
|
|
@ -5982,6 +6042,11 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
|
|||
case BTRFS_READ_POLICY_PID:
|
||||
preferred_mirror = first + (current->pid % num_stripes);
|
||||
break;
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
case BTRFS_READ_POLICY_RR:
|
||||
preferred_mirror = btrfs_read_rr(map, first, num_stripes);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (dev_replace_is_ongoing &&
|
||||
|
|
|
|||
|
|
@ -296,6 +296,9 @@ enum btrfs_chunk_allocation_policy {
|
|||
BTRFS_CHUNK_ALLOC_ZONED,
|
||||
};
|
||||
|
||||
/*
 * Default value of rr_min_contig_read for the round-robin read policy.
 * NOTE(review): this appears to be a byte count, since btrfs_read_rr() shifts
 * rr_min_contig_read right by sectorsize_bits to get a number of blocks;
 * confirm against the field's documentation.
 */
#define BTRFS_DEFAULT_RR_MIN_CONTIG_READ (SZ_256K)

/* Keep in sync with raid_attr table, current maximum is RAID1C4. */
#define BTRFS_RAID1_MAX_MIRRORS (4)
|
||||
/*
|
||||
* Read policies for mirrored block group profiles, read picks the stripe based
|
||||
* on these policies.
|
||||
|
|
@ -303,6 +306,10 @@ enum btrfs_chunk_allocation_policy {
|
|||
enum btrfs_read_policy {
	/* Use process PID to choose the stripe */
	BTRFS_READ_POLICY_PID,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/* Balancing RAID1 reads across all striped devices (round-robin). */
	BTRFS_READ_POLICY_RR,
#endif
	/* Number of read policies, not a policy itself; keep it last. */
	BTRFS_NR_READ_POLICY,
};
|
||||
|
||||
|
|
@ -433,6 +440,12 @@ struct btrfs_fs_devices {
|
|||
enum btrfs_read_policy read_policy;
|
||||
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
/*
|
||||
* Minimum contiguous reads before switching to next device, the unit
|
||||
* is one block/sectorsize.
|
||||
*/
|
||||
u32 rr_min_contig_read;
|
||||
|
||||
/* Checksum mode - offload it or do it synchronously. */
|
||||
enum btrfs_offload_csum_mode offload_csum_mode;
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user