mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
md: remove recovery_disabled
The 'recovery_disabled' logic is complex and confusing. It was originally intended to preserve the RAID array in extreme scenarios, and was used in the following cases:

- When sync fails and setting badblocks also fails, kick out the non-In_sync rdev and block spare rdevs from joining, to preserve the array [1].
- When the last backup is unavailable, prevent repeated add-remove of spares from triggering recovery [2].

The original issues are now resolved:

- Error handlers in all raid types prevent the last rdev from being kicked out.
- Disks with failed recovery are marked Faulty and can't re-join.

Therefore, remove 'recovery_disabled' as it is no longer needed.

[1] 5389042ffa ("md: change managed of recovery_disabled.")
[2] 4044ba58dd ("md: don't retry recovery of raid1 that fails due to error on source drive.")

Link: https://lore.kernel.org/linux-raid/20260105110300.1442509-13-linan666@huaweicloud.com
Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
This commit is contained in:
parent
7435b73f05
commit
5d1dd57929
|
|
@ -2618,9 +2618,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
|||
list_add_rcu(&rdev->same_set, &mddev->disks);
|
||||
bd_link_disk_holder(rdev->bdev, mddev->gendisk);
|
||||
|
||||
/* May as well allow recovery to be retried once */
|
||||
mddev->recovery_disabled++;
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
|
|
|
|||
|
|
@ -505,12 +505,6 @@ struct mddev {
|
|||
int ok_start_degraded;
|
||||
|
||||
unsigned long recovery;
|
||||
/* If a RAID personality determines that recovery (of a particular
|
||||
* device) will fail due to a read error on the source device, it
|
||||
* takes a copy of this number and does not attempt recovery again
|
||||
* until this number changes.
|
||||
*/
|
||||
int recovery_disabled;
|
||||
|
||||
int in_sync; /* know to not need resync */
|
||||
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
|
||||
|
|
|
|||
|
|
@ -1760,7 +1760,6 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
|
|||
set_bit(MD_BROKEN, &mddev->flags);
|
||||
|
||||
if (!test_bit(MD_FAILLAST_DEV, &mddev->flags)) {
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
|
@ -1904,7 +1903,6 @@ static bool raid1_remove_conf(struct r1conf *conf, int disk)
|
|||
|
||||
/* Only remove non-faulty devices if recovery is not possible. */
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
rdev->mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
rdev->mddev->degraded < conf->raid_disks)
|
||||
return false;
|
||||
|
||||
|
|
@ -1924,9 +1922,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
int first = 0;
|
||||
int last = conf->raid_disks - 1;
|
||||
|
||||
if (mddev->recovery_disabled == conf->recovery_disabled)
|
||||
return -EBUSY;
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
|
|
@ -2346,7 +2341,6 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
|
|||
*/
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) ||
|
||||
!fix_sync_read_error(r1_bio)) {
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
md_done_sync(mddev, r1_bio->sectors);
|
||||
md_sync_error(mddev);
|
||||
put_buf(r1_bio);
|
||||
|
|
@ -2948,16 +2942,12 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
*skipped = 1;
|
||||
put_buf(r1_bio);
|
||||
|
||||
if (!ok) {
|
||||
/* Cannot record the badblocks, so need to
|
||||
if (!ok)
|
||||
/* Cannot record the badblocks, md_error has set INTR,
|
||||
* abort the resync.
|
||||
* If there are multiple read targets, could just
|
||||
* fail the really bad ones ???
|
||||
*/
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
return 0;
|
||||
} else
|
||||
else
|
||||
return min_bad;
|
||||
|
||||
}
|
||||
|
|
@ -3144,7 +3134,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
|||
init_waitqueue_head(&conf->wait_barrier);
|
||||
|
||||
bio_list_init(&conf->pending_bio_list);
|
||||
conf->recovery_disabled = mddev->recovery_disabled - 1;
|
||||
|
||||
err = -EIO;
|
||||
for (i = 0; i < conf->raid_disks * 2; i++) {
|
||||
|
|
|
|||
|
|
@ -93,11 +93,6 @@ struct r1conf {
|
|||
*/
|
||||
int fullsync;
|
||||
|
||||
/* When the same as mddev->recovery_disabled we don't allow
|
||||
* recovery to be attempted as we expect a read error.
|
||||
*/
|
||||
int recovery_disabled;
|
||||
|
||||
mempool_t *r1bio_pool;
|
||||
mempool_t r1buf_pool;
|
||||
|
||||
|
|
|
|||
|
|
@ -2130,8 +2130,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
mirror = first;
|
||||
for ( ; mirror <= last ; mirror++) {
|
||||
p = &conf->mirrors[mirror];
|
||||
if (p->recovery_disabled == mddev->recovery_disabled)
|
||||
continue;
|
||||
if (p->rdev) {
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
p->replacement == NULL && repl_slot < 0)
|
||||
|
|
@ -2143,7 +2141,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
if (err)
|
||||
return err;
|
||||
p->head_position = 0;
|
||||
p->recovery_disabled = mddev->recovery_disabled - 1;
|
||||
rdev->raid_disk = mirror;
|
||||
err = 0;
|
||||
if (rdev->saved_raid_disk != mirror)
|
||||
|
|
@ -2196,7 +2193,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
* is not possible.
|
||||
*/
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
mddev->recovery_disabled != p->recovery_disabled &&
|
||||
(!p->replacement || p->replacement == rdev) &&
|
||||
number < conf->geo.raid_disks &&
|
||||
enough(conf, -1)) {
|
||||
|
|
@ -2535,8 +2531,6 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
|
|||
pr_notice("md/raid10:%s: recovery aborted due to read error\n",
|
||||
mdname(mddev));
|
||||
|
||||
conf->mirrors[dw].recovery_disabled
|
||||
= mddev->recovery_disabled;
|
||||
set_bit(MD_RECOVERY_INTR,
|
||||
&mddev->recovery);
|
||||
break;
|
||||
|
|
@ -4075,8 +4069,6 @@ static int raid10_run(struct mddev *mddev)
|
|||
disk->replacement->saved_raid_disk < 0) {
|
||||
conf->fullsync = 1;
|
||||
}
|
||||
|
||||
disk->recovery_disabled = mddev->recovery_disabled - 1;
|
||||
}
|
||||
|
||||
if (mddev->resync_offset != MaxSector)
|
||||
|
|
|
|||
|
|
@ -18,11 +18,6 @@
|
|||
struct raid10_info {
|
||||
struct md_rdev *rdev, *replacement;
|
||||
sector_t head_position;
|
||||
int recovery_disabled; /* matches
|
||||
* mddev->recovery_disabled
|
||||
* when we shouldn't try
|
||||
* recovering this device.
|
||||
*/
|
||||
};
|
||||
|
||||
struct r10conf {
|
||||
|
|
|
|||
|
|
@ -2922,7 +2922,6 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
|
|||
|
||||
if (has_failed(conf)) {
|
||||
set_bit(MD_BROKEN, &conf->mddev->flags);
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
|
||||
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
|
||||
mdname(mddev), mddev->degraded, conf->raid_disks);
|
||||
|
|
@ -3727,10 +3726,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
|||
}
|
||||
md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf));
|
||||
|
||||
if (abort) {
|
||||
conf->recovery_disabled = conf->mddev->recovery_disabled;
|
||||
if (abort)
|
||||
md_sync_error(conf->mddev);
|
||||
}
|
||||
}
|
||||
|
||||
static int want_replace(struct stripe_head *sh, int disk_idx)
|
||||
|
|
@ -7548,8 +7545,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
|||
}
|
||||
|
||||
conf->bypass_threshold = BYPASS_THRESHOLD;
|
||||
conf->recovery_disabled = mddev->recovery_disabled - 1;
|
||||
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
if (mddev->reshape_position == MaxSector)
|
||||
conf->previous_raid_disks = mddev->raid_disks;
|
||||
|
|
@ -8249,7 +8244,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
* isn't possible.
|
||||
*/
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
!has_failed(conf) &&
|
||||
(!p->replacement || p->replacement == rdev) &&
|
||||
number < conf->raid_disks) {
|
||||
|
|
@ -8310,8 +8304,6 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
|
||||
return 0;
|
||||
}
|
||||
if (mddev->recovery_disabled == conf->recovery_disabled)
|
||||
return -EBUSY;
|
||||
|
||||
if (rdev->saved_raid_disk < 0 && has_failed(conf))
|
||||
/* no point adding a device */
|
||||
|
|
|
|||
|
|
@ -640,7 +640,6 @@ struct r5conf {
|
|||
* (fresh device added).
|
||||
* Cleared when a sync completes.
|
||||
*/
|
||||
int recovery_disabled;
|
||||
/* per cpu variables */
|
||||
struct raid5_percpu __percpu *percpu;
|
||||
int scribble_disks;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user