Pull MD fixes from Yu:

"- fix REQ_RAHEAD and REQ_NOWAIT IO err handling for raid1/10
 - fix max_write_behind setting for dm-raid
 - some minor cleanups"

* tag 'md-6.16-20250530' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux:
  md/md-bitmap: remove parameter slot from bitmap_create()
  md/md-bitmap: cleanup bitmap_ops->startwrite()
  md/dm-raid: remove max_write_behind setting limit
  md/md-bitmap: fix dm-raid max_write_behind setting
  md/raid1,raid10: don't handle IO error for REQ_RAHEAD and REQ_NOWAIT
This commit is contained in:
Jens Axboe 2025-05-30 04:42:53 -06:00
commit f4d22ac96a
7 changed files with 60 additions and 52 deletions

View File

@ -1356,11 +1356,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
return -EINVAL;
}
/*
* In device-mapper, we specify things in sectors, but
* MD records this value in kB
*/
if (value < 0 || value / 2 > COUNTER_MAX) {
if (value < 0) {
rs->ti->error = "Max write-behind limit out of range";
return -EINVAL;
}

View File

@ -105,9 +105,19 @@
*
*/
typedef __u16 bitmap_counter_t;
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
@ -789,7 +799,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
* is a good choice? We choose COUNTER_MAX / 2 arbitrarily.
*/
write_behind = bitmap->mddev->bitmap_info.max_write_behind;
if (write_behind > COUNTER_MAX)
if (write_behind > COUNTER_MAX / 2)
write_behind = COUNTER_MAX / 2;
sb->write_behind = cpu_to_le32(write_behind);
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
@ -1672,13 +1682,13 @@ __acquires(bitmap->lock)
&(bitmap->bp[page].map[pageoff]);
}
static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors)
static void bitmap_start_write(struct mddev *mddev, sector_t offset,
unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return 0;
return;
while (sectors) {
sector_t blocks;
@ -1688,7 +1698,7 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
if (!bmc) {
spin_unlock_irq(&bitmap->counts.lock);
return 0;
return;
}
if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
@ -1724,11 +1734,10 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
else
sectors = 0;
}
return 0;
}
static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors)
static void bitmap_end_write(struct mddev *mddev, sector_t offset,
unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;
@ -2205,9 +2214,9 @@ static struct bitmap *__bitmap_create(struct mddev *mddev, int slot)
return ERR_PTR(err);
}
static int bitmap_create(struct mddev *mddev, int slot)
static int bitmap_create(struct mddev *mddev)
{
struct bitmap *bitmap = __bitmap_create(mddev, slot);
struct bitmap *bitmap = __bitmap_create(mddev, -1);
if (IS_ERR(bitmap))
return PTR_ERR(bitmap);
@ -2670,7 +2679,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
}
mddev->bitmap_info.offset = offset;
rv = bitmap_create(mddev, -1);
rv = bitmap_create(mddev);
if (rv)
goto out;
@ -3003,8 +3012,8 @@ static struct bitmap_operations bitmap_ops = {
.end_behind_write = bitmap_end_behind_write,
.wait_behind_writes = bitmap_wait_behind_writes,
.startwrite = bitmap_startwrite,
.endwrite = bitmap_endwrite,
.start_write = bitmap_start_write,
.end_write = bitmap_end_write,
.start_sync = bitmap_start_sync,
.end_sync = bitmap_end_sync,
.cond_end_sync = bitmap_cond_end_sync,

View File

@ -9,15 +9,6 @@
#define BITMAP_MAGIC 0x6d746962
typedef __u16 bitmap_counter_t;
#define COUNTER_BITS 16
#define COUNTER_BIT_SHIFT 4
#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
enum bitmap_state {
BITMAP_STALE = 1, /* the bitmap file is out of date or had -EIO */
@ -72,7 +63,7 @@ struct md_bitmap_stats {
struct bitmap_operations {
bool (*enabled)(struct mddev *mddev);
int (*create)(struct mddev *mddev, int slot);
int (*create)(struct mddev *mddev);
int (*resize)(struct mddev *mddev, sector_t blocks, int chunksize,
bool init);
@ -89,10 +80,10 @@ struct bitmap_operations {
void (*end_behind_write)(struct mddev *mddev);
void (*wait_behind_writes)(struct mddev *mddev);
int (*startwrite)(struct mddev *mddev, sector_t offset,
void (*start_write)(struct mddev *mddev, sector_t offset,
unsigned long sectors);
void (*end_write)(struct mddev *mddev, sector_t offset,
unsigned long sectors);
void (*endwrite)(struct mddev *mddev, sector_t offset,
unsigned long sectors);
bool (*start_sync)(struct mddev *mddev, sector_t offset,
sector_t *blocks, bool degraded);
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);

View File

@ -6225,7 +6225,7 @@ int md_run(struct mddev *mddev)
}
if (err == 0 && pers->sync_request &&
(mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
err = mddev->bitmap_ops->create(mddev, -1);
err = mddev->bitmap_ops->create(mddev);
if (err)
pr_warn("%s: failed to create bitmap (%d)\n",
mdname(mddev), err);
@ -7285,7 +7285,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
err = 0;
if (mddev->pers) {
if (fd >= 0) {
err = mddev->bitmap_ops->create(mddev, -1);
err = mddev->bitmap_ops->create(mddev);
if (!err)
err = mddev->bitmap_ops->load(mddev);
@ -7601,7 +7601,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
rv = mddev->bitmap_ops->create(mddev, -1);
rv = mddev->bitmap_ops->create(mddev);
if (!rv)
rv = mddev->bitmap_ops->load(mddev);
@ -8799,14 +8799,14 @@ static void md_bitmap_start(struct mddev *mddev,
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
&md_io_clone->sectors);
mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
md_io_clone->sectors);
mddev->bitmap_ops->start_write(mddev, md_io_clone->offset,
md_io_clone->sectors);
}
static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
md_io_clone->sectors);
mddev->bitmap_ops->end_write(mddev, md_io_clone->offset,
md_io_clone->sectors);
}
static void md_end_clone_io(struct bio *bio)

View File

@ -293,3 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
return false;
}
/*
* bio with REQ_RAHEAD or REQ_NOWAIT can fail at anytime, before such IO is
* submitted to the underlying disks, hence don't record badblocks or retry
* in this case.
*/
static inline bool raid1_should_handle_error(struct bio *bio)
{
return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT));
}

View File

@ -373,14 +373,16 @@ static void raid1_end_read_request(struct bio *bio)
*/
update_head_pos(r1_bio->read_disk, r1_bio);
if (uptodate)
if (uptodate) {
set_bit(R1BIO_Uptodate, &r1_bio->state);
else if (test_bit(FailFast, &rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
} else if (test_bit(FailFast, &rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state)) {
/* This was a fail-fast read so we definitely
* want to retry */
;
else {
} else if (!raid1_should_handle_error(bio)) {
uptodate = 1;
} else {
/* If all other devices have failed, we want to return
* the error upwards rather than fail the last device.
* Here we redefine "uptodate" to mean "Don't want to retry"
@ -451,16 +453,15 @@ static void raid1_end_write_request(struct bio *bio)
struct bio *to_put = NULL;
int mirror = find_bio_disk(r1_bio, bio);
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error;
sector_t lo = r1_bio->sector;
sector_t hi = r1_bio->sector + r1_bio->sectors;
discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
bool ignore_error = !raid1_should_handle_error(bio) ||
(bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
/*
* 'one mirror IO has finished' event handler:
*/
if (bio->bi_status && !discard_error) {
if (bio->bi_status && !ignore_error) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
@ -511,7 +512,7 @@ static void raid1_end_write_request(struct bio *bio)
/* Maybe we can clear some bad blocks. */
if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
!discard_error) {
!ignore_error) {
r1_bio->bios[mirror] = IO_MADE_GOOD;
set_bit(R1BIO_MadeGood, &r1_bio->state);
}

View File

@ -399,6 +399,8 @@ static void raid10_end_read_request(struct bio *bio)
* wait for the 'master' bio.
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
} else if (!raid1_should_handle_error(bio)) {
uptodate = 1;
} else {
/* If all other devices that store this block have
* failed, we want to return the error upwards rather
@ -456,9 +458,8 @@ static void raid10_end_write_request(struct bio *bio)
int slot, repl;
struct md_rdev *rdev = NULL;
struct bio *to_put = NULL;
bool discard_error;
discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
bool ignore_error = !raid1_should_handle_error(bio) ||
(bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
@ -472,7 +473,7 @@ static void raid10_end_write_request(struct bio *bio)
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
if (bio->bi_status && !discard_error) {
if (bio->bi_status && !ignore_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@ -527,7 +528,7 @@ static void raid10_end_write_request(struct bio *bio)
/* Maybe we can clear some bad blocks. */
if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
r10_bio->sectors) &&
!discard_error) {
!ignore_error) {
bio_put(bio);
if (repl)
r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;