mirror of
https://github.com/torvalds/linux.git
synced 2026-06-14 10:03:05 +02:00
All the RESYNC messages are sent with the resync lock held; the only
exception is resync_finish, which releases resync_lockres before
sending the last resync message. This should be changed as well.
Otherwise, we can see a deadlock as follows:
clustermd2-gqjiang2:~ # cat /proc/mdstat
Personalities : [raid10] [raid1]
md0 : active raid1 sdg[0] sdf[1]
134144 blocks super 1.2 [2/2] [UU]
[===================>.] resync = 99.6% (134144/134144) finish=0.0min speed=26K/sec
bitmap: 1/1 pages [4KB], 65536KB chunk
unused devices: <none>
clustermd2-gqjiang2:~ # ps aux|grep md|grep D
root 20497 0.0 0.0 0 0 ? D 16:00 0:00 [md0_raid1]
clustermd2-gqjiang2:~ # cat /proc/20497/stack
[<ffffffffc05ff51e>] dlm_lock_sync+0x8e/0xc0 [md_cluster]
[<ffffffffc05ff7e8>] __sendmsg+0x98/0x130 [md_cluster]
[<ffffffffc05ff900>] sendmsg+0x20/0x30 [md_cluster]
[<ffffffffc05ffc35>] resync_info_update+0xb5/0xc0 [md_cluster]
[<ffffffffc0593e84>] md_reap_sync_thread+0x134/0x170 [md_mod]
[<ffffffffc059514c>] md_check_recovery+0x28c/0x510 [md_mod]
[<ffffffffc060c882>] raid1d+0x42/0x800 [raid1]
[<ffffffffc058ab61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9a0a5b3f>] kthread+0xff/0x140
[<ffffffff9a800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
clustermd-gqjiang1:~ # ps aux|grep md|grep D
root 20531 0.0 0.0 0 0 ? D 16:00 0:00 [md0_raid1]
root 20537 0.0 0.0 0 0 ? D 16:00 0:00 [md0_cluster_rec]
root 20676 0.0 0.0 0 0 ? D 16:01 0:00 [md0_resync]
clustermd-gqjiang1:~ # cat /proc/mdstat
Personalities : [raid10] [raid1]
md0 : active raid1 sdf[1] sdg[0]
134144 blocks super 1.2 [2/2] [UU]
[===================>.] resync = 97.3% (131072/134144) finish=8076.8min speed=0K/sec
bitmap: 1/1 pages [4KB], 65536KB chunk
unused devices: <none>
clustermd-gqjiang1:~ # cat /proc/20531/stack
[<ffffffffc080974d>] metadata_update_start+0xcd/0xd0 [md_cluster]
[<ffffffffc079c897>] md_update_sb.part.61+0x97/0x820 [md_mod]
[<ffffffffc079f15b>] md_check_recovery+0x29b/0x510 [md_mod]
[<ffffffffc0816882>] raid1d+0x42/0x800 [raid1]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
clustermd-gqjiang1:~ # cat /proc/20537/stack
[<ffffffffc0813222>] freeze_array+0xf2/0x140 [raid1]
[<ffffffffc080a56e>] recv_daemon+0x41e/0x580 [md_cluster]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
clustermd-gqjiang1:~ # cat /proc/20676/stack
[<ffffffffc080951e>] dlm_lock_sync+0x8e/0xc0 [md_cluster]
[<ffffffffc080957f>] lock_token+0x2f/0xa0 [md_cluster]
[<ffffffffc0809622>] lock_comm+0x32/0x90 [md_cluster]
[<ffffffffc08098f5>] sendmsg+0x15/0x30 [md_cluster]
[<ffffffffc0809c0a>] resync_info_update+0x8a/0xc0 [md_cluster]
[<ffffffffc08130ba>] raid1_sync_request+0xa9a/0xb10 [raid1]
[<ffffffffc079b8ea>] md_do_sync+0xbaa/0xf90 [md_mod]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
| | | |
|---|---|---|
| .. | ||
| bcache | ||
| persistent-data | ||
| dm-bio-prison-v1.c | ||
| dm-bio-prison-v1.h | ||
| dm-bio-prison-v2.c | ||
| dm-bio-prison-v2.h | ||
| dm-bio-record.h | ||
| dm-bufio.c | ||
| dm-builtin.c | ||
| dm-cache-background-tracker.c | ||
| dm-cache-background-tracker.h | ||
| dm-cache-block-types.h | ||
| dm-cache-metadata.c | ||
| dm-cache-metadata.h | ||
| dm-cache-policy-internal.h | ||
| dm-cache-policy-smq.c | ||
| dm-cache-policy.c | ||
| dm-cache-policy.h | ||
| dm-cache-target.c | ||
| dm-core.h | ||
| dm-crypt.c | ||
| dm-delay.c | ||
| dm-era-target.c | ||
| dm-exception-store.c | ||
| dm-exception-store.h | ||
| dm-flakey.c | ||
| dm-integrity.c | ||
| dm-io.c | ||
| dm-ioctl.c | ||
| dm-kcopyd.c | ||
| dm-linear.c | ||
| dm-log-userspace-base.c | ||
| dm-log-userspace-transfer.c | ||
| dm-log-userspace-transfer.h | ||
| dm-log-writes.c | ||
| dm-log.c | ||
| dm-mpath.c | ||
| dm-mpath.h | ||
| dm-path-selector.c | ||
| dm-path-selector.h | ||
| dm-queue-length.c | ||
| dm-raid.c | ||
| dm-raid1.c | ||
| dm-region-hash.c | ||
| dm-round-robin.c | ||
| dm-rq.c | ||
| dm-rq.h | ||
| dm-service-time.c | ||
| dm-snap-persistent.c | ||
| dm-snap-transient.c | ||
| dm-snap.c | ||
| dm-stats.c | ||
| dm-stats.h | ||
| dm-stripe.c | ||
| dm-switch.c | ||
| dm-sysfs.c | ||
| dm-table.c | ||
| dm-target.c | ||
| dm-thin-metadata.c | ||
| dm-thin-metadata.h | ||
| dm-thin.c | ||
| dm-uevent.c | ||
| dm-uevent.h | ||
| dm-unstripe.c | ||
| dm-verity-fec.c | ||
| dm-verity-fec.h | ||
| dm-verity-target.c | ||
| dm-verity.h | ||
| dm-writecache.c | ||
| dm-zero.c | ||
| dm-zoned-metadata.c | ||
| dm-zoned-reclaim.c | ||
| dm-zoned-target.c | ||
| dm-zoned.h | ||
| dm.c | ||
| dm.h | ||
| Kconfig | ||
| Makefile | ||
| md-bitmap.c | ||
| md-bitmap.h | ||
| md-cluster.c | ||
| md-cluster.h | ||
| md-faulty.c | ||
| md-linear.c | ||
| md-linear.h | ||
| md-multipath.c | ||
| md-multipath.h | ||
| md.c | ||
| md.h | ||
| raid1-10.c | ||
| raid1.c | ||
| raid1.h | ||
| raid5-cache.c | ||
| raid5-log.h | ||
| raid5-ppl.c | ||
| raid5.c | ||
| raid5.h | ||
| raid10.c | ||
| raid10.h | ||
| raid0.c | ||
| raid0.h | ||