Skip to content

Commit

Permalink
Fix deadlocks resulting from the post-kmem rework merge
Browse files Browse the repository at this point in the history
Put threads holding z_hold_mtx[i] under PF_STRANS to avoid lock
inversions typically involving zfs_zinactive() and zfs_zget().

Use MUTEX_FSTRANS for db_mtx to avoid the following lock inversion:

    Process 1:
	dbuf_find()
	    acquire hash_mutexes[i]
	    wait for db_mtx

    Process 2:
	dbuf_dirty()
	    acquire db_mtx
	<reclaim>
	zfs_zinactive()
	    wait for z_hold_mtx[i]

    Process 3:
	<doing a zpl_lookup>
	zfs_zget()
	    acquire z_hold_mtx[i]
	sa_buf_hold()
	...
	dbuf_find()
	    wait for hash_mutexes[i]
  • Loading branch information
dweeezil committed Mar 29, 2015
1 parent 0f7d2a4 commit 1c05d84
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
2 changes: 1 addition & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
dmu_buf_impl_t *db = vdb;
bzero(db, sizeof (dmu_buf_impl_t));

mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&db->db_mtx, NULL, MUTEX_FSTRANS, NULL);
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
refcount_create(&db->db_holds);
list_link_init(&db->db_link);
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/dmu_zfetch.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,12 +289,15 @@ dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
{
uint64_t fetchsz;
uint64_t i;
fstrans_cookie_t cookie;

fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);

cookie = spl_fstrans_mark();
for (i = 0; i < fetchsz; i++) {
dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_ASYNC_READ);
}
spl_fstrans_unmark(cookie);

return (fetchsz);
}
Expand Down
33 changes: 24 additions & 9 deletions module/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
sa_bulk_attr_t *sa_attrs;
int cnt = 0;
zfs_acl_locator_cb_t locate = { 0 };
fstrans_cookie_t cookie;

if (zsb->z_replay) {
obj = vap->va_nodeid;
Expand Down Expand Up @@ -610,6 +611,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
}

cookie = spl_fstrans_mark();
ZFS_OBJ_HOLD_ENTER(zsb, obj);
VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));

Expand Down Expand Up @@ -785,6 +787,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
ZFS_OBJ_HOLD_EXIT(zsb, obj);
spl_fstrans_unmark(cookie);
}

/*
Expand Down Expand Up @@ -890,6 +893,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
znode_t *zp;
int err;
sa_handle_t *hdl;
fstrans_cookie_t cookie = spl_fstrans_mark();

*zpp = NULL;

Expand All @@ -899,6 +903,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (err);
}

Expand All @@ -909,6 +914,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (SET_ERROR(EINVAL));
}

Expand Down Expand Up @@ -956,6 +962,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
mutex_exit(&zp->z_lock);
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (err);
}

Expand All @@ -977,6 +984,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
*zpp = zp;
}
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (err);
}

Expand All @@ -992,7 +1000,9 @@ zfs_rezget(znode_t *zp)
int err;
int count = 0;
uint64_t gen;
fstrans_cookie_t cookie;

cookie = spl_fstrans_mark();
ZFS_OBJ_HOLD_ENTER(zsb, obj_num);

mutex_enter(&zp->z_acl_lock);
Expand All @@ -1018,6 +1028,7 @@ zfs_rezget(znode_t *zp)
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (err);
}

Expand All @@ -1028,6 +1039,7 @@ zfs_rezget(znode_t *zp)
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (SET_ERROR(EINVAL));
}

Expand All @@ -1054,6 +1066,7 @@ zfs_rezget(znode_t *zp)
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (SET_ERROR(EIO));
}

Expand All @@ -1062,6 +1075,7 @@ zfs_rezget(znode_t *zp)
if (gen != zp->z_gen) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);
return (SET_ERROR(EIO));
}

Expand All @@ -1070,6 +1084,7 @@ zfs_rezget(znode_t *zp)
zfs_inode_update(zp);

ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
spl_fstrans_unmark(cookie);

return (0);
}
Expand All @@ -1081,6 +1096,7 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
objset_t *os = zsb->z_os;
uint64_t obj = zp->z_id;
uint64_t acl_obj = zfs_external_acl(zp);
fstrans_cookie_t cookie = spl_fstrans_mark();

ZFS_OBJ_HOLD_ENTER(zsb, obj);
if (acl_obj) {
Expand All @@ -1090,14 +1106,15 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
VERIFY(0 == dmu_object_free(os, obj, tx));
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj);
spl_fstrans_unmark(cookie);
}

void
zfs_zinactive(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
boolean_t drop_mutex = 0;
fstrans_cookie_t cookie;

ASSERT(zp->z_sa_hdl);

Expand All @@ -1110,10 +1127,8 @@ zfs_zinactive(znode_t *zp)
* zfs_zinactive() call path. To avoid this deadlock the process
* must not reacquire the mutex when it is already holding it.
*/
if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {

This comment has been minimized.

Copy link
@behlendorf

behlendorf Mar 30, 2015

[style] Normally I'm all for comments but in this case I wouldn't object to dropping this one to bemore sililar to upstream. The use of spl_fstrans_mark() I'd consider self-documenting.

ZFS_OBJ_HOLD_ENTER(zsb, z_id);
drop_mutex = 1;
}
cookie = spl_fstrans_mark();
ZFS_OBJ_HOLD_ENTER(zsb, z_id);

mutex_enter(&zp->z_lock);

Expand All @@ -1124,8 +1139,8 @@ zfs_zinactive(znode_t *zp)
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);

if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
spl_fstrans_unmark(cookie);

zfs_rmnode(zp);
return;
Expand All @@ -1134,8 +1149,8 @@ zfs_zinactive(znode_t *zp)
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);

if (drop_mutex)
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
ZFS_OBJ_HOLD_EXIT(zsb, z_id);
spl_fstrans_unmark(cookie);
}

static inline int
Expand Down

0 comments on commit 1c05d84

Please sign in to comment.