Skip to content

Commit

Permalink
Prevent direct reclaim in critical sections marked by the ZFS object …
Browse files Browse the repository at this point in the history
…hash mutex

The following deadlock occurred on the buildbot:

[ 3774.649030] VERIFY3(((*(volatile typeof((((&((zsb))->z_hold_mtx[(((z_id)) & (256 - 1))])))->m_owner) *)&((((&((zsb))->z_hold_mtx[(((z_id)) & (256 - 1))])))->m_owner))) != get_current()) failed (ffff880036362dc0 != ffff880036362dc0)
[ 3774.649407] PANIC at zfs_znode.c:1108:zfs_zinactive()
[ 3774.649415] Showing stack for process 32119
[ 3774.649425] CPU: 3 PID: 32119 Comm: filebench Tainted: PF          O 3.11.10-100.fc18.x86_64 #1
[ 3774.649428] Hardware name: Red Hat RHEV Hypervisor, BIOS 0.5.1 01/01/2007
[ 3774.649428]  ffffffffa03a3af8 ffff880047cf2bb8 ffffffff81666676 0000000000000007
[ 3774.649430]  ffffffffa03a3b73 ffff880047cf2bc8 ffffffffa01c73e4 ffff880047cf2d68
[ 3774.649435]  ffffffffa01c761d 0000000000000003 ffff88004b1accc0 0000000000000030
[ 3774.649447] Call Trace:
[ 3774.649457]  [<ffffffff81666676>] dump_stack+0x46/0x58
[ 3774.649465]  [<ffffffffa01c73e4>] spl_dumpstack+0x44/0x50 [spl]
[ 3774.649468]  [<ffffffffa01c761d>] spl_panic+0xbd/0x100 [spl]
[ 3774.649476]  [<ffffffff81675440>] ? ftrace_call+0x5/0x2f
[ 3774.649493]  [<ffffffffa03369d5>] zfs_zinactive+0x1f5/0x240 [zfs]
[ 3774.649538]  [<ffffffffa032fb9c>] zfs_inactive+0x7c/0x430 [zfs]
[ 3774.649546]  [<ffffffffa03506fe>] zpl_evict_inode+0x4e/0xa0 [zfs]
[ 3774.649546]  [<ffffffff811c8e12>] evict+0xa2/0x1a0
[ 3774.649546]  [<ffffffff811c8f4e>] dispose_list+0x3e/0x60
[ 3774.649546]  [<ffffffff811c9cd1>] prune_icache_sb+0x161/0x300
[ 3774.649546]  [<ffffffff811b2e35>] prune_super+0xe5/0x1b0
[ 3774.649546]  [<ffffffff81153771>] shrink_slab+0x151/0x2e0
[ 3774.649546]  [<ffffffff811a9809>] ? vmpressure+0x29/0x90
[ 3774.649546]  [<ffffffff811a97e5>] ? vmpressure+0x5/0x90
[ 3774.649546]  [<ffffffff81156979>] do_try_to_free_pages+0x3e9/0x5a0
[ 3774.649548]  [<ffffffff811527ff>] ? throttle_direct_reclaim.isra.45+0x8f/0x280
[ 3774.649552]  [<ffffffff81156e38>] try_to_free_pages+0xf8/0x180
[ 3774.649556]  [<ffffffff8114ae3a>] __alloc_pages_nodemask+0x6aa/0xae0
[ 3774.649562]  [<ffffffff81189fb8>] alloc_pages_current+0xb8/0x190
[ 3774.649565]  [<ffffffff81193e30>] new_slab+0x2d0/0x3a0
[ 3774.649577]  [<ffffffff81664d2d>] __slab_alloc+0x393/0x560
[ 3774.649579]  [<ffffffffa01c1b30>] ? spl_kmem_cache_alloc+0xb0/0xee0 [spl]
[ 3774.649583]  [<ffffffffa01c1b30>] ? spl_kmem_cache_alloc+0xb0/0xee0 [spl]
[ 3774.649583]  [<ffffffffa01c1b30>] ? spl_kmem_cache_alloc+0xb0/0xee0 [spl]
[ 3774.649585]  [<ffffffff81195230>] kmem_cache_alloc+0x1a0/0x200
[ 3774.649589]  [<ffffffffa01c1b30>] ? spl_kmem_cache_alloc+0xb0/0xee0 [spl]
[ 3774.649594]  [<ffffffffa01c1b30>] spl_kmem_cache_alloc+0xb0/0xee0 [spl]
[ 3774.649596]  [<ffffffff81675440>] ? ftrace_call+0x5/0x2f
[ 3774.649599]  [<ffffffff81675440>] ? ftrace_call+0x5/0x2f
[ 3774.649624]  [<ffffffffa03505c0>] ? zpl_inode_destroy+0x60/0x60 [zfs]
[ 3774.649687]  [<ffffffffa033266f>] zfs_inode_alloc+0x1f/0x40 [zfs]
[ 3774.649687]  [<ffffffffa03505da>] zpl_inode_alloc+0x1a/0x70 [zfs]
[ 3774.649687]  [<ffffffff811c7e16>] alloc_inode+0x26/0xa0
[ 3774.649687]  [<ffffffff811c9e83>] new_inode_pseudo+0x13/0x60
[ 3774.649687]  [<ffffffff811c9eed>] new_inode+0x1d/0x40
[ 3774.649710]  [<ffffffffa0332ac7>] zfs_znode_alloc+0x47/0x730 [zfs]
[ 3774.649770]  [<ffffffffa02c8f4e>] ? sa_build_index+0xbe/0x1b0 [zfs]
[ 3774.649770]  [<ffffffffa02c9775>] ? sa_build_layouts+0x6b5/0xc80 [zfs]
[ 3774.649770]  [<ffffffff81675440>] ? ftrace_call+0x5/0x2f
[ 3774.649794]  [<ffffffffa0333b5e>] zfs_mknode+0x93e/0xe90 [zfs]
[ 3774.649813]  [<ffffffffa032be5b>] zfs_create+0x5db/0x780 [zfs]
[ 3774.649840]  [<ffffffffa0350ba5>] zpl_xattr_set_dir.isra.9+0x245/0x2a0 [zfs]
[ 3774.649843]  [<ffffffff81675440>] ? ftrace_call+0x5/0x2f
[ 3774.649895]  [<ffffffffa0351140>] zpl_xattr_set+0xe0/0x3f0 [zfs]
[ 3774.649895]  [<ffffffffa03516a4>] __zpl_xattr_security_init+0x64/0xb0 [zfs]
[ 3774.649968]  [<ffffffffa0351640>] ? zpl_xattr_trusted_set+0xb0/0xb0 [zfs]
[ 3774.649972]  [<ffffffff812a737c>] security_inode_init_security+0xbc/0xf0
[ 3774.649977]  [<ffffffffa0352028>] zpl_xattr_security_init+0x18/0x20 [zfs]
[ 3774.650017]  [<ffffffffa0350134>] zpl_create+0x154/0x240 [zfs]
[ 3774.650018]  [<ffffffff811bde85>] vfs_create+0xb5/0x120
[ 3774.650018]  [<ffffffff811be874>] do_last+0x984/0xe40
[ 3774.650020]  [<ffffffff811baf55>] ? link_path_walk+0x255/0x880
[ 3774.650023]  [<ffffffff811bedf2>] path_openat+0xc2/0x680
[ 3774.650026]  [<ffffffff811bf653>] do_filp_open+0x43/0xa0
[ 3774.650030]  [<ffffffff811bf615>] ? do_filp_open+0x5/0xa0
[ 3774.650034]  [<ffffffff811ae7fc>] do_sys_open+0x13c/0x230
[ 3774.650037]  [<ffffffff811ae912>] SyS_open+0x22/0x30
[ 3774.650040]  [<ffffffff81675819>] system_call_fastpath+0x16/0x1b

`zfs_mknode()` grabbed an object hash mutex via `ZFS_OBJ_HOLD_ENTER()`
and then tried to allocate a znode with `zfs_znode_alloc()`. The
allocation entered direct reclaim, which evicted an inode and called
`zfs_zinactive()`; that in turn tried to `ZFS_OBJ_HOLD_ENTER()` an
object hash mutex that the same thread already held. This is an edge
case that the kmem-rework missed. Consequently, it is a regression from
79c76d5.

We can fix this by making `ZFS_OBJ_HOLD_ENTER()` call
`spl_fstrans_mark()` and `ZFS_OBJ_HOLD_EXIT()` call
`spl_fstrans_unmark()`. The cookie returned by `spl_fstrans_mark()` has
to be kept until the matching `spl_fstrans_unmark()`, so we allocate an
array for each superblock to hold the cookies.  Each cookie is protected
by the corresponding `->z_hold_mtx`.

Closes openzfs#3331

Signed-off-by: Richard Yao <ryao@gentoo.org>
  • Loading branch information
ryao committed Apr 22, 2015
1 parent b467db4 commit 7e692d8
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/sys/zfs_vfsops.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ typedef struct zfs_sb {
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
#define ZFS_OBJ_MTX_SZ 256
kmutex_t *z_hold_mtx; /* znode hold locks */
fstrans_cookie_t *z_hold_cookie; /* znode hold cookie */
} zfs_sb_t;

#define ZFS_SUPER_MAGIC 0x2fc12fc1
Expand Down
7 changes: 6 additions & 1 deletion include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,11 +279,16 @@ typedef struct znode {
/*
 * Per-object hold locks.
 *
 * An object number is hashed (masked by ZFS_OBJ_MTX_SZ - 1) to pick one
 * slot; the slot's mutex lives in zsb->z_hold_mtx[] and its fstrans
 * cookie in zsb->z_hold_cookie[].  Both ZFS_OBJ_MUTEX() and
 * ZFS_OBJ_COOKIE() evaluate to a POINTER to the slot, so the cookie must
 * be dereferenced to be stored or read.
 *
 * ZFS_OBJ_HOLD_ENTER() takes the mutex and then saves the cookie returned
 * by spl_fstrans_mark(); ZFS_OBJ_HOLD_EXIT() hands that cookie back to
 * spl_fstrans_unmark() before dropping the mutex.  The cookie slot is
 * only touched while the matching mutex is held.
 *
 * The macro arguments are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define	ZFS_OBJ_MUTEX(zsb, obj_num)	\
	(&(zsb)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define	ZFS_OBJ_COOKIE(zsb, obj_num)	\
	(&(zsb)->z_hold_cookie[ZFS_OBJ_HASH(obj_num)])

/* Wrapped in do/while (0) so the two statements act as one. */
#define	ZFS_OBJ_HOLD_ENTER(zsb, obj_num)	\
do {									\
	mutex_enter(ZFS_OBJ_MUTEX((zsb), (obj_num)));			\
	*ZFS_OBJ_COOKIE((zsb), (obj_num)) = spl_fstrans_mark();		\
} while (0)

/*
 * Evaluates to non-zero when the mutex was acquired, zero otherwise.
 * On success the cookie is recorded as in ZFS_OBJ_HOLD_ENTER(), because
 * the hold is released through ZFS_OBJ_HOLD_EXIT(), which unmarks using
 * the stored cookie.
 * NOTE(review): assumes mutex_tryenter() returns non-zero on success and
 * that callers only test the result for truth -- confirm against callers.
 */
#define	ZFS_OBJ_HOLD_TRYENTER(zsb, obj_num)	\
	(mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num))) ?		\
	(*ZFS_OBJ_COOKIE((zsb), (obj_num)) = spl_fstrans_mark(), 1) : 0)

/* Unmark first: the cookie slot is only valid while the mutex is held. */
#define	ZFS_OBJ_HOLD_EXIT(zsb, obj_num)	\
do {									\
	spl_fstrans_unmark(*ZFS_OBJ_COOKIE((zsb), (obj_num)));		\
	mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num)));			\
} while (0)

#define	ZFS_OBJ_HOLD_OWNED(zsb, obj_num)	\
	mutex_owned(ZFS_OBJ_MUTEX((zsb), (obj_num)))
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp)

zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
KM_SLEEP);
zsb->z_hold_cookie = vmem_zalloc(sizeof (fstrans_cookie_t) *
ZFS_OBJ_MTX_SZ, KM_SLEEP);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

Expand Down Expand Up @@ -898,6 +900,8 @@ zfs_sb_free(zfs_sb_t *zsb)
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zsb->z_hold_mtx[i]);
vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
vmem_free(zsb->z_hold_cookie, sizeof (fstrans_cookie_t) *
ZFS_OBJ_MTX_SZ);
mutex_destroy(&zsb->z_ctldir_lock);
avl_destroy(&zsb->z_ctldir_snaps);
kmem_free(zsb, sizeof (zfs_sb_t));
Expand Down

0 comments on commit 7e692d8

Please sign in to comment.