forked from luck/tmp_suning_uos_patched
ceph: fix leaked inode ref due to snap metadata writeback race
We create a ceph_cap_snap if there is dirty cap metadata (for writeback to the MDS) OR dirty pages (for writeback to the OSD). It is thus possible that the metadata has already been written back to the MDS but the OSD data has not when the cap_snap is created. This results in a cap_snap with dirty(caps) == 0.

The problem is that in this case cap writeback to the MDS isn't necessary, and a FLUSHSNAP cap op for such a cap_snap gets no ack from the MDS. This leaves the cap_snap attached to the inode along with its inode reference.

Fix the problem by dropping the cap_snap in ceph_put_wrbuffer_cap_refs() if it becomes 'complete' (all pages written out) and dirty(caps) == 0.

Also, BUG() in __ceph_flush_snaps() if we encounter a cap_snap with dirty(caps) == 0.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
parent
6298a33757
commit
819ccbfa44
|
@ -1204,6 +1204,12 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
|
|||
if (capsnap->dirty_pages || capsnap->writing)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* if cap writeback already occurred, we should have dropped
|
||||
* the capsnap in ceph_put_wrbuffer_cap_refs.
|
||||
*/
|
||||
BUG_ON(capsnap->dirty == 0);
|
||||
|
||||
/* pick mds, take s_mutex */
|
||||
mds = __ceph_get_cap_mds(ci, &mseq);
|
||||
if (session && session->s_mds != mds) {
|
||||
|
@ -2117,8 +2123,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
|
|||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
|
||||
last ? "last" : "");
|
||||
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
|
||||
last ? " last" : "", put ? " put" : "");
|
||||
|
||||
if (last && !flushsnaps)
|
||||
ceph_check_caps(ci, 0, NULL);
|
||||
|
@ -2142,7 +2148,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
|||
{
|
||||
struct inode *inode = &ci->vfs_inode;
|
||||
int last = 0;
|
||||
int last_snap = 0;
|
||||
int complete_capsnap = 0;
|
||||
int drop_capsnap = 0;
|
||||
int found = 0;
|
||||
struct ceph_cap_snap *capsnap = NULL;
|
||||
|
||||
|
@ -2165,19 +2172,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
|||
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
|
||||
if (capsnap->context == snapc) {
|
||||
found = 1;
|
||||
capsnap->dirty_pages -= nr;
|
||||
last_snap = !capsnap->dirty_pages;
|
||||
break;
|
||||
}
|
||||
}
|
||||
BUG_ON(!found);
|
||||
capsnap->dirty_pages -= nr;
|
||||
if (capsnap->dirty_pages == 0) {
|
||||
complete_capsnap = 1;
|
||||
if (capsnap->dirty == 0)
|
||||
/* cap writeback completed before we created
|
||||
* the cap_snap; no FLUSHSNAP is needed */
|
||||
drop_capsnap = 1;
|
||||
}
|
||||
dout("put_wrbuffer_cap_refs on %p cap_snap %p "
|
||||
" snap %lld %d/%d -> %d/%d %s%s\n",
|
||||
" snap %lld %d/%d -> %d/%d %s%s%s\n",
|
||||
inode, capsnap, capsnap->context->seq,
|
||||
ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
|
||||
ci->i_wrbuffer_ref, capsnap->dirty_pages,
|
||||
last ? " (wrbuffer last)" : "",
|
||||
last_snap ? " (capsnap last)" : "");
|
||||
complete_capsnap ? " (complete capsnap)" : "",
|
||||
drop_capsnap ? " (drop capsnap)" : "");
|
||||
if (drop_capsnap) {
|
||||
ceph_put_snap_context(capsnap->context);
|
||||
list_del(&capsnap->ci_item);
|
||||
list_del(&capsnap->flushing_item);
|
||||
ceph_put_cap_snap(capsnap);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
@ -2185,10 +2205,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
|
|||
if (last) {
|
||||
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
|
||||
iput(inode);
|
||||
} else if (last_snap) {
|
||||
} else if (complete_capsnap) {
|
||||
ceph_flush_snaps(ci);
|
||||
wake_up(&ci->i_cap_wq);
|
||||
}
|
||||
if (drop_capsnap)
|
||||
iput(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2464,8 +2486,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
|
|||
break;
|
||||
}
|
||||
WARN_ON(capsnap->dirty_pages || capsnap->writing);
|
||||
dout(" removing cap_snap %p follows %lld\n",
|
||||
capsnap, follows);
|
||||
dout(" removing %p cap_snap %p follows %lld\n",
|
||||
inode, capsnap, follows);
|
||||
ceph_put_snap_context(capsnap->context);
|
||||
list_del(&capsnap->ci_item);
|
||||
list_del(&capsnap->flushing_item);
|
||||
|
|
|
@ -521,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
|
|||
capsnap->ctime = inode->i_ctime;
|
||||
capsnap->time_warp_seq = ci->i_time_warp_seq;
|
||||
if (capsnap->dirty_pages) {
|
||||
dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu "
|
||||
dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
|
||||
"still has %d dirty pages\n", inode, capsnap,
|
||||
capsnap->context, capsnap->context->seq,
|
||||
capsnap->size, capsnap->dirty_pages);
|
||||
ceph_cap_string(capsnap->dirty), capsnap->size,
|
||||
capsnap->dirty_pages);
|
||||
return 0;
|
||||
}
|
||||
dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n",
|
||||
dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
|
||||
inode, capsnap, capsnap->context,
|
||||
capsnap->context->seq, capsnap->size);
|
||||
capsnap->context->seq, ceph_cap_string(capsnap->dirty),
|
||||
capsnap->size);
|
||||
|
||||
spin_lock(&mdsc->snap_flush_lock);
|
||||
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
|
||||
|
|
Loading…
Reference in New Issue
Block a user