diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ac04ef97eac2..bd925180a2b0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -284,18 +284,32 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) u32 xor_sum = 0; if (!get_ldev(mdev)) { - dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); + dev_err(DEV, + "disk is %s, cannot start al transaction (-%d +%d)\n", + drbd_disk_str(mdev->state.disk), evicted, new_enr); complete(&((struct update_al_work *)w)->event); return 1; } /* do we have to do a bitmap write, first? * TODO reduce maximum latency: * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. */ + * instead of doing two synchronous sector writes. + * For now, we must not write the transaction, + * if we cannot write out the bitmap of the evicted extent. */ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); - mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ + /* The bitmap write may have failed, causing a state change. */ + if (mdev->state.disk < D_INCONSISTENT) { + dev_err(DEV, + "disk is %s, cannot write al transaction (-%d +%d)\n", + drbd_disk_str(mdev->state.disk), evicted, new_enr); + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } + + mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ buffer = (struct al_transaction *)page_address(mdev->md_io_page); buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); @@ -739,7 +753,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) unsigned int enr; unsigned long add = 0; char ppb[10]; - int i; + int i, tmp; wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -747,7 +761,9 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) enr = lc_element_by_index(mdev->act_log, i)->lc_number; if (enr == LC_FREE) continue; - add += drbd_bm_ALe_set_all(mdev, enr); + tmp = drbd_bm_ALe_set_all(mdev, enr); + dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr); + add += tmp; } lc_unlock(mdev->act_log); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 04a823b01da5..1146faa7ae38 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1995,10 +1995,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned break; } - if (mdev->state.pdsk == D_DISKLESS) { + if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ drbd_set_out_of_sync(mdev, e->sector, e->size); e->flags |= EE_CALL_AL_COMPLETE_IO; + e->flags &= ~EE_MAY_SET_IN_SYNC; drbd_al_begin_io(mdev, e->sector); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9e91a2545fc8..d26b213dbf15 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -942,12 +942,21 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) if (local) { req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR - : rw == READ ? DRBD_FAULT_DT_RD - : DRBD_FAULT_DT_RA)) + /* State may have changed since we grabbed our reference on the + * mdev->ldev member. Double check, and short-circuit to endio. + * In case the last activity log transaction failed to get on + * stable storage, and this is a WRITE, we may not even submit + * this bio. */ + if (get_ldev(mdev)) { + if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(req->private_bio, -EIO); + else + generic_make_request(req->private_bio); + put_ldev(mdev); + } else bio_endio(req->private_bio, -EIO); - else - generic_make_request(req->private_bio); } /* we need to plug ALWAYS since we possibly need to kick lo_dev.