forked from luck/tmp_suning_uos_patched
41d7db0ab4
This patch removes the completion (which is rather large) from struct gdlm_lock in favour of using the wait_on_bit() functions. We don't need to add any extra fields to the structure to do this, so we save 32 bytes (on x86_64) per structure. This adds up to quite a lot when we may potentially have millions of these lock structures. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> Acked-by: David Teigland <teigland@redhat.com>
528 lines
12 KiB
C
528 lines
12 KiB
C
/*
|
|
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
|
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU General Public License version 2.
|
|
*/
|
|
|
|
#include "lock_dlm.h"
|
|
|
|
/*
 * Shared placeholder value block.  hold_null_lock() points the lvb of every
 * NL "hold" lock at this buffer instead of allocating one, and
 * gdlm_release_all_locks() recognises it so that it is never passed to
 * kfree().
 */
static char junk_lvb[GDLM_LVB_SIZE];
|
|
|
|
static void queue_complete(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_ls *ls = lp->ls;
|
|
|
|
clear_bit(LFL_ACTIVE, &lp->flags);
|
|
|
|
spin_lock(&ls->async_lock);
|
|
list_add_tail(&lp->clist, &ls->complete);
|
|
spin_unlock(&ls->async_lock);
|
|
wake_up(&ls->thread_wait);
|
|
}
|
|
|
|
/*
 * Completion callback given to dlm_lock() by gdlm_do_lock().  @astarg is the
 * struct gdlm_lock that was supplied as the AST argument; it is simply
 * forwarded to queue_complete().
 */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = astarg;

	queue_complete(lp);
}
|
|
|
|
static inline void gdlm_bast(void *astarg, int mode)
|
|
{
|
|
struct gdlm_lock *lp = astarg;
|
|
struct gdlm_ls *ls = lp->ls;
|
|
|
|
if (!mode) {
|
|
printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
|
|
lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number);
|
|
return;
|
|
}
|
|
|
|
spin_lock(&ls->async_lock);
|
|
if (!lp->bast_mode) {
|
|
list_add_tail(&lp->blist, &ls->blocking);
|
|
lp->bast_mode = mode;
|
|
} else if (lp->bast_mode < mode)
|
|
lp->bast_mode = mode;
|
|
spin_unlock(&ls->async_lock);
|
|
wake_up(&ls->thread_wait);
|
|
}
|
|
|
|
void gdlm_queue_delayed(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_ls *ls = lp->ls;
|
|
|
|
spin_lock(&ls->async_lock);
|
|
list_add_tail(&lp->delay_list, &ls->delayed);
|
|
spin_unlock(&ls->async_lock);
|
|
}
|
|
|
|
/* convert gfs lock-state to dlm lock-mode */
|
|
|
|
static s16 make_mode(s16 lmstate)
|
|
{
|
|
switch (lmstate) {
|
|
case LM_ST_UNLOCKED:
|
|
return DLM_LOCK_NL;
|
|
case LM_ST_EXCLUSIVE:
|
|
return DLM_LOCK_EX;
|
|
case LM_ST_DEFERRED:
|
|
return DLM_LOCK_CW;
|
|
case LM_ST_SHARED:
|
|
return DLM_LOCK_PR;
|
|
}
|
|
gdlm_assert(0, "unknown LM state %d", lmstate);
|
|
return -1;
|
|
}
|
|
|
|
/* convert dlm lock-mode to gfs lock-state */
|
|
|
|
s16 gdlm_make_lmstate(s16 dlmmode)
|
|
{
|
|
switch (dlmmode) {
|
|
case DLM_LOCK_IV:
|
|
case DLM_LOCK_NL:
|
|
return LM_ST_UNLOCKED;
|
|
case DLM_LOCK_EX:
|
|
return LM_ST_EXCLUSIVE;
|
|
case DLM_LOCK_CW:
|
|
return LM_ST_DEFERRED;
|
|
case DLM_LOCK_PR:
|
|
return LM_ST_SHARED;
|
|
}
|
|
gdlm_assert(0, "unknown DLM mode %d", dlmmode);
|
|
return -1;
|
|
}
|
|
|
|
/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
|
|
DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
|
|
|
|
static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
|
|
{
|
|
s16 cur = make_mode(cur_state);
|
|
if (lp->cur != DLM_LOCK_IV)
|
|
gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
|
|
}
|
|
|
|
/*
 * Build the DLM_LKF_* flag word for a request on @lp.
 *
 * @lp:        lock being requested; its lksb.sb_lkid, flags and lvb are read
 * @gfs_flags: LM_FLAG_* bits supplied by GFS
 * @cur:       current DLM mode of the lock
 * @req:       requested DLM mode
 *
 * Returns the combined flag word.
 */
static inline unsigned int make_flags(struct gdlm_lock *lp,
				      unsigned int gfs_flags,
				      s16 cur, s16 req)
{
	unsigned int flags = 0;

	if (gfs_flags & LM_FLAG_TRY)
		flags |= DLM_LKF_NOQUEUE;

	if (gfs_flags & LM_FLAG_TRY_1CB)
		flags |= DLM_LKF_NOQUEUE | DLM_LKF_NOQUEUEBAST;

	if (gfs_flags & LM_FLAG_PRIORITY)
		flags |= DLM_LKF_NOORDER | DLM_LKF_HEADQUE;

	/* LM_FLAG_ANY: accept the alternate of PR/CW in place of the request */
	if (gfs_flags & LM_FLAG_ANY) {
		switch (req) {
		case DLM_LOCK_PR:
			flags |= DLM_LKF_ALTCW;
			break;
		case DLM_LOCK_CW:
			flags |= DLM_LKF_ALTPR;
			break;
		default:
			break;
		}
	}

	/* a non-zero lock id means the request is a conversion */
	if (lp->lksb.sb_lkid != 0) {
		flags |= DLM_LKF_CONVERT;

		/* Conversion deadlock avoidance by DLM */
		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
		    !(flags & DLM_LKF_NOQUEUE) &&
		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
			flags |= DLM_LKF_CONVDEADLK;
	}

	if (lp->lvb)
		flags |= DLM_LKF_VALBLK;

	return flags;
}
|
|
|
|
/* make_strname - convert GFS lock numbers to a string */
|
|
|
|
static inline void make_strname(const struct lm_lockname *lockname,
|
|
struct gdlm_strname *str)
|
|
{
|
|
sprintf(str->name, "%8x%16llx", lockname->ln_type,
|
|
(unsigned long long)lockname->ln_number);
|
|
str->namelen = GDLM_STRNAME_BYTES;
|
|
}
|
|
|
|
static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
|
|
struct gdlm_lock **lpp)
|
|
{
|
|
struct gdlm_lock *lp;
|
|
|
|
lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
|
|
if (!lp)
|
|
return -ENOMEM;
|
|
|
|
lp->lockname = *name;
|
|
make_strname(name, &lp->strname);
|
|
lp->ls = ls;
|
|
lp->cur = DLM_LOCK_IV;
|
|
lp->lvb = NULL;
|
|
lp->hold_null = NULL;
|
|
INIT_LIST_HEAD(&lp->clist);
|
|
INIT_LIST_HEAD(&lp->blist);
|
|
INIT_LIST_HEAD(&lp->delay_list);
|
|
|
|
spin_lock(&ls->async_lock);
|
|
list_add(&lp->all_list, &ls->all_locks);
|
|
ls->all_locks_count++;
|
|
spin_unlock(&ls->async_lock);
|
|
|
|
*lpp = lp;
|
|
return 0;
|
|
}
|
|
|
|
void gdlm_delete_lp(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_ls *ls = lp->ls;
|
|
|
|
spin_lock(&ls->async_lock);
|
|
if (!list_empty(&lp->clist))
|
|
list_del_init(&lp->clist);
|
|
if (!list_empty(&lp->blist))
|
|
list_del_init(&lp->blist);
|
|
if (!list_empty(&lp->delay_list))
|
|
list_del_init(&lp->delay_list);
|
|
gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number);
|
|
list_del_init(&lp->all_list);
|
|
ls->all_locks_count--;
|
|
spin_unlock(&ls->async_lock);
|
|
|
|
kfree(lp);
|
|
}
|
|
|
|
/*
 * Create a lock object for @name in @lockspace.
 *
 * @lockspace: the struct gdlm_ls the lock belongs to
 * @name:      GFS lock name
 * @lockp:     receives the new lock, or NULL when creation fails
 *
 * Returns 0 on success or the error from gdlm_create_lp() (-ENOMEM).
 */
int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
		  void **lockp)
{
	/*
	 * gdlm_create_lp() does not store anything on failure, so start from
	 * NULL rather than handing an indeterminate pointer back to the
	 * caller.
	 */
	struct gdlm_lock *lp = NULL;
	int error;

	error = gdlm_create_lp(lockspace, name, &lp);

	*lockp = lp;
	return error;
}
|
|
|
|
/* Release a lock object obtained from gdlm_get_lock(); see gdlm_delete_lp(). */
void gdlm_put_lock(void *lock)
{
	struct gdlm_lock *lp = lock;

	gdlm_delete_lp(lp);
}
|
|
|
|
unsigned int gdlm_do_lock(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_ls *ls = lp->ls;
|
|
int error, bast = 1;
|
|
|
|
/*
|
|
* When recovery is in progress, delay lock requests for submission
|
|
* once recovery is done. Requests for recovery (NOEXP) and unlocks
|
|
* can pass.
|
|
*/
|
|
|
|
if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
|
|
!test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
|
|
gdlm_queue_delayed(lp);
|
|
return LM_OUT_ASYNC;
|
|
}
|
|
|
|
/*
|
|
* Submit the actual lock request.
|
|
*/
|
|
|
|
if (test_bit(LFL_NOBAST, &lp->flags))
|
|
bast = 0;
|
|
|
|
set_bit(LFL_ACTIVE, &lp->flags);
|
|
|
|
log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
|
|
lp->cur, lp->req, lp->lkf);
|
|
|
|
error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
|
|
lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
|
|
lp, bast ? gdlm_bast : NULL);
|
|
|
|
if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
|
|
lp->lksb.sb_status = -EAGAIN;
|
|
queue_complete(lp);
|
|
error = 0;
|
|
}
|
|
|
|
if (error) {
|
|
log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
|
|
"flags=%lx", ls->fsname, lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, error,
|
|
lp->cur, lp->req, lp->lkf, lp->flags);
|
|
return LM_OUT_ERROR;
|
|
}
|
|
return LM_OUT_ASYNC;
|
|
}
|
|
|
|
static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_ls *ls = lp->ls;
|
|
unsigned int lkf = 0;
|
|
int error;
|
|
|
|
set_bit(LFL_DLM_UNLOCK, &lp->flags);
|
|
set_bit(LFL_ACTIVE, &lp->flags);
|
|
|
|
if (lp->lvb)
|
|
lkf = DLM_LKF_VALBLK;
|
|
|
|
log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number,
|
|
lp->lksb.sb_lkid, lp->cur, lkf);
|
|
|
|
error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
|
|
|
|
if (error) {
|
|
log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
|
|
"flags=%lx", ls->fsname, lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, error,
|
|
lp->cur, lp->req, lp->lkf, lp->flags);
|
|
return LM_OUT_ERROR;
|
|
}
|
|
return LM_OUT_ASYNC;
|
|
}
|
|
|
|
unsigned int gdlm_lock(void *lock, unsigned int cur_state,
|
|
unsigned int req_state, unsigned int flags)
|
|
{
|
|
struct gdlm_lock *lp = lock;
|
|
|
|
clear_bit(LFL_DLM_CANCEL, &lp->flags);
|
|
if (flags & LM_FLAG_NOEXP)
|
|
set_bit(LFL_NOBLOCK, &lp->flags);
|
|
|
|
check_cur_state(lp, cur_state);
|
|
lp->req = make_mode(req_state);
|
|
lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
|
|
|
|
return gdlm_do_lock(lp);
|
|
}
|
|
|
|
unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
|
|
{
|
|
struct gdlm_lock *lp = lock;
|
|
|
|
clear_bit(LFL_DLM_CANCEL, &lp->flags);
|
|
if (lp->cur == DLM_LOCK_IV)
|
|
return 0;
|
|
return gdlm_do_unlock(lp);
|
|
}
|
|
|
|
void gdlm_cancel(void *lock)
|
|
{
|
|
struct gdlm_lock *lp = lock;
|
|
struct gdlm_ls *ls = lp->ls;
|
|
int error, delay_list = 0;
|
|
|
|
if (test_bit(LFL_DLM_CANCEL, &lp->flags))
|
|
return;
|
|
|
|
log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, lp->flags);
|
|
|
|
spin_lock(&ls->async_lock);
|
|
if (!list_empty(&lp->delay_list)) {
|
|
list_del_init(&lp->delay_list);
|
|
delay_list = 1;
|
|
}
|
|
spin_unlock(&ls->async_lock);
|
|
|
|
if (delay_list) {
|
|
set_bit(LFL_CANCEL, &lp->flags);
|
|
set_bit(LFL_ACTIVE, &lp->flags);
|
|
queue_complete(lp);
|
|
return;
|
|
}
|
|
|
|
if (!test_bit(LFL_ACTIVE, &lp->flags) ||
|
|
test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
|
|
log_info("gdlm_cancel skip %x,%llx flags %lx",
|
|
lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, lp->flags);
|
|
return;
|
|
}
|
|
|
|
/* the lock is blocked in the dlm */
|
|
|
|
set_bit(LFL_DLM_CANCEL, &lp->flags);
|
|
set_bit(LFL_ACTIVE, &lp->flags);
|
|
|
|
error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
|
|
NULL, lp);
|
|
|
|
log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
|
|
lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number, lp->flags);
|
|
|
|
if (error == -EBUSY)
|
|
clear_bit(LFL_DLM_CANCEL, &lp->flags);
|
|
}
|
|
|
|
static int gdlm_add_lvb(struct gdlm_lock *lp)
|
|
{
|
|
char *lvb;
|
|
|
|
lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
|
|
if (!lvb)
|
|
return -ENOMEM;
|
|
|
|
lp->lksb.sb_lvbptr = lvb;
|
|
lp->lvb = lvb;
|
|
return 0;
|
|
}
|
|
|
|
static void gdlm_del_lvb(struct gdlm_lock *lp)
|
|
{
|
|
kfree(lp->lvb);
|
|
lp->lvb = NULL;
|
|
lp->lksb.sb_lvbptr = NULL;
|
|
}
|
|
|
|
/*
 * Action routine passed to wait_on_bit() by hold_null_lock(): give up the
 * CPU once and report success.  @word is not used.
 */
static int gdlm_ast_wait(void *word)
{
	(void)word;

	schedule();
	return 0;
}
|
|
|
|
/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
|
|
the completion) because gfs won't call hold_lvb() during a callback (from
|
|
the context of a lock_dlm thread). */
|
|
|
|
static int hold_null_lock(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_lock *lpn = NULL;
|
|
int error;
|
|
|
|
if (lp->hold_null) {
|
|
printk(KERN_INFO "lock_dlm: lvb already held\n");
|
|
return 0;
|
|
}
|
|
|
|
error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
|
|
if (error)
|
|
goto out;
|
|
|
|
lpn->lksb.sb_lvbptr = junk_lvb;
|
|
lpn->lvb = junk_lvb;
|
|
|
|
lpn->req = DLM_LOCK_NL;
|
|
lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
|
|
set_bit(LFL_NOBAST, &lpn->flags);
|
|
set_bit(LFL_INLOCK, &lpn->flags);
|
|
set_bit(LFL_AST_WAIT, &lpn->flags);
|
|
|
|
gdlm_do_lock(lpn);
|
|
wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
|
|
error = lpn->lksb.sb_status;
|
|
if (error) {
|
|
printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
|
|
error);
|
|
gdlm_delete_lp(lpn);
|
|
lpn = NULL;
|
|
}
|
|
out:
|
|
lp->hold_null = lpn;
|
|
return error;
|
|
}
|
|
|
|
/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
|
|
the completion) because gfs may call unhold_lvb() during a callback (from
|
|
the context of a lock_dlm thread) which could cause a deadlock since the
|
|
other lock_dlm thread could be engaged in recovery. */
|
|
|
|
static void unhold_null_lock(struct gdlm_lock *lp)
|
|
{
|
|
struct gdlm_lock *lpn = lp->hold_null;
|
|
|
|
gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
|
|
(unsigned long long)lp->lockname.ln_number);
|
|
lpn->lksb.sb_lvbptr = NULL;
|
|
lpn->lvb = NULL;
|
|
set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
|
|
gdlm_do_unlock(lpn);
|
|
lp->hold_null = NULL;
|
|
}
|
|
|
|
/* Acquire a NL lock because gfs requires the value block to remain
|
|
intact on the resource while the lvb is "held" even if it's holding no locks
|
|
on the resource. */
|
|
|
|
int gdlm_hold_lvb(void *lock, char **lvbp)
|
|
{
|
|
struct gdlm_lock *lp = lock;
|
|
int error;
|
|
|
|
error = gdlm_add_lvb(lp);
|
|
if (error)
|
|
return error;
|
|
|
|
*lvbp = lp->lvb;
|
|
|
|
error = hold_null_lock(lp);
|
|
if (error)
|
|
gdlm_del_lvb(lp);
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
 * Undo gdlm_hold_lvb(): release the NL hold lock, then free the value block
 * attached to @lock.  @lvb is not used; the block is taken from the lock
 * itself.
 */
void gdlm_unhold_lvb(void *lock, char *lvb)
{
	struct gdlm_lock *lp = lock;

	(void)lvb;

	unhold_null_lock(lp);
	gdlm_del_lvb(lp);
}
|
|
|
|
void gdlm_submit_delayed(struct gdlm_ls *ls)
|
|
{
|
|
struct gdlm_lock *lp, *safe;
|
|
|
|
spin_lock(&ls->async_lock);
|
|
list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
|
|
list_del_init(&lp->delay_list);
|
|
list_add_tail(&lp->delay_list, &ls->submit);
|
|
}
|
|
spin_unlock(&ls->async_lock);
|
|
wake_up(&ls->thread_wait);
|
|
}
|
|
|
|
int gdlm_release_all_locks(struct gdlm_ls *ls)
|
|
{
|
|
struct gdlm_lock *lp, *safe;
|
|
int count = 0;
|
|
|
|
spin_lock(&ls->async_lock);
|
|
list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
|
|
list_del_init(&lp->all_list);
|
|
|
|
if (lp->lvb && lp->lvb != junk_lvb)
|
|
kfree(lp->lvb);
|
|
kfree(lp);
|
|
count++;
|
|
}
|
|
spin_unlock(&ls->async_lock);
|
|
|
|
return count;
|
|
}
|
|
|