libceph: store timeouts in jiffies, verify user input

There are currently three libceph-level timeouts that the user can
specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive.  All of
these are in seconds and no checking is done on user input: negative
values are accepted, we multiply them all by HZ which may or may not
overflow, arbitrarily large jiffies then get added together, etc.

There is also a bug in the way mount_timeout=0 is handled.  It's
supposed to mean "infinite timeout", but that's not how wait.h APIs
treat it and so __ceph_open_session() for example will busy loop
without much chance of being interrupted if none of ceph-mons are
there.

Fix all this by verifying user input, storing timeouts capped by
msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies()
helper for all user-specified waits to handle infinite timeouts
correctly.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
This commit is contained in:
Ilya Dryomov 2015-05-15 12:02:17 +03:00
parent d50c97b566
commit a319bf56a6
9 changed files with 71 additions and 38 deletions

View File

@ -4963,8 +4963,8 @@ static int rbd_add_parse_args(const char *buf,
*/
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{
struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch;
unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0;
int ret;
@ -4979,7 +4979,8 @@ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
newest_epoch, timeout);
newest_epoch,
opts->mount_timeout);
goto again;
} else {
/* the osdmap we have is new enough */

View File

@ -1260,7 +1260,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
if (req->r_timeout) {
unsigned long time_left = wait_for_completion_timeout(
&req->r_safe_completion,
req->r_timeout);
ceph_timeout_jiffies(req->r_timeout));
if (time_left > 0)
ret = 0;
else

View File

@ -2268,7 +2268,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
dout("do_request waiting\n");
if (req->r_timeout) {
err = (long)wait_for_completion_killable_timeout(
&req->r_completion, req->r_timeout);
&req->r_completion,
ceph_timeout_jiffies(req->r_timeout));
if (err == 0)
err = -EIO;
} else if (req->r_wait_for_completion) {
@ -3424,8 +3425,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
*/
static void wait_requests(struct ceph_mds_client *mdsc)
{
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_request *req;
struct ceph_fs_client *fsc = mdsc->fsc;
mutex_lock(&mdsc->mutex);
if (__get_oldest_req(mdsc)) {
@ -3433,7 +3434,7 @@ static void wait_requests(struct ceph_mds_client *mdsc)
dout("wait_requests waiting for requests\n");
wait_for_completion_timeout(&mdsc->safe_umount_waiters,
fsc->client->options->mount_timeout * HZ);
ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining requests */
mutex_lock(&mdsc->mutex);
@ -3556,10 +3557,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc)
*/
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_session *session;
int i;
struct ceph_fs_client *fsc = mdsc->fsc;
unsigned long timeout = fsc->client->options->mount_timeout * HZ;
dout("close_sessions\n");
@ -3580,7 +3580,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
dout("waiting for sessions to close\n");
wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
timeout);
ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining sessions */
mutex_lock(&mdsc->mutex);

View File

@ -227,7 +227,7 @@ struct ceph_mds_request {
int r_err;
bool r_aborted;
unsigned long r_timeout; /* optional. jiffies */
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */
unsigned long r_request_started; /* start time for mds request only,
used to measure lease durations */

View File

@ -742,7 +742,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
req->r_timeout = fsc->client->options->mount_timeout * HZ;
req->r_timeout = fsc->client->options->mount_timeout;
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);

View File

@ -43,9 +43,9 @@ struct ceph_options {
int flags;
struct ceph_fsid fsid;
struct ceph_entity_addr my_addr;
int mount_timeout;
int osd_idle_ttl;
int osd_keepalive_timeout;
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need need
@ -63,9 +63,9 @@ struct ceph_options {
/*
* defaults
*/
#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
#define CEPH_OSD_KEEPALIVE_DEFAULT 5
#define CEPH_OSD_IDLE_TTL_DEFAULT 60
#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@ -93,6 +93,11 @@ enum {
CEPH_MOUNT_SHUTDOWN,
};
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
{
return timeout ?: MAX_SCHEDULE_TIMEOUT;
}
struct ceph_mds_client;
/*

View File

@ -352,8 +352,8 @@ ceph_parse_options(char *options, const char *dev_name,
/* start with defaults */
opt->flags = CEPH_OPT_DEFAULT;
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@ -439,13 +439,32 @@ ceph_parse_options(char *options, const char *dev_name,
pr_warn("ignoring deprecated osdtimeout option\n");
break;
case Opt_osdkeepalivetimeout:
opt->osd_keepalive_timeout = intval;
/* 0 isn't well defined right now, reject it */
if (intval < 1 || intval > INT_MAX / 1000) {
pr_err("osdkeepalive out of range\n");
err = -EINVAL;
goto out;
}
opt->osd_keepalive_timeout =
msecs_to_jiffies(intval * 1000);
break;
case Opt_osd_idle_ttl:
opt->osd_idle_ttl = intval;
/* 0 isn't well defined right now, reject it */
if (intval < 1 || intval > INT_MAX / 1000) {
pr_err("osd_idle_ttl out of range\n");
err = -EINVAL;
goto out;
}
opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
break;
case Opt_mount_timeout:
opt->mount_timeout = intval;
/* 0 is "wait forever" (i.e. infinite timeout) */
if (intval < 0 || intval > INT_MAX / 1000) {
pr_err("mount_timeout out of range\n");
err = -EINVAL;
goto out;
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
case Opt_share:
@ -512,12 +531,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
seq_puts(m, "notcp_nodelay,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
seq_printf(m, "mount_timeout=%d,",
jiffies_to_msecs(opt->mount_timeout) / 1000);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
seq_printf(m, "osd_idle_ttl=%d,",
jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,",
opt->osd_keepalive_timeout);
jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
/* drop redundant comma */
if (m->count != pos)
@ -627,7 +648,7 @@ static int have_mon_and_osd_map(struct ceph_client *client)
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
int err;
unsigned long timeout = client->options->mount_timeout * HZ;
unsigned long timeout = client->options->mount_timeout;
/* open session, and wait for mon and osd maps */
err = ceph_monc_open_session(&client->monc);
@ -643,7 +664,7 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0),
timeout);
ceph_timeout_jiffies(timeout));
if (err == -EINTR || err == -ERESTARTSYS)
return err;
if (client->auth_err < 0)

View File

@ -298,6 +298,12 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
}
EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
/*
* Wait for an osdmap with a given epoch.
*
* @epoch: epoch to wait for
* @timeout: in jiffies, 0 means "wait forever"
*/
int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
unsigned long timeout)
{
@ -308,11 +314,12 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
while (monc->have_osdmap < epoch) {
mutex_unlock(&monc->mutex);
if (timeout != 0 && time_after_eq(jiffies, started + timeout))
if (timeout && time_after_eq(jiffies, started + timeout))
return -ETIMEDOUT;
ret = wait_event_interruptible_timeout(monc->client->auth_wq,
monc->have_osdmap >= epoch, timeout);
monc->have_osdmap >= epoch,
ceph_timeout_jiffies(timeout));
if (ret < 0)
return ret;

View File

@ -1097,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc,
BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
}
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@ -1208,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{
schedule_delayed_work(&osdc->timeout_work,
osdc->client->options->osd_keepalive_timeout * HZ);
osdc->client->options->osd_keepalive_timeout);
}
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@ -1576,10 +1576,9 @@ static void handle_timeout(struct work_struct *work)
{
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_options *opts = osdc->client->options;
struct ceph_osd_request *req;
struct ceph_osd *osd;
unsigned long keepalive =
osdc->client->options->osd_keepalive_timeout * HZ;
struct list_head slow_osds;
dout("timeout\n");
down_read(&osdc->map_sem);
@ -1595,7 +1594,8 @@ static void handle_timeout(struct work_struct *work)
*/
INIT_LIST_HEAD(&slow_osds);
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
if (time_before(jiffies, req->r_stamp + keepalive))
if (time_before(jiffies,
req->r_stamp + opts->osd_keepalive_timeout))
break;
osd = req->r_osd;
@ -1622,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work)
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client,
osds_timeout_work.work);
unsigned long delay =
osdc->client->options->osd_idle_ttl * HZ >> 2;
unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
dout("osds timeout\n");
down_read(&osdc->map_sem);
@ -2628,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
osdc->event_count = 0;
schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
round_jiffies_relative(osdc->client->options->osd_idle_ttl));
err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10,