diff --git a/ipc/mqueue.c b/ipc/mqueue.c index af1692556c52..8ce57691e7b6 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -69,6 +69,7 @@ struct mqueue_inode_info { wait_queue_head_t wait_q; struct rb_root msg_tree; + struct posix_msg_tree_node *node_cache; struct mq_attr attr; struct sigevent notify; @@ -134,15 +135,20 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) else p = &(*p)->rb_right; } - leaf = kzalloc(sizeof(*leaf), GFP_ATOMIC); - if (!leaf) - return -ENOMEM; - rb_init_node(&leaf->rb_node); - INIT_LIST_HEAD(&leaf->msg_list); + if (info->node_cache) { + leaf = info->node_cache; + info->node_cache = NULL; + } else { + leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC); + if (!leaf) + return -ENOMEM; + rb_init_node(&leaf->rb_node); + INIT_LIST_HEAD(&leaf->msg_list); + info->qsize += sizeof(*leaf); + } leaf->priority = msg->m_type; rb_link_node(&leaf->rb_node, parent, p); rb_insert_color(&leaf->rb_node, &info->msg_tree); - info->qsize += sizeof(struct posix_msg_tree_node); insert_msg: info->attr.mq_curmsgs++; info->qsize += msg->m_ts; @@ -177,13 +183,17 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) return NULL; } leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); - if (list_empty(&leaf->msg_list)) { + if (unlikely(list_empty(&leaf->msg_list))) { pr_warn_once("Inconsistency in POSIX message queue, " "empty leaf node but we haven't implemented " "lazy leaf delete!\n"); rb_erase(&leaf->rb_node, &info->msg_tree); - info->qsize -= sizeof(struct posix_msg_tree_node); - kfree(leaf); + if (info->node_cache) { + info->qsize -= sizeof(*leaf); + kfree(leaf); + } else { + info->node_cache = leaf; + } goto try_again; } else { msg = list_first_entry(&leaf->msg_list, @@ -191,8 +201,12 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) list_del(&msg->m_list); if (list_empty(&leaf->msg_list)) { rb_erase(&leaf->rb_node, &info->msg_tree); - info->qsize -= sizeof(struct posix_msg_tree_node); - kfree(leaf); + if (info->node_cache) { + info->qsize -= sizeof(*leaf); + kfree(leaf); + } else { + info->node_cache = leaf; + } } } info->attr.mq_curmsgs--; @@ -235,6 +249,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->qsize = 0; info->user = NULL; /* set when all is ok */ info->msg_tree = RB_ROOT; + info->node_cache = NULL; memset(&info->attr, 0, sizeof(info->attr)); info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, ipc_ns->mq_msg_default); @@ -367,6 +382,7 @@ static void mqueue_evict_inode(struct inode *inode) spin_lock(&info->lock); while ((msg = msg_get(info)) != NULL) free_msg(msg); + kfree(info->node_cache); spin_unlock(&info->lock); /* Total amount of bytes accounted for the mqueue */ @@ -964,7 +980,8 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, struct mqueue_inode_info *info; ktime_t expires, *timeout = NULL; struct timespec ts; - int ret; + struct posix_msg_tree_node *new_leaf = NULL; + int ret = 0; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1012,39 +1029,60 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, msg_ptr->m_ts = msg_len; msg_ptr->m_type = msg_prio; + /* + * msg_insert really wants us to have a valid, spare node struct so + * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will + * fall back to that if necessary. + */ + if (!info->node_cache) + new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); + spin_lock(&info->lock); + if (!info->node_cache && new_leaf) { + /* Save our speculative allocation into the cache */ + rb_init_node(&new_leaf->rb_node); + INIT_LIST_HEAD(&new_leaf->msg_list); + info->node_cache = new_leaf; + info->qsize += sizeof(*new_leaf); + new_leaf = NULL; + } else { + kfree(new_leaf); + } + if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { if (filp->f_flags & O_NONBLOCK) { - spin_unlock(&info->lock); ret = -EAGAIN; } else { wait.task = current; wait.msg = (void *) msg_ptr; wait.state = STATE_NONE; ret = wq_sleep(info, SEND, timeout, &wait); + /* + * wq_sleep must be called with info->lock held, and + * returns with the lock released + */ + goto out_free; } - if (ret < 0) - free_msg(msg_ptr); } else { receiver = wq_get_first_waiter(info, RECV); if (receiver) { pipelined_send(info, msg_ptr, receiver); } else { /* adds message to the queue */ - if (msg_insert(msg_ptr, info)) { - free_msg(msg_ptr); - ret = -ENOMEM; - spin_unlock(&info->lock); - goto out_fput; - } + ret = msg_insert(msg_ptr, info); + if (ret) + goto out_unlock; __do_notify(info); } inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - spin_unlock(&info->lock); - ret = 0; } +out_unlock: + spin_unlock(&info->lock); +out_free: + if (ret) + free_msg(msg_ptr); out_fput: fput(filp); out: @@ -1063,6 +1101,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, struct ext_wait_queue wait; ktime_t expires, *timeout = NULL; struct timespec ts; + struct posix_msg_tree_node *new_leaf = NULL; if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1098,7 +1137,26 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, goto out_fput; } + /* + * msg_insert really wants us to have a valid, spare node struct so + * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will + * fall back to that if necessary. + */ + if (!info->node_cache) + new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); + spin_lock(&info->lock); + + if (!info->node_cache && new_leaf) { + /* Save our speculative allocation into the cache */ + rb_init_node(&new_leaf->rb_node); + INIT_LIST_HEAD(&new_leaf->msg_list); + info->node_cache = new_leaf; + info->qsize += sizeof(*new_leaf); + } else { + kfree(new_leaf); + } + if (info->attr.mq_curmsgs == 0) { if (filp->f_flags & O_NONBLOCK) { spin_unlock(&info->lock);