forked from luck/tmp_suning_uos_patched
4e717f5c10
The list_lru implementation has one function, list_lru_dispose_all, with only one user (the dentry code). At first, such a function appears to make sense because we are really not interested in the result of isolating each dentry separately - all of them are going away anyway. However, its implementation is buggy in the following way: When we call list_lru_dispose_all in fs/dcache.c, we scan all dentries marking them with DCACHE_SHRINK_LIST. However, this is done without the nlru->lock taken. The immediate result of that is that someone else may add or remove the dentry from the LRU at the same time. When list_lru_del happens in that scenario we will see an element that is not yet marked with DCACHE_SHRINK_LIST (even though it will be in the future) and obviously remove it from an lru where the element no longer is. Since list_lru_dispose_all will in effect count down nlru's nr_items and list_lru_del will do the same, this will lead to an imbalance. The solution for this would not be so simple: we can obviously just keep the lru_lock taken, but then we have no guarantees that we will be able to acquire the dentry lock (dentry->d_lock). To properly solve this, we need a communication mechanism between the lru and dentry code, so they can coordinate this with each other. Such a mechanism already exists in the form of the list_lru_walk_cb callback. So it is possible to construct a dcache-side prune function that does the right thing only by calling list_lru_walk in a loop until no more dentries are available. With only one user, plus the fact that a sane solution for the problem would involve bouncing between dcache and list_lru anyway, I see little justification to keep the special case list_lru_dispose_all in tree. Signed-off-by: Glauber Costa <glommer@openvz.org> Cc: Michal Hocko <mhocko@suse.cz> Acked-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
128 lines
2.8 KiB
C
128 lines
2.8 KiB
C
/*
|
|
* Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
|
|
* Authors: David Chinner and Glauber Costa
|
|
*
|
|
* Generic LRU infrastructure
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/list_lru.h>
|
|
|
|
bool list_lru_add(struct list_lru *lru, struct list_head *item)
|
|
{
|
|
int nid = page_to_nid(virt_to_page(item));
|
|
struct list_lru_node *nlru = &lru->node[nid];
|
|
|
|
spin_lock(&nlru->lock);
|
|
WARN_ON_ONCE(nlru->nr_items < 0);
|
|
if (list_empty(item)) {
|
|
list_add_tail(item, &nlru->list);
|
|
if (nlru->nr_items++ == 0)
|
|
node_set(nid, lru->active_nodes);
|
|
spin_unlock(&nlru->lock);
|
|
return true;
|
|
}
|
|
spin_unlock(&nlru->lock);
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL_GPL(list_lru_add);
|
|
|
|
bool list_lru_del(struct list_lru *lru, struct list_head *item)
|
|
{
|
|
int nid = page_to_nid(virt_to_page(item));
|
|
struct list_lru_node *nlru = &lru->node[nid];
|
|
|
|
spin_lock(&nlru->lock);
|
|
if (!list_empty(item)) {
|
|
list_del_init(item);
|
|
if (--nlru->nr_items == 0)
|
|
node_clear(nid, lru->active_nodes);
|
|
WARN_ON_ONCE(nlru->nr_items < 0);
|
|
spin_unlock(&nlru->lock);
|
|
return true;
|
|
}
|
|
spin_unlock(&nlru->lock);
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL_GPL(list_lru_del);
|
|
|
|
unsigned long
|
|
list_lru_count_node(struct list_lru *lru, int nid)
|
|
{
|
|
unsigned long count = 0;
|
|
struct list_lru_node *nlru = &lru->node[nid];
|
|
|
|
spin_lock(&nlru->lock);
|
|
WARN_ON_ONCE(nlru->nr_items < 0);
|
|
count += nlru->nr_items;
|
|
spin_unlock(&nlru->lock);
|
|
|
|
return count;
|
|
}
|
|
EXPORT_SYMBOL_GPL(list_lru_count_node);
|
|
|
|
unsigned long
|
|
list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
|
|
void *cb_arg, unsigned long *nr_to_walk)
|
|
{
|
|
|
|
struct list_lru_node *nlru = &lru->node[nid];
|
|
struct list_head *item, *n;
|
|
unsigned long isolated = 0;
|
|
|
|
spin_lock(&nlru->lock);
|
|
restart:
|
|
list_for_each_safe(item, n, &nlru->list) {
|
|
enum lru_status ret;
|
|
|
|
/*
|
|
* decrement nr_to_walk first so that we don't livelock if we
|
|
* get stuck on large numbesr of LRU_RETRY items
|
|
*/
|
|
if (--(*nr_to_walk) == 0)
|
|
break;
|
|
|
|
ret = isolate(item, &nlru->lock, cb_arg);
|
|
switch (ret) {
|
|
case LRU_REMOVED:
|
|
if (--nlru->nr_items == 0)
|
|
node_clear(nid, lru->active_nodes);
|
|
WARN_ON_ONCE(nlru->nr_items < 0);
|
|
isolated++;
|
|
break;
|
|
case LRU_ROTATE:
|
|
list_move_tail(item, &nlru->list);
|
|
break;
|
|
case LRU_SKIP:
|
|
break;
|
|
case LRU_RETRY:
|
|
/*
|
|
* The lru lock has been dropped, our list traversal is
|
|
* now invalid and so we have to restart from scratch.
|
|
*/
|
|
goto restart;
|
|
default:
|
|
BUG();
|
|
}
|
|
}
|
|
|
|
spin_unlock(&nlru->lock);
|
|
return isolated;
|
|
}
|
|
EXPORT_SYMBOL_GPL(list_lru_walk_node);
|
|
|
|
int list_lru_init(struct list_lru *lru)
|
|
{
|
|
int i;
|
|
|
|
nodes_clear(lru->active_nodes);
|
|
for (i = 0; i < MAX_NUMNODES; i++) {
|
|
spin_lock_init(&lru->node[i].lock);
|
|
INIT_LIST_HEAD(&lru->node[i].list);
|
|
lru->node[i].nr_items = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(list_lru_init);
|