mm: do batched scans for mem_cgroup
For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in which case shrink_list() _still_ calls isolate_pages() with the much larger SWAP_CLUSTER_MAX. This effectively scales up the inactive list scan rate by up to 32 times.

For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12)=4. So when shrink_zone() expects to scan 4 pages in each of the active and inactive lists, 4 pages of the active list will be scanned, while the inactive list will in effect be (over) scanned by SWAP_CLUSTER_MAX=32 pages. That could break the balance between the two lists.

It can further impact the scan of the anon active list, due to the anon active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():

    inactive anon list over scanned => inactive_anon_is_low() == TRUE
                                    => shrink_active_list()
                                    => active anon list over scanned

So the end result may be:

- anon inactive => over scanned
- anon active => over scanned (maybe not as much)
- file inactive => over scanned
- file active => under scanned (relatively)

The accesses to nr_saved_scan are not lock protected and so are not 100% accurate; however, we can tolerate small errors and the resulting small imbalances in scan rates between zones.

Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
1a8670a29b
commit
f862963174
@ -273,6 +273,11 @@ struct zone_reclaim_stat {
|
|||||||
*/
|
*/
|
||||||
unsigned long recent_rotated[2];
|
unsigned long recent_rotated[2];
|
||||||
unsigned long recent_scanned[2];
|
unsigned long recent_scanned[2];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* accumulated for batching
|
||||||
|
*/
|
||||||
|
unsigned long nr_saved_scan[NR_LRU_LISTS];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct zone {
|
struct zone {
|
||||||
@ -327,7 +332,6 @@ struct zone {
|
|||||||
spinlock_t lru_lock;
|
spinlock_t lru_lock;
|
||||||
struct zone_lru {
|
struct zone_lru {
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
unsigned long nr_saved_scan; /* accumulated for batching */
|
|
||||||
} lru[NR_LRU_LISTS];
|
} lru[NR_LRU_LISTS];
|
||||||
|
|
||||||
struct zone_reclaim_stat reclaim_stat;
|
struct zone_reclaim_stat reclaim_stat;
|
||||||
|
@ -3809,7 +3809,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
|
|||||||
zone_pcp_init(zone);
|
zone_pcp_init(zone);
|
||||||
for_each_lru(l) {
|
for_each_lru(l) {
|
||||||
INIT_LIST_HEAD(&zone->lru[l].list);
|
INIT_LIST_HEAD(&zone->lru[l].list);
|
||||||
zone->lru[l].nr_saved_scan = 0;
|
zone->reclaim_stat.nr_saved_scan[l] = 0;
|
||||||
}
|
}
|
||||||
zone->reclaim_stat.recent_rotated[0] = 0;
|
zone->reclaim_stat.recent_rotated[0] = 0;
|
||||||
zone->reclaim_stat.recent_rotated[1] = 0;
|
zone->reclaim_stat.recent_rotated[1] = 0;
|
||||||
|
16
mm/vmscan.c
16
mm/vmscan.c
@ -1586,6 +1586,7 @@ static void shrink_zone(int priority, struct zone *zone,
|
|||||||
enum lru_list l;
|
enum lru_list l;
|
||||||
unsigned long nr_reclaimed = sc->nr_reclaimed;
|
unsigned long nr_reclaimed = sc->nr_reclaimed;
|
||||||
unsigned long swap_cluster_max = sc->swap_cluster_max;
|
unsigned long swap_cluster_max = sc->swap_cluster_max;
|
||||||
|
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
|
||||||
int noswap = 0;
|
int noswap = 0;
|
||||||
|
|
||||||
/* If we have no swap space, do not bother scanning anon pages. */
|
/* If we have no swap space, do not bother scanning anon pages. */
|
||||||
@ -1605,12 +1606,9 @@ static void shrink_zone(int priority, struct zone *zone,
|
|||||||
scan >>= priority;
|
scan >>= priority;
|
||||||
scan = (scan * percent[file]) / 100;
|
scan = (scan * percent[file]) / 100;
|
||||||
}
|
}
|
||||||
if (scanning_global_lru(sc))
|
|
||||||
nr[l] = nr_scan_try_batch(scan,
|
nr[l] = nr_scan_try_batch(scan,
|
||||||
&zone->lru[l].nr_saved_scan,
|
&reclaim_stat->nr_saved_scan[l],
|
||||||
swap_cluster_max);
|
swap_cluster_max);
|
||||||
else
|
|
||||||
nr[l] = scan;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
|
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
|
||||||
@ -2220,6 +2218,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
|
|||||||
{
|
{
|
||||||
struct zone *zone;
|
struct zone *zone;
|
||||||
unsigned long nr_reclaimed = 0;
|
unsigned long nr_reclaimed = 0;
|
||||||
|
struct zone_reclaim_stat *reclaim_stat;
|
||||||
|
|
||||||
for_each_populated_zone(zone) {
|
for_each_populated_zone(zone) {
|
||||||
enum lru_list l;
|
enum lru_list l;
|
||||||
@ -2236,11 +2235,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
|
|||||||
l == LRU_ACTIVE_FILE))
|
l == LRU_ACTIVE_FILE))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
|
reclaim_stat = get_reclaim_stat(zone, sc);
|
||||||
if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
|
reclaim_stat->nr_saved_scan[l] +=
|
||||||
|
(lru_pages >> prio) + 1;
|
||||||
|
if (reclaim_stat->nr_saved_scan[l]
|
||||||
|
>= nr_pages || pass > 3) {
|
||||||
unsigned long nr_to_scan;
|
unsigned long nr_to_scan;
|
||||||
|
|
||||||
zone->lru[l].nr_saved_scan = 0;
|
reclaim_stat->nr_saved_scan[l] = 0;
|
||||||
nr_to_scan = min(nr_pages, lru_pages);
|
nr_to_scan = min(nr_pages, lru_pages);
|
||||||
nr_reclaimed += shrink_list(l, nr_to_scan, zone,
|
nr_reclaimed += shrink_list(l, nr_to_scan, zone,
|
||||||
sc, prio);
|
sc, prio);
|
||||||
|
Loading…
Reference in New Issue
Block a user