memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under memory pressure. In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step to skip shrink_zone() if soft_limit reclaim does enough work. This is part of the effort which tries to reduce reclaiming pages in global LRU in memcg. The per-memcg background reclaim patchset further enhances the per-cgroup targeting reclaim, which I should have V4 posted shortly. Try running multiple memory intensive workloads within separate memcgs. Watch the counters of soft_steal in memory.stat. $ cat /dev/cgroup/A/memory.stat | grep 'soft' soft_steal 240000 soft_scan 240000 total_soft_steal 240000 total_soft_scan 240000 This patch: In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. Also, we need to make the memory pressure balanced across per-memcg zones, like the logic in vm-core. This patch is the first step where we start with counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control. Signed-off-by: Ying Han <yinghan@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Rik van Riel <riel@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f042e707ee
commit
0ae5e89c60
@ -144,7 +144,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
|
||||
}
|
||||
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
gfp_t gfp_mask);
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned);
|
||||
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
@ -338,7 +339,8 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
|
||||
|
||||
static inline
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
gfp_t gfp_mask)
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -257,7 +257,8 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
|
||||
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
gfp_t gfp_mask, bool noswap,
|
||||
unsigned int swappiness,
|
||||
struct zone *zone);
|
||||
struct zone *zone,
|
||||
unsigned long *nr_scanned);
|
||||
extern int __isolate_lru_page(struct page *page, int mode, int file);
|
||||
extern unsigned long shrink_all_memory(unsigned long nr_pages);
|
||||
extern int vm_swappiness;
|
||||
|
@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
|
||||
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
|
||||
struct zone *zone,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long reclaim_options)
|
||||
unsigned long reclaim_options,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
struct mem_cgroup *victim;
|
||||
int ret, total = 0;
|
||||
@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
|
||||
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
|
||||
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
|
||||
unsigned long excess;
|
||||
unsigned long nr_scanned;
|
||||
|
||||
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
|
||||
|
||||
@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
|
||||
continue;
|
||||
}
|
||||
/* we use swappiness of local cgroup */
|
||||
if (check_soft)
|
||||
if (check_soft) {
|
||||
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
|
||||
noswap, get_swappiness(victim), zone);
|
||||
else
|
||||
noswap, get_swappiness(victim), zone,
|
||||
&nr_scanned);
|
||||
*total_scanned += nr_scanned;
|
||||
} else
|
||||
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
|
||||
noswap, get_swappiness(victim));
|
||||
css_put(&victim->css);
|
||||
@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
|
||||
return CHARGE_WOULDBLOCK;
|
||||
|
||||
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
|
||||
gfp_mask, flags);
|
||||
gfp_mask, flags, NULL);
|
||||
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
|
||||
return CHARGE_RETRY;
|
||||
/*
|
||||
@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
|
||||
break;
|
||||
|
||||
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_SHRINK);
|
||||
MEM_CGROUP_RECLAIM_SHRINK,
|
||||
NULL);
|
||||
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
|
||||
/* Usage is reduced ? */
|
||||
if (curusage >= oldusage)
|
||||
@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
|
||||
|
||||
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_NOSWAP |
|
||||
MEM_CGROUP_RECLAIM_SHRINK);
|
||||
MEM_CGROUP_RECLAIM_SHRINK,
|
||||
NULL);
|
||||
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
|
||||
/* Usage is reduced ? */
|
||||
if (curusage >= oldusage)
|
||||
@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
|
||||
}
|
||||
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
gfp_t gfp_mask)
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
unsigned long nr_reclaimed = 0;
|
||||
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
|
||||
@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
int loop = 0;
|
||||
struct mem_cgroup_tree_per_zone *mctz;
|
||||
unsigned long long excess;
|
||||
unsigned long nr_scanned;
|
||||
|
||||
if (order > 0)
|
||||
return 0;
|
||||
@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
if (!mz)
|
||||
break;
|
||||
|
||||
nr_scanned = 0;
|
||||
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
|
||||
gfp_mask,
|
||||
MEM_CGROUP_RECLAIM_SOFT);
|
||||
MEM_CGROUP_RECLAIM_SOFT,
|
||||
&nr_scanned);
|
||||
nr_reclaimed += reclaimed;
|
||||
*total_scanned += nr_scanned;
|
||||
spin_lock(&mctz->lock);
|
||||
|
||||
/*
|
||||
|
16
mm/vmscan.c
16
mm/vmscan.c
@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
|
||||
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
gfp_t gfp_mask, bool noswap,
|
||||
unsigned int swappiness,
|
||||
struct zone *zone)
|
||||
struct zone *zone,
|
||||
unsigned long *nr_scanned)
|
||||
{
|
||||
struct scan_control sc = {
|
||||
.nr_scanned = 0,
|
||||
.nr_to_reclaim = SWAP_CLUSTER_MAX,
|
||||
.may_writepage = !laptop_mode,
|
||||
.may_unmap = 1,
|
||||
@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
.order = 0,
|
||||
.mem_cgroup = mem,
|
||||
};
|
||||
|
||||
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
|
||||
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
|
||||
|
||||
@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
|
||||
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
|
||||
|
||||
*nr_scanned = sc.nr_scanned;
|
||||
return sc.nr_reclaimed;
|
||||
}
|
||||
|
||||
@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
|
||||
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
|
||||
unsigned long total_scanned;
|
||||
struct reclaim_state *reclaim_state = current->reclaim_state;
|
||||
unsigned long nr_soft_reclaimed;
|
||||
unsigned long nr_soft_scanned;
|
||||
struct scan_control sc = {
|
||||
.gfp_mask = GFP_KERNEL,
|
||||
.may_unmap = 1,
|
||||
@ -2439,11 +2445,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
|
||||
|
||||
sc.nr_scanned = 0;
|
||||
|
||||
nr_soft_scanned = 0;
|
||||
/*
|
||||
* Call soft limit reclaim before calling shrink_zone.
|
||||
* For now we ignore the return value
|
||||
*/
|
||||
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
|
||||
nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
|
||||
order, sc.gfp_mask,
|
||||
&nr_soft_scanned);
|
||||
sc.nr_reclaimed += nr_soft_reclaimed;
|
||||
total_scanned += nr_soft_scanned;
|
||||
|
||||
/*
|
||||
* We put equal pressure on every zone, unless
|
||||
|
Loading…
Reference in New Issue
Block a user