commit 1c88e19b0f
Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:
 "The rest of MM"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (101 commits)
  mm, compaction: simplify contended compaction handling
  mm, compaction: introduce direct compaction priority
  mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations
  mm, page_alloc: make THP-specific decisions more generic
  mm, page_alloc: restructure direct compaction handling in slowpath
  mm, page_alloc: don't retry initial attempt in slowpath
  mm, page_alloc: set alloc_flags only once in slowpath
  lib/stackdepot.c: use __GFP_NOWARN for stack allocations
  mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB
  mm, kasan: account for object redzone in SLUB's nearest_obj()
  mm: fix use-after-free if memory allocation failed in vma_adjust()
  zsmalloc: Delete an unnecessary check before the function call "iput"
  mm/memblock.c: fix index adjustment error in __next_mem_range_rev()
  mem-hotplug: alloc new page from a nearest neighbor node when mem-offline
  mm: optimize copy_page_to/from_iter_iovec
  mm: add cond_resched() to generic_swapfile_activate()
  Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements"
  mm, compaction: don't isolate PageWriteback pages in MIGRATE_SYNC_LIGHT mode
  mm: hwpoison: remove incorrect comments
  make __section_nr() more efficient
  ...
@@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
8. LRU
Each memcg has its own private LRU. Now, its handling is under global
VM's control (means that it's handled under global zone->lru_lock).
VM's control (means that it's handled under global zone_lru_lock).
Almost all routines around memcg's LRU is called by global LRU's
list management functions under zone->lru_lock().
list management functions under zone_lru_lock().
A special function is mem_cgroup_isolate_pages(). This scans
memcg's private LRU and call __isolate_lru_page() to extract a page
@@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
Other lock order is following:
PG_locked.
mm->page_table_lock
zone->lru_lock
zone_lru_lock
lock_page_cgroup.
In many cases, just lock_page_cgroup() is called.
per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
zone->lru_lock, it has no lock of its own.
zone_lru_lock, it has no lock of its own.
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
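The lock order above is easiest to see in isolation. Below is a minimal userspace sketch (plain C with pthread mutexes standing in for the kernel locks; build with -pthread) of the documented hierarchy: every path takes PG_locked, then mm->page_table_lock, then zone_lru_lock, then lock_page_cgroup, and releases in reverse.

/* Illustrative only -- the mutex names are stand-ins for the kernel locks. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pg_locked        = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_table_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t zone_lru_lock    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_cgroup_lock = PTHREAD_MUTEX_INITIALIZER;

static void charge_page(void)
{
	/* Always acquire in the documented order ... */
	pthread_mutex_lock(&pg_locked);
	pthread_mutex_lock(&page_table_lock);
	pthread_mutex_lock(&zone_lru_lock);
	pthread_mutex_lock(&page_cgroup_lock);

	puts("charged one page under the full lock hierarchy");

	/* ... and release in reverse order. */
	pthread_mutex_unlock(&page_cgroup_lock);
	pthread_mutex_unlock(&zone_lru_lock);
	pthread_mutex_unlock(&page_table_lock);
	pthread_mutex_unlock(&pg_locked);
}

int main(void)
{
	charge_page();
	return 0;
}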
@@ -224,7 +224,7 @@ void __init arm64_memblock_init(void)
* via the linear mapping.
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
memblock_enforce_memory_limit(memory_limit);
memblock_mem_limit_remove_map(memory_limit);
memblock_add(__pa(_text), (u64)(_end - _text));
}
@@ -102,7 +102,7 @@ static void appldata_get_mem_data(void *data)
mem_data->totalhigh = P2K(val.totalhigh);
mem_data->freehigh = P2K(val.freehigh);
mem_data->bufferram = P2K(val.bufferram);
mem_data->cached = P2K(global_page_state(NR_FILE_PAGES)
mem_data->cached = P2K(global_node_page_state(NR_FILE_PAGES)
- val.bufferram);
si_swapinfo(&val);
@@ -45,20 +45,20 @@ void show_mem(unsigned int filter)
struct zone *zone;
pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
(global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE)),
(global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE)),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
(global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE)),
(global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_FILE)),
global_node_page_state(NR_FILE_DIRTY),
global_node_page_state(NR_WRITEBACK),
global_node_page_state(NR_UNSTABLE_NFS),
global_page_state(NR_FREE_PAGES),
(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
global_page_state(NR_FILE_MAPPED),
global_node_page_state(NR_FILE_MAPPED),
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE),
global_page_state(NR_FILE_PAGES),
global_node_page_state(NR_FILE_PAGES),
get_nr_swap_pages());
for_each_zone(zone) {
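The show_mem() hunk above illustrates the split this series makes: LRU and page-cache counters move from per-zone vm_stat into per-node counters (global_node_page_state()), while allocator-side items such as NR_FREE_PAGES stay per zone. Below is a minimal userspace sketch of the two views, with invented item names and numbers, modelling sum_zone_node_page_state() versus a native node counter.

#include <stdio.h>

#define MAX_NR_ZONES 3            /* e.g. DMA, NORMAL, MOVABLE */

enum zone_stat { NR_FREE_PAGES, NR_ZONE_INACTIVE_FILE, NR_VM_ZONE_STAT_ITEMS };
enum node_stat { NR_INACTIVE_FILE, NR_FILE_DIRTY, NR_VM_NODE_STAT_ITEMS };

struct zone { unsigned long vm_stat[NR_VM_ZONE_STAT_ITEMS]; };
struct node {
	struct zone zones[MAX_NR_ZONES];
	unsigned long vm_stat[NR_VM_NODE_STAT_ITEMS];
};

/* Old-style view: add the per-zone counters of one node together. */
static unsigned long sum_zone_node_page_state(struct node *n, enum zone_stat item)
{
	unsigned long sum = 0;
	for (int z = 0; z < MAX_NR_ZONES; z++)
		sum += n->zones[z].vm_stat[item];
	return sum;
}

/* New-style view: the node-wide counter is maintained directly. */
static unsigned long node_page_state(struct node *n, enum node_stat item)
{
	return n->vm_stat[item];
}

int main(void)
{
	struct node n = {
		.zones   = { { {100, 10} }, { {400, 30} }, { {50, 5} } },
		.vm_stat = { 45, 7 },   /* inactive file, dirty */
	};

	printf("free pages (summed from zones): %lu\n",
	       sum_zone_node_page_state(&n, NR_FREE_PAGES));
	printf("inactive file (node counter):   %lu\n",
	       node_page_state(&n, NR_INACTIVE_FILE));
	return 0;
}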
@ -56,6 +56,7 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
{
|
||||
int n;
|
||||
int nid = dev->id;
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
struct sysinfo i;
|
||||
|
||||
si_meminfo_node(&i, nid);
|
||||
@ -74,16 +75,16 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
nid, K(i.totalram),
|
||||
nid, K(i.freeram),
|
||||
nid, K(i.totalram - i.freeram),
|
||||
nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
|
||||
node_page_state(nid, NR_ACTIVE_FILE)),
|
||||
nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
|
||||
node_page_state(nid, NR_INACTIVE_FILE)),
|
||||
nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
|
||||
nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
|
||||
nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
|
||||
nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
|
||||
nid, K(node_page_state(nid, NR_UNEVICTABLE)),
|
||||
nid, K(node_page_state(nid, NR_MLOCK)));
|
||||
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
|
||||
node_page_state(pgdat, NR_ACTIVE_FILE)),
|
||||
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
|
||||
node_page_state(pgdat, NR_INACTIVE_FILE)),
|
||||
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
|
||||
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
|
||||
nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
|
||||
nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
|
||||
nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
n += sprintf(buf + n,
|
||||
@ -117,31 +118,30 @@ static ssize_t node_read_meminfo(struct device *dev,
|
||||
"Node %d ShmemPmdMapped: %8lu kB\n"
|
||||
#endif
|
||||
,
|
||||
nid, K(node_page_state(nid, NR_FILE_DIRTY)),
|
||||
nid, K(node_page_state(nid, NR_WRITEBACK)),
|
||||
nid, K(node_page_state(nid, NR_FILE_PAGES)),
|
||||
nid, K(node_page_state(nid, NR_FILE_MAPPED)),
|
||||
nid, K(node_page_state(nid, NR_ANON_PAGES)),
|
||||
nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
|
||||
nid, K(node_page_state(pgdat, NR_WRITEBACK)),
|
||||
nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
|
||||
nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
|
||||
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
|
||||
nid, K(i.sharedram),
|
||||
nid, node_page_state(nid, NR_KERNEL_STACK) *
|
||||
THREAD_SIZE / 1024,
|
||||
nid, K(node_page_state(nid, NR_PAGETABLE)),
|
||||
nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
|
||||
nid, K(node_page_state(nid, NR_BOUNCE)),
|
||||
nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)),
|
||||
nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
|
||||
node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
|
||||
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
|
||||
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) +
|
||||
sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(nid, NR_ANON_THPS) *
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
|
||||
nid, K(node_page_state(pgdat, NR_ANON_THPS) *
|
||||
HPAGE_PMD_NR),
|
||||
nid, K(node_page_state(nid, NR_SHMEM_THPS) *
|
||||
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
|
||||
HPAGE_PMD_NR),
|
||||
nid, K(node_page_state(nid, NR_SHMEM_PMDMAPPED) *
|
||||
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
|
||||
HPAGE_PMD_NR));
|
||||
#else
|
||||
nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
|
||||
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
|
||||
#endif
|
||||
n += hugetlb_report_node_meminfo(nid, buf + n);
|
||||
return n;
|
||||
@@ -160,12 +160,12 @@ static ssize_t node_read_numastat(struct device *dev,
"interleave_hit %lu\n"
"local_node %lu\n"
"other_node %lu\n",
node_page_state(dev->id, NUMA_HIT),
node_page_state(dev->id, NUMA_MISS),
node_page_state(dev->id, NUMA_FOREIGN),
node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
node_page_state(dev->id, NUMA_LOCAL),
node_page_state(dev->id, NUMA_OTHER));
sum_zone_node_page_state(dev->id, NUMA_HIT),
sum_zone_node_page_state(dev->id, NUMA_MISS),
sum_zone_node_page_state(dev->id, NUMA_FOREIGN),
sum_zone_node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
sum_zone_node_page_state(dev->id, NUMA_LOCAL),
sum_zone_node_page_state(dev->id, NUMA_OTHER));
}
static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
@@ -173,12 +173,18 @@ static ssize_t node_read_vmstat(struct device *dev,
struct device_attribute *attr, char *buf)
{
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
int i;
int n = 0;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
node_page_state(nid, i));
sum_zone_node_page_state(nid, i));
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
n += sprintf(buf+n, "%s %lu\n",
vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
node_page_state(pgdat, i));
return n;
}
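node_read_vmstat() above now prints both counter arrays from a single flat name table, indexing node items at an offset of NR_VM_ZONE_STAT_ITEMS. A small standalone sketch of that indexing scheme (names and counts are made up for illustration):

#include <stdio.h>

#define NR_VM_ZONE_STAT_ITEMS 2
#define NR_VM_NODE_STAT_ITEMS 2

static const char *vmstat_text[] = {
	"nr_free_pages", "nr_zone_inactive_file",   /* zone items first */
	"nr_inactive_file", "nr_dirty",             /* node items after */
};

int main(void)
{
	unsigned long zone_stat[NR_VM_ZONE_STAT_ITEMS] = { 550, 45 };
	unsigned long node_stat[NR_VM_NODE_STAT_ITEMS] = { 45, 7 };

	for (int i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		printf("%s %lu\n", vmstat_text[i], zone_stat[i]);
	for (int i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		printf("%s %lu\n", vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
		       node_stat[i]);
	return 0;
}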
@@ -72,10 +72,10 @@ static unsigned long lowmem_deathpending_timeout;
static unsigned long lowmem_count(struct shrinker *s,
struct shrink_control *sc)
{
return global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE) +
global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE);
return global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE) +
global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_FILE);
}
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
@@ -91,8 +91,8 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
short selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
int other_file = global_page_state(NR_FILE_PAGES) -
global_page_state(NR_SHMEM) -
int other_file = global_node_page_state(NR_FILE_PAGES) -
global_node_page_state(NR_SHMEM) -
total_swapcache_pages();
if (lowmem_adj_size < array_size)
@@ -1864,7 +1864,8 @@ void osc_dec_unstable_pages(struct ptlrpc_request *req)
LASSERT(page_count >= 0);
for (i = 0; i < page_count; i++)
dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
dec_node_page_state(desc->bd_iov[i].kiov_page,
NR_UNSTABLE_NFS);
atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
@@ -1898,7 +1899,8 @@ void osc_inc_unstable_pages(struct ptlrpc_request *req)
LASSERT(page_count >= 0);
for (i = 0; i < page_count; i++)
inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
inc_node_page_state(desc->bd_iov[i].kiov_page,
NR_UNSTABLE_NFS);
LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
@@ -1807,8 +1807,8 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
*/
static unsigned long get_nr_dirty_pages(void)
{
return global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
return global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}
@@ -1452,7 +1452,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
list_del(&req->writepages_entry);
for (i = 0; i < req->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
}
wake_up(&fi->page_waitq);
@@ -1642,7 +1642,7 @@ static int fuse_writepage_locked(struct page *page)
req->inode = inode;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
spin_lock(&fc->lock);
list_add(&req->writepages_entry, &fi->writepages);
@@ -1756,7 +1756,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
spin_unlock(&fc->lock);
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_zone_page_state(page, NR_WRITEBACK_TEMP);
dec_node_page_state(page, NR_WRITEBACK_TEMP);
wb_writeout_inc(&bdi->wb);
fuse_writepage_free(fc, new_req);
fuse_request_free(new_req);
@@ -1855,7 +1855,7 @@ static int fuse_writepages_fill(struct page *page,
req->page_descs[req->num_pages].length = PAGE_SIZE;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0;
if (is_writeback && fuse_writepage_in_flight(req, page)) {
@@ -623,7 +623,7 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
if (!cinfo->dreq) {
struct inode *inode = page_file_mapping(page)->host;
inc_zone_page_state(page, NR_UNSTABLE_NFS);
inc_node_page_state(page, NR_UNSTABLE_NFS);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
@@ -898,7 +898,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
static void
nfs_clear_page_commit(struct page *page)
{
dec_zone_page_state(page, NR_UNSTABLE_NFS);
dec_node_page_state(page, NR_UNSTABLE_NFS);
dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
WB_RECLAIMABLE);
}
fs/proc/base.c: 185 lines changed
@ -1024,23 +1024,107 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
|
||||
char buffer[PROC_NUMBUF];
|
||||
int oom_adj = OOM_ADJUST_MIN;
|
||||
size_t len;
|
||||
unsigned long flags;
|
||||
|
||||
if (!task)
|
||||
return -ESRCH;
|
||||
if (lock_task_sighand(task, &flags)) {
|
||||
if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
|
||||
oom_adj = OOM_ADJUST_MAX;
|
||||
else
|
||||
oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
|
||||
OOM_SCORE_ADJ_MAX;
|
||||
unlock_task_sighand(task, &flags);
|
||||
}
|
||||
if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
|
||||
oom_adj = OOM_ADJUST_MAX;
|
||||
else
|
||||
oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
|
||||
OOM_SCORE_ADJ_MAX;
|
||||
put_task_struct(task);
|
||||
len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
|
||||
return simple_read_from_buffer(buf, count, ppos, buffer, len);
|
||||
}
|
||||
|
||||
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
|
||||
{
|
||||
static DEFINE_MUTEX(oom_adj_mutex);
|
||||
struct mm_struct *mm = NULL;
|
||||
struct task_struct *task;
|
||||
int err = 0;
|
||||
|
||||
task = get_proc_task(file_inode(file));
|
||||
if (!task)
|
||||
return -ESRCH;
|
||||
|
||||
mutex_lock(&oom_adj_mutex);
|
||||
if (legacy) {
|
||||
if (oom_adj < task->signal->oom_score_adj &&
|
||||
!capable(CAP_SYS_RESOURCE)) {
|
||||
err = -EACCES;
|
||||
goto err_unlock;
|
||||
}
|
||||
/*
|
||||
* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
|
||||
* /proc/pid/oom_score_adj instead.
|
||||
*/
|
||||
pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
|
||||
current->comm, task_pid_nr(current), task_pid_nr(task),
|
||||
task_pid_nr(task));
|
||||
} else {
|
||||
if ((short)oom_adj < task->signal->oom_score_adj_min &&
|
||||
!capable(CAP_SYS_RESOURCE)) {
|
||||
err = -EACCES;
|
||||
goto err_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure we will check other processes sharing the mm if this is
|
||||
* not vfrok which wants its own oom_score_adj.
|
||||
* pin the mm so it doesn't go away and get reused after task_unlock
|
||||
*/
|
||||
if (!task->vfork_done) {
|
||||
struct task_struct *p = find_lock_task_mm(task);
|
||||
|
||||
if (p) {
|
||||
if (atomic_read(&p->mm->mm_users) > 1) {
|
||||
mm = p->mm;
|
||||
atomic_inc(&mm->mm_count);
|
||||
}
|
||||
task_unlock(p);
|
||||
}
|
||||
}
|
||||
|
||||
task->signal->oom_score_adj = oom_adj;
|
||||
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
|
||||
task->signal->oom_score_adj_min = (short)oom_adj;
|
||||
trace_oom_score_adj_update(task);
|
||||
|
||||
if (mm) {
|
||||
struct task_struct *p;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_process(p) {
|
||||
if (same_thread_group(task, p))
|
||||
continue;
|
||||
|
||||
/* do not touch kernel threads or the global init */
|
||||
if (p->flags & PF_KTHREAD || is_global_init(p))
|
||||
continue;
|
||||
|
||||
task_lock(p);
|
||||
if (!p->vfork_done && process_shares_mm(p, mm)) {
|
||||
pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
|
||||
task_pid_nr(p), p->comm,
|
||||
p->signal->oom_score_adj, oom_adj,
|
||||
task_pid_nr(task), task->comm);
|
||||
p->signal->oom_score_adj = oom_adj;
|
||||
if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
|
||||
p->signal->oom_score_adj_min = (short)oom_adj;
|
||||
}
|
||||
task_unlock(p);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
mmdrop(mm);
|
||||
}
|
||||
err_unlock:
|
||||
mutex_unlock(&oom_adj_mutex);
|
||||
put_task_struct(task);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* /proc/pid/oom_adj exists solely for backwards compatibility with previous
|
||||
* kernels. The effective policy is defined by oom_score_adj, which has a
|
||||
@ -1054,10 +1138,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
|
||||
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct task_struct *task;
|
||||
char buffer[PROC_NUMBUF];
|
||||
int oom_adj;
|
||||
unsigned long flags;
|
||||
int err;
|
||||
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
@ -1077,23 +1159,6 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
|
||||
goto out;
|
||||
}
|
||||
|
||||
task = get_proc_task(file_inode(file));
|
||||
if (!task) {
|
||||
err = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
task_lock(task);
|
||||
if (!task->mm) {
|
||||
err = -EINVAL;
|
||||
goto err_task_lock;
|
||||
}
|
||||
|
||||
if (!lock_task_sighand(task, &flags)) {
|
||||
err = -ESRCH;
|
||||
goto err_task_lock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
|
||||
* value is always attainable.
|
||||
@ -1103,27 +1168,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
|
||||
else
|
||||
oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
|
||||
|
||||
if (oom_adj < task->signal->oom_score_adj &&
|
||||
!capable(CAP_SYS_RESOURCE)) {
|
||||
err = -EACCES;
|
||||
goto err_sighand;
|
||||
}
|
||||
|
||||
/*
|
||||
* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
|
||||
* /proc/pid/oom_score_adj instead.
|
||||
*/
|
||||
pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
|
||||
current->comm, task_pid_nr(current), task_pid_nr(task),
|
||||
task_pid_nr(task));
|
||||
|
||||
task->signal->oom_score_adj = oom_adj;
|
||||
trace_oom_score_adj_update(task);
|
||||
err_sighand:
|
||||
unlock_task_sighand(task, &flags);
|
||||
err_task_lock:
|
||||
task_unlock(task);
|
||||
put_task_struct(task);
|
||||
err = __set_oom_adj(file, oom_adj, true);
|
||||
out:
|
||||
return err < 0 ? err : count;
|
||||
}
|
||||
@ -1140,15 +1185,11 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
|
||||
struct task_struct *task = get_proc_task(file_inode(file));
|
||||
char buffer[PROC_NUMBUF];
|
||||
short oom_score_adj = OOM_SCORE_ADJ_MIN;
|
||||
unsigned long flags;
|
||||
size_t len;
|
||||
|
||||
if (!task)
|
||||
return -ESRCH;
|
||||
if (lock_task_sighand(task, &flags)) {
|
||||
oom_score_adj = task->signal->oom_score_adj;
|
||||
unlock_task_sighand(task, &flags);
|
||||
}
|
||||
oom_score_adj = task->signal->oom_score_adj;
|
||||
put_task_struct(task);
|
||||
len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
|
||||
return simple_read_from_buffer(buf, count, ppos, buffer, len);
|
||||
@ -1157,9 +1198,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
|
||||
static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct task_struct *task;
|
||||
char buffer[PROC_NUMBUF];
|
||||
unsigned long flags;
|
||||
int oom_score_adj;
|
||||
int err;
|
||||
|
||||
@ -1180,39 +1219,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
|
||||
goto out;
|
||||
}
|
||||
|
||||
task = get_proc_task(file_inode(file));
|
||||
if (!task) {
|
||||
err = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
task_lock(task);
|
||||
if (!task->mm) {
|
||||
err = -EINVAL;
|
||||
goto err_task_lock;
|
||||
}
|
||||
|
||||
if (!lock_task_sighand(task, &flags)) {
|
||||
err = -ESRCH;
|
||||
goto err_task_lock;
|
||||
}
|
||||
|
||||
if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
|
||||
!capable(CAP_SYS_RESOURCE)) {
|
||||
err = -EACCES;
|
||||
goto err_sighand;
|
||||
}
|
||||
|
||||
task->signal->oom_score_adj = (short)oom_score_adj;
|
||||
if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
|
||||
task->signal->oom_score_adj_min = (short)oom_score_adj;
|
||||
trace_oom_score_adj_update(task);
|
||||
|
||||
err_sighand:
|
||||
unlock_task_sighand(task, &flags);
|
||||
err_task_lock:
|
||||
task_unlock(task);
|
||||
put_task_struct(task);
|
||||
err = __set_oom_adj(file, oom_score_adj, false);
|
||||
out:
|
||||
return err < 0 ? err : count;
|
||||
}
|
||||
|
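The rewritten /proc/<pid>/oom_adj handlers above funnel both legacy and modern writes through __set_oom_adj(), converting between the legacy -17..15 scale and oom_score_adj's -1000..1000 scale. The standalone sketch below reproduces that conversion arithmetic; the constants mirror the uapi oom.h values (OOM_DISABLE == -17, OOM_SCORE_ADJ_MAX == 1000, OOM_ADJUST_MAX == 15) and should be treated as assumptions of the sketch. The legacy scale is coarser, so round-trips are only approximate.

#include <stdio.h>

#define OOM_DISABLE        (-17)
#define OOM_ADJUST_MAX       15
#define OOM_SCORE_ADJ_MIN (-1000)
#define OOM_SCORE_ADJ_MAX   1000

/* What oom_adj_write() computes before calling __set_oom_adj(..., legacy=true). */
static int oom_adj_to_score_adj(int oom_adj)
{
	if (oom_adj == OOM_ADJUST_MAX)
		return OOM_SCORE_ADJ_MAX;
	return oom_adj * OOM_SCORE_ADJ_MAX / -OOM_DISABLE;
}

/* What oom_adj_read() reports back from the stored oom_score_adj. */
static int score_adj_to_oom_adj(int oom_score_adj)
{
	if (oom_score_adj == OOM_SCORE_ADJ_MAX)
		return OOM_ADJUST_MAX;
	return oom_score_adj * -OOM_DISABLE / OOM_SCORE_ADJ_MAX;
}

int main(void)
{
	for (int oom_adj = -17; oom_adj <= 15; oom_adj += 8) {
		int score = oom_adj_to_score_adj(oom_adj);
		printf("oom_adj %4d -> oom_score_adj %5d -> oom_adj %4d\n",
		       oom_adj, score, score_adj_to_oom_adj(score));
	}
	return 0;
}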
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
si_swapinfo(&i);
committed = percpu_counter_read_positive(&vm_committed_as);
cached = global_page_state(NR_FILE_PAGES) -
cached = global_node_page_state(NR_FILE_PAGES) -
total_swapcache_pages() - i.bufferram;
if (cached < 0)
cached = 0;
@@ -138,23 +138,23 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
K(i.totalswap),
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
K(global_node_page_state(NR_FILE_DIRTY)),
K(global_node_page_state(NR_WRITEBACK)),
K(global_node_page_state(NR_ANON_MAPPED)),
K(global_node_page_state(NR_FILE_MAPPED)),
K(i.sharedram),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_SLAB_RECLAIMABLE)),
K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024,
global_page_state(NR_KERNEL_STACK_KB),
K(global_page_state(NR_PAGETABLE)),
#ifdef CONFIG_QUICKLIST
K(quicklist_total_size()),
#endif
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_node_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
K(global_page_state(NR_WRITEBACK_TEMP)),
K(global_node_page_state(NR_WRITEBACK_TEMP)),
K(vm_commit_limit()),
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
@@ -164,9 +164,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
, K(global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
, K(global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
, K(global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
, K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
, K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
, K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
#endif
#ifdef CONFIG_CMA
, K(totalcma_pages)
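The meminfo hunk above keeps the long-standing "Cached:" arithmetic while switching it to the node counter: page-cache pages minus swap-cache pages and buffer pages, clamped at zero because the per-CPU counters are only approximately coherent. A tiny standalone sketch of that arithmetic with invented numbers (units are 4 KiB pages):

#include <stdio.h>

int main(void)
{
	long file_pages = 120000;   /* global_node_page_state(NR_FILE_PAGES) */
	long swapcache  = 1500;     /* total_swapcache_pages() */
	long bufferram  = 8000;     /* i.bufferram */

	long cached = file_pages - swapcache - bufferram;
	if (cached < 0)
		cached = 0;

	printf("Cached: %ld pages (%ld kB with 4 KiB pages)\n",
	       cached, cached * 4);
	return 0;
}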
@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
}
long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct zone *zone, int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -1,6 +1,18 @@
#ifndef _LINUX_COMPACTION_H
#define _LINUX_COMPACTION_H
/*
* Determines how hard direct compaction should try to succeed.
* Lower value means higher priority, analogically to reclaim priority.
*/
enum compact_priority {
COMPACT_PRIO_SYNC_LIGHT,
MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
COMPACT_PRIO_ASYNC,
INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC
};
/* Return values for compact_zone() and try_to_compact_pages() */
/* When adding new states, please adjust include/trace/events/compaction.h */
enum compact_result {
@@ -43,14 +55,6 @@ enum compact_result {
COMPACT_PARTIAL,
};
/* Used to signal whether compaction detected need_sched() or lock contention */
/* No contention detected */
#define COMPACT_CONTENDED_NONE 0
/* Either need_sched() was true or fatal signal pending */
#define COMPACT_CONTENDED_SCHED 1
/* Zone lock or lru_lock was contended in async compaction */
#define COMPACT_CONTENDED_LOCK 2
struct alloc_context; /* in mm/internal.h */
#ifdef CONFIG_COMPACTION
@@ -64,9 +68,8 @@ extern int sysctl_compact_unevictable_allowed;
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
enum migrate_mode mode, int *contended);
unsigned int order, unsigned int alloc_flags,
const struct alloc_context *ac, enum compact_priority prio);
extern void compact_pgdat(pg_data_t *pgdat, int order);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
@@ -151,14 +154,6 @@ extern void kcompactd_stop(int nid);
extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
#else
static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, int alloc_flags,
const struct alloc_context *ac,
enum migrate_mode mode, int *contended)
{
return COMPACT_CONTINUE;
}
static inline void compact_pgdat(pg_data_t *pgdat, int order)
{
}
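The new compact_priority enum above replaces the migrate_mode/contended pair in the direct-compaction API: numerically lower values try harder, and callers start at INIT_COMPACT_PRIORITY and step toward MIN_COMPACT_PRIORITY when compaction fails. A simplified userspace model of that retry loop follows; the try_compaction() stub is a stand-in for try_to_compact_pages(), not kernel code.

#include <stdbool.h>
#include <stdio.h>

enum compact_priority {
	COMPACT_PRIO_SYNC_LIGHT,
	MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
	DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
	COMPACT_PRIO_ASYNC,
	INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC
};

/* Stand-in: pretend only the highest-effort mode can succeed. */
static bool try_compaction(enum compact_priority prio)
{
	printf("compacting at priority %d (%s)\n", prio,
	       prio == COMPACT_PRIO_ASYNC ? "async" : "sync light");
	return prio == COMPACT_PRIO_SYNC_LIGHT;
}

int main(void)
{
	enum compact_priority prio = INIT_COMPACT_PRIORITY;

	while (!try_compaction(prio)) {
		if (prio == MIN_COMPACT_PRIORITY) {
			puts("giving up: already at maximum effort");
			return 1;
		}
		prio--;	/* lower value means higher priority */
	}
	puts("compaction succeeded");
	return 0;
}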
@@ -237,9 +237,11 @@ struct vm_area_struct;
* are expected to be movable via page reclaim or page migration. Typically,
* pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
*
* GFP_TRANSHUGE is used for THP allocations. They are compound allocations
* that will fail quickly if memory is not available and will not wake
* kswapd on failure.
* GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
* compound allocations that will generally fail quickly if memory is not
* available and will not wake kswapd/kcompactd on failure. The _LIGHT
* version does not attempt reclaim/compaction at all and is by default used
* in page fault path, while the non-light is used by khugepaged.
*/
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -254,9 +256,9 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
~__GFP_RECLAIM)
#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
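The GFP_TRANSHUGE/GFP_TRANSHUGE_LIGHT definitions above are pure flag algebra: both strip __GFP_RECLAIM (so neither wakes kswapd), and only the non-light variant adds __GFP_DIRECT_RECLAIM back. The standalone sketch below reproduces that algebra with invented bit positions; they do not match the real gfp.h values.

#include <stdio.h>

#define __GFP_HIGHMEM         0x01u
#define __GFP_MOVABLE         0x02u
#define __GFP_IO              0x04u
#define __GFP_FS              0x08u
#define __GFP_COMP            0x10u
#define __GFP_NOMEMALLOC      0x20u
#define __GFP_NOWARN          0x40u
#define __GFP_DIRECT_RECLAIM  0x80u
#define __GFP_KSWAPD_RECLAIM 0x100u
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)

#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)

#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			      __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)

int main(void)
{
	printf("GFP_TRANSHUGE_LIGHT = 0x%03x (direct reclaim? %s, kswapd? %s)\n",
	       GFP_TRANSHUGE_LIGHT,
	       GFP_TRANSHUGE_LIGHT & __GFP_DIRECT_RECLAIM ? "yes" : "no",
	       GFP_TRANSHUGE_LIGHT & __GFP_KSWAPD_RECLAIM ? "yes" : "no");
	printf("GFP_TRANSHUGE       = 0x%03x (direct reclaim? %s, kswapd? %s)\n",
	       GFP_TRANSHUGE,
	       GFP_TRANSHUGE & __GFP_DIRECT_RECLAIM ? "yes" : "no",
	       GFP_TRANSHUGE & __GFP_KSWAPD_RECLAIM ? "yes" : "no");
	return 0;
}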
@@ -11,7 +11,7 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, unsigned long next);
extern int zap_huge_pmd(struct mmu_gather *tlb,
@@ -77,6 +77,7 @@ void kasan_free_shadow(const struct vm_struct *vm);
size_t ksize(const void *);
static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
size_t kasan_metadata_size(struct kmem_cache *cache);
#else /* CONFIG_KASAN */
@@ -121,6 +122,7 @@ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
static inline void kasan_unpoison_slab(const void *ptr) { }
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
#endif /* CONFIG_KASAN */
@@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void);
static inline
int kdb_process_cpu(const struct task_struct *p)
{
unsigned int cpu = task_thread_info(p)->cpu;
unsigned int cpu = task_cpu(p);
if (cpu > num_possible_cpus())
cpu = 0;
return cpu;
@@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
void memblock_mem_limit_remove_map(phys_addr_t limit);
bool memblock_is_memory(phys_addr_t addr);
int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
@ -52,7 +52,7 @@ enum mem_cgroup_stat_index {
|
||||
MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
|
||||
MEM_CGROUP_STAT_NSTATS,
|
||||
/* default hierarchy stats */
|
||||
MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS,
|
||||
MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS,
|
||||
MEMCG_SLAB_RECLAIMABLE,
|
||||
MEMCG_SLAB_UNRECLAIMABLE,
|
||||
MEMCG_SOCK,
|
||||
@ -60,7 +60,7 @@ enum mem_cgroup_stat_index {
|
||||
};
|
||||
|
||||
struct mem_cgroup_reclaim_cookie {
|
||||
struct zone *zone;
|
||||
pg_data_t *pgdat;
|
||||
int priority;
|
||||
unsigned int generation;
|
||||
};
|
||||
@ -118,7 +118,7 @@ struct mem_cgroup_reclaim_iter {
|
||||
/*
|
||||
* per-zone information in memory controller.
|
||||
*/
|
||||
struct mem_cgroup_per_zone {
|
||||
struct mem_cgroup_per_node {
|
||||
struct lruvec lruvec;
|
||||
unsigned long lru_size[NR_LRU_LISTS];
|
||||
|
||||
@ -132,10 +132,6 @@ struct mem_cgroup_per_zone {
|
||||
/* use container_of */
|
||||
};
|
||||
|
||||
struct mem_cgroup_per_node {
|
||||
struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
|
||||
};
|
||||
|
||||
struct mem_cgroup_threshold {
|
||||
struct eventfd_ctx *eventfd;
|
||||
unsigned long threshold;
|
||||
@ -314,8 +310,46 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
|
||||
|
||||
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
|
||||
|
||||
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
|
||||
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
|
||||
static struct mem_cgroup_per_node *
|
||||
mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
|
||||
{
|
||||
return memcg->nodeinfo[nid];
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
|
||||
* @node: node of the wanted lruvec
|
||||
* @memcg: memcg of the wanted lruvec
|
||||
*
|
||||
* Returns the lru list vector holding pages for a given @node or a given
|
||||
* @memcg and @zone. This can be the node lruvec, if the memory controller
|
||||
* is disabled.
|
||||
*/
|
||||
static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
struct mem_cgroup_per_node *mz;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
if (mem_cgroup_disabled()) {
|
||||
lruvec = node_lruvec(pgdat);
|
||||
goto out;
|
||||
}
|
||||
|
||||
mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
|
||||
lruvec = &mz->lruvec;
|
||||
out:
|
||||
/*
|
||||
* Since a node can be onlined after the mem_cgroup was created,
|
||||
* we have to be prepared to initialize lruvec->pgdat here;
|
||||
* and if offlined then reonlined, we need to reinitialize it.
|
||||
*/
|
||||
if (unlikely(lruvec->pgdat != pgdat))
|
||||
lruvec->pgdat = pgdat;
|
||||
return lruvec;
|
||||
}
|
||||
|
||||
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
|
||||
|
||||
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
|
||||
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
|
||||
@ -404,9 +438,9 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
static inline
|
||||
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||
{
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
return mz->lru_size[lru];
|
||||
}
|
||||
|
||||
@ -477,7 +511,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
|
||||
mem_cgroup_update_page_stat(page, idx, -1);
|
||||
}
|
||||
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned);
|
||||
|
||||
@ -568,16 +602,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
|
||||
struct mem_cgroup *memcg)
|
||||
static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
return &zone->lruvec;
|
||||
return node_lruvec(pgdat);
|
||||
}
|
||||
|
||||
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
|
||||
struct zone *zone)
|
||||
struct pglist_data *pgdat)
|
||||
{
|
||||
return &zone->lruvec;
|
||||
return &pgdat->lruvec;
|
||||
}
|
||||
|
||||
static inline bool mm_match_cgroup(struct mm_struct *mm,
|
||||
@ -681,7 +715,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
|
@@ -26,7 +26,7 @@ struct vmem_altmap {
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
@@ -933,6 +933,11 @@ static inline struct zone *page_zone(const struct page *page)
return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
}
static inline pg_data_t *page_pgdat(const struct page *page)
{
return NODE_DATA(page_to_nid(page));
}
#ifdef SECTION_IN_PAGE_FLAGS
static inline void set_page_section(struct page *page, unsigned long section)
{
@@ -973,11 +978,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
{
return page->mem_cgroup;
}
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return READ_ONCE(page->mem_cgroup);
}
#else
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
}
#endif
/*
@@ -2284,6 +2299,8 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
}
#endif /* __HAVE_ARCH_GATE_AREA */
extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
#ifdef CONFIG_SYSCTL
extern int sysctl_drop_caches;
int drop_caches_sysctl_handler(struct ctl_table *, int,
@@ -23,25 +23,30 @@ static inline int page_is_file_cache(struct page *page)
}
static __always_inline void __update_lru_size(struct lruvec *lruvec,
enum lru_list lru, int nr_pages)
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
__mod_zone_page_state(&pgdat->node_zones[zid],
NR_ZONE_LRU_BASE + lru, nr_pages);
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, int nr_pages)
enum lru_list lru, enum zone_type zid,
int nr_pages)
{
__update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
#else
__update_lru_size(lruvec, lru, nr_pages);
#endif
}
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
update_lru_size(lruvec, lru, hpage_nr_pages(page));
update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
list_add(&page->lru, &lruvec->lists[lru]);
}
@@ -49,7 +54,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
list_del(&page->lru);
update_lru_size(lruvec, lru, -hpage_nr_pages(page));
update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
}
/**
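With the LRU now per node, __update_lru_size() above does double book-keeping: the authoritative LRU size lives in the node counter, while a per-zone NR_ZONE_* counter is still maintained for compaction and allocator heuristics. A reduced userspace model of that dual update (all structures are simplified stand-ins, not the kernel's):

#include <stdio.h>

#define MAX_NR_ZONES 3

enum lru_list { LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRU_LISTS };

struct zone  { long nr_zone_lru[NR_LRU_LISTS]; };
struct pgdat {
	struct zone node_zones[MAX_NR_ZONES];
	long nr_lru[NR_LRU_LISTS];
};

static void update_lru_size(struct pgdat *pgdat, enum lru_list lru,
			    int zid, int nr_pages)
{
	pgdat->nr_lru[lru] += nr_pages;                      /* node-wide */
	pgdat->node_zones[zid].nr_zone_lru[lru] += nr_pages; /* per zone  */
}

int main(void)
{
	static struct pgdat node;   /* zero-initialized */

	update_lru_size(&node, LRU_INACTIVE_FILE, 1, +32);  /* add to zone 1 */
	update_lru_size(&node, LRU_INACTIVE_FILE, 1, -8);   /* remove some   */

	printf("node inactive file: %ld, zone 1 inactive file: %ld\n",
	       node.nr_lru[LRU_INACTIVE_FILE],
	       node.node_zones[1].nr_zone_lru[LRU_INACTIVE_FILE]);
	return 0;
}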
@@ -118,7 +118,7 @@ struct page {
*/
union {
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
* protected by zone_lru_lock !
* Can be used as a generic list
* by the page owner.
*/
@@ -93,7 +93,7 @@ struct free_area {
struct pglist_data;
/*
* zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
* zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
* So add a wild amount of padding here to ensure that they fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
@ -110,36 +110,20 @@ struct zone_padding {
|
||||
enum zone_stat_item {
|
||||
/* First 128 byte cacheline (assuming 64 bit words) */
|
||||
NR_FREE_PAGES,
|
||||
NR_ALLOC_BATCH,
|
||||
NR_LRU_BASE,
|
||||
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
|
||||
NR_ACTIVE_ANON, /* " " " " " */
|
||||
NR_INACTIVE_FILE, /* " " " " " */
|
||||
NR_ACTIVE_FILE, /* " " " " " */
|
||||
NR_UNEVICTABLE, /* " " " " " */
|
||||
NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
|
||||
NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
|
||||
NR_ZONE_ACTIVE_ANON,
|
||||
NR_ZONE_INACTIVE_FILE,
|
||||
NR_ZONE_ACTIVE_FILE,
|
||||
NR_ZONE_UNEVICTABLE,
|
||||
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
|
||||
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
|
||||
NR_ANON_PAGES, /* Mapped anonymous pages */
|
||||
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
|
||||
only modified from process context */
|
||||
NR_FILE_PAGES,
|
||||
NR_FILE_DIRTY,
|
||||
NR_WRITEBACK,
|
||||
NR_SLAB_RECLAIMABLE,
|
||||
NR_SLAB_UNRECLAIMABLE,
|
||||
NR_PAGETABLE, /* used for pagetables */
|
||||
NR_KERNEL_STACK,
|
||||
NR_KERNEL_STACK_KB, /* measured in KiB */
|
||||
/* Second 128 byte cacheline */
|
||||
NR_UNSTABLE_NFS, /* NFS unstable pages */
|
||||
NR_BOUNCE,
|
||||
NR_VMSCAN_WRITE,
|
||||
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
|
||||
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
|
||||
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
|
||||
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
|
||||
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
|
||||
NR_DIRTIED, /* page dirtyings since bootup */
|
||||
NR_WRITTEN, /* page writings since bootup */
|
||||
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
|
||||
#if IS_ENABLED(CONFIG_ZSMALLOC)
|
||||
NR_ZSPAGES, /* allocated in zsmalloc */
|
||||
#endif
|
||||
@ -151,14 +135,40 @@ enum zone_stat_item {
|
||||
NUMA_LOCAL, /* allocation from local node */
|
||||
NUMA_OTHER, /* allocation from other node */
|
||||
#endif
|
||||
NR_FREE_CMA_PAGES,
|
||||
NR_VM_ZONE_STAT_ITEMS };
|
||||
|
||||
enum node_stat_item {
|
||||
NR_LRU_BASE,
|
||||
NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
|
||||
NR_ACTIVE_ANON, /* " " " " " */
|
||||
NR_INACTIVE_FILE, /* " " " " " */
|
||||
NR_ACTIVE_FILE, /* " " " " " */
|
||||
NR_UNEVICTABLE, /* " " " " " */
|
||||
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
|
||||
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
|
||||
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
|
||||
WORKINGSET_REFAULT,
|
||||
WORKINGSET_ACTIVATE,
|
||||
WORKINGSET_NODERECLAIM,
|
||||
NR_ANON_THPS,
|
||||
NR_ANON_MAPPED, /* Mapped anonymous pages */
|
||||
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
|
||||
only modified from process context */
|
||||
NR_FILE_PAGES,
|
||||
NR_FILE_DIRTY,
|
||||
NR_WRITEBACK,
|
||||
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
|
||||
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
|
||||
NR_SHMEM_THPS,
|
||||
NR_SHMEM_PMDMAPPED,
|
||||
NR_FREE_CMA_PAGES,
|
||||
NR_VM_ZONE_STAT_ITEMS };
|
||||
NR_ANON_THPS,
|
||||
NR_UNSTABLE_NFS, /* NFS unstable pages */
|
||||
NR_VMSCAN_WRITE,
|
||||
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
|
||||
NR_DIRTIED, /* page dirtyings since bootup */
|
||||
NR_WRITTEN, /* page writings since bootup */
|
||||
NR_VM_NODE_STAT_ITEMS
|
||||
};
|
||||
|
||||
/*
|
||||
* We do arithmetic on the LRU lists in various places in the code,
|
||||
@ -215,7 +225,7 @@ struct lruvec {
|
||||
/* Evictions & activations on the inactive file list */
|
||||
atomic_long_t inactive_age;
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct zone *zone;
|
||||
struct pglist_data *pgdat;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -267,6 +277,11 @@ struct per_cpu_pageset {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct per_cpu_nodestat {
|
||||
s8 stat_threshold;
|
||||
s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
|
||||
};
|
||||
|
||||
#endif /* !__GENERATING_BOUNDS.H */
|
||||
|
||||
enum zone_type {
|
||||
@ -348,22 +363,9 @@ struct zone {
|
||||
#ifdef CONFIG_NUMA
|
||||
int node;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
|
||||
* this zone's LRU. Maintained by the pageout code.
|
||||
*/
|
||||
unsigned int inactive_ratio;
|
||||
|
||||
struct pglist_data *zone_pgdat;
|
||||
struct per_cpu_pageset __percpu *pageset;
|
||||
|
||||
/*
|
||||
* This is a per-zone reserve of pages that are not available
|
||||
* to userspace allocations.
|
||||
*/
|
||||
unsigned long totalreserve_pages;
|
||||
|
||||
#ifndef CONFIG_SPARSEMEM
|
||||
/*
|
||||
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
|
||||
@ -372,14 +374,6 @@ struct zone {
|
||||
unsigned long *pageblock_flags;
|
||||
#endif /* CONFIG_SPARSEMEM */
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* zone reclaim becomes active if more unmapped pages exist.
|
||||
*/
|
||||
unsigned long min_unmapped_pages;
|
||||
unsigned long min_slab_pages;
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
|
||||
unsigned long zone_start_pfn;
|
||||
|
||||
@ -472,24 +466,21 @@ struct zone {
|
||||
unsigned long wait_table_hash_nr_entries;
|
||||
unsigned long wait_table_bits;
|
||||
|
||||
/* Write-intensive fields used from the page allocator */
|
||||
ZONE_PADDING(_pad1_)
|
||||
|
||||
/* free areas of different sizes */
|
||||
struct free_area free_area[MAX_ORDER];
|
||||
|
||||
/* zone flags, see below */
|
||||
unsigned long flags;
|
||||
|
||||
/* Write-intensive fields used from the page allocator */
|
||||
/* Primarily protects free_area */
|
||||
spinlock_t lock;
|
||||
|
||||
/* Write-intensive fields used by compaction and vmstats. */
|
||||
ZONE_PADDING(_pad2_)
|
||||
|
||||
/* Write-intensive fields used by page reclaim */
|
||||
|
||||
/* Fields commonly accessed by the page reclaim scanner */
|
||||
spinlock_t lru_lock;
|
||||
struct lruvec lruvec;
|
||||
|
||||
/*
|
||||
* When free pages are below this point, additional steps are taken
|
||||
* when reading the number of free pages to avoid per-cpu counter
|
||||
@ -527,19 +518,18 @@ struct zone {
|
||||
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
|
||||
} ____cacheline_internodealigned_in_smp;
|
||||
|
||||
enum zone_flags {
|
||||
ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
|
||||
ZONE_CONGESTED, /* zone has many dirty pages backed by
|
||||
enum pgdat_flags {
|
||||
PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
|
||||
* a congested BDI
|
||||
*/
|
||||
ZONE_DIRTY, /* reclaim scanning has recently found
|
||||
PGDAT_DIRTY, /* reclaim scanning has recently found
|
||||
* many dirty file pages at the tail
|
||||
* of the LRU.
|
||||
*/
|
||||
ZONE_WRITEBACK, /* reclaim scanning has recently found
|
||||
PGDAT_WRITEBACK, /* reclaim scanning has recently found
|
||||
* many pages under writeback
|
||||
*/
|
||||
ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
|
||||
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
|
||||
};
|
||||
|
||||
static inline unsigned long zone_end_pfn(const struct zone *zone)
|
||||
@ -663,8 +653,9 @@ typedef struct pglist_data {
|
||||
wait_queue_head_t pfmemalloc_wait;
|
||||
struct task_struct *kswapd; /* Protected by
|
||||
mem_hotplug_begin/end() */
|
||||
int kswapd_max_order;
|
||||
enum zone_type classzone_idx;
|
||||
int kswapd_order;
|
||||
enum zone_type kswapd_classzone_idx;
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
int kcompactd_max_order;
|
||||
enum zone_type kcompactd_classzone_idx;
|
||||
@ -681,6 +672,23 @@ typedef struct pglist_data {
|
||||
/* Number of pages migrated during the rate limiting time interval */
|
||||
unsigned long numabalancing_migrate_nr_pages;
|
||||
#endif
|
||||
/*
|
||||
* This is a per-node reserve of pages that are not available
|
||||
* to userspace allocations.
|
||||
*/
|
||||
unsigned long totalreserve_pages;
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* zone reclaim becomes active if more unmapped pages exist.
|
||||
*/
|
||||
unsigned long min_unmapped_pages;
|
||||
unsigned long min_slab_pages;
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
/* Write-intensive fields used by page reclaim */
|
||||
ZONE_PADDING(_pad1_)
|
||||
spinlock_t lru_lock;
|
||||
|
||||
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
|
||||
/*
|
||||
@ -695,6 +703,23 @@ typedef struct pglist_data {
|
||||
struct list_head split_queue;
|
||||
unsigned long split_queue_len;
|
||||
#endif
|
||||
|
||||
/* Fields commonly accessed by the page reclaim scanner */
|
||||
struct lruvec lruvec;
|
||||
|
||||
/*
|
||||
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
|
||||
* this node's LRU. Maintained by the pageout code.
|
||||
*/
|
||||
unsigned int inactive_ratio;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
ZONE_PADDING(_pad2_)
|
||||
|
||||
/* Per-node vmstats */
|
||||
struct per_cpu_nodestat __percpu *per_cpu_nodestats;
|
||||
atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
|
||||
} pg_data_t;
|
||||
|
||||
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
|
||||
@@ -708,6 +733,15 @@ typedef struct pglist_data {
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
static inline spinlock_t *zone_lru_lock(struct zone *zone)
{
return &zone->zone_pgdat->lru_lock;
}
static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
{
return &pgdat->lruvec;
}
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
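zone_lru_lock() above is the compatibility shim for this series: the lru_lock itself moves into struct pglist_data, and zone-based callers are redirected to the node-wide lock through zone->zone_pgdat. A minimal userspace sketch of that indirection follows (pthread mutexes stand in for spinlocks; build with -pthread).

#include <pthread.h>
#include <stdio.h>

struct pglist_data;

struct zone {
	struct pglist_data *zone_pgdat;
};

struct pglist_data {
	pthread_mutex_t lru_lock;   /* was zone->lru_lock before this series */
	struct zone node_zones[3];
};

static pthread_mutex_t *zone_lru_lock(struct zone *zone)
{
	return &zone->zone_pgdat->lru_lock;
}

int main(void)
{
	static struct pglist_data node = { .lru_lock = PTHREAD_MUTEX_INITIALIZER };

	for (int i = 0; i < 3; i++)
		node.node_zones[i].zone_pgdat = &node;

	/* A legacy caller that only has a zone still ends up on the node lock. */
	pthread_mutex_lock(zone_lru_lock(&node.node_zones[1]));
	puts("holding the node-wide lru_lock via zone_lru_lock()");
	pthread_mutex_unlock(zone_lru_lock(&node.node_zones[1]));
	return 0;
}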
@@ -760,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
extern void lruvec_init(struct lruvec *lruvec);
static inline struct zone *lruvec_zone(struct lruvec *lruvec)
static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
return lruvec->zone;
return lruvec->pgdat;
#else
return container_of(lruvec, struct zone, lruvec);
return container_of(lruvec, struct pglist_data, lruvec);
#endif
}
@ -73,9 +73,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
|
||||
extern void mark_oom_victim(struct task_struct *tsk);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern void try_oom_reaper(struct task_struct *tsk);
|
||||
extern void wake_oom_reaper(struct task_struct *tsk);
|
||||
#else
|
||||
static inline void try_oom_reaper(struct task_struct *tsk)
|
||||
static inline void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
@ -107,27 +107,7 @@ extern void oom_killer_enable(void);
|
||||
|
||||
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
|
||||
|
||||
static inline bool task_will_free_mem(struct task_struct *task)
|
||||
{
|
||||
struct signal_struct *sig = task->signal;
|
||||
|
||||
/*
|
||||
* A coredumping process may sleep for an extended period in exit_mm(),
|
||||
* so the oom killer cannot assume that the process will promptly exit
|
||||
* and release memory.
|
||||
*/
|
||||
if (sig->flags & SIGNAL_GROUP_COREDUMP)
|
||||
return false;
|
||||
|
||||
if (!(task->flags & PF_EXITING))
|
||||
return false;
|
||||
|
||||
/* Make sure that the whole thread group is going down */
|
||||
if (!thread_group_empty(task) && !(sig->flags & SIGNAL_GROUP_EXIT))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
bool task_will_free_mem(struct task_struct *task);
|
||||
|
||||
/* sysctls */
|
||||
extern int sysctl_oom_dump_tasks;
|
||||
|
@ -523,6 +523,7 @@ static inline int get_dumpable(struct mm_struct *mm)
|
||||
#define MMF_HAS_UPROBES 19 /* has uprobes */
|
||||
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
|
||||
#define MMF_OOM_REAPED 21 /* mm has been already reaped */
|
||||
#define MMF_OOM_NOT_REAPABLE 22 /* mm couldn't be reaped */
|
||||
|
||||
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
|
||||
|
||||
@ -1949,6 +1950,32 @@ static inline int tsk_nr_cpus_allowed(struct task_struct *p)
|
||||
#define TNF_FAULT_LOCAL 0x08
|
||||
#define TNF_MIGRATE_FAIL 0x10
|
||||
|
||||
static inline bool in_vfork(struct task_struct *tsk)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
/*
|
||||
* need RCU to access ->real_parent if CLONE_VM was used along with
|
||||
* CLONE_PARENT.
|
||||
*
|
||||
* We check real_parent->mm == tsk->mm because CLONE_VFORK does not
|
||||
* imply CLONE_VM
|
||||
*
|
||||
* CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
|
||||
* ->real_parent is not necessarily the task doing vfork(), so in
|
||||
* theory we can't rely on task_lock() if we want to dereference it.
|
||||
*
|
||||
* And in this case we can't trust the real_parent->mm == tsk->mm
|
||||
* check, it can be false negative. But we do not care, if init or
|
||||
* another oom-unkillable task does this it should blame itself.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
extern void task_numa_fault(int last_node, int node, int pages, int flags);
|
||||
extern pid_t task_numa_group_id(struct task_struct *p);
|
||||
|
@@ -88,7 +88,8 @@ struct kmem_cache {
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
void *x) {
void *x)
{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
@@ -104,6 +104,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
#ifdef CONFIG_KASAN
struct kasan_cache kasan_info;
#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
@@ -119,15 +123,17 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
else
return object;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
return result;
}
#endif /* _LINUX_SLUB_DEF_H */
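The SLUB nearest_obj() change above rounds an arbitrary pointer down to its object boundary, clamps pointers past the last object, and then applies fixup_red_left() so the result accounts for the left red zone. The standalone sketch below walks the same arithmetic; the fixed RED_LEFT_PAD constant is an assumption standing in for the per-cache red_left_pad.

#include <stdio.h>
#include <stdint.h>

#define OBJ_SIZE     64u
#define NR_OBJECTS    8u
#define RED_LEFT_PAD 16u   /* pretend every object is preceded by a redzone */

static uintptr_t nearest_obj(uintptr_t base, uintptr_t x)
{
	uintptr_t object = x - (x - base) % OBJ_SIZE;
	uintptr_t last_object = base + (NR_OBJECTS - 1) * OBJ_SIZE;

	if (object > last_object)       /* pointer past the final object */
		object = last_object;
	return object + RED_LEFT_PAD;   /* fixup_red_left() stand-in */
}

int main(void)
{
	uintptr_t slab_base = 0x1000;

	/* A pointer 10 bytes into the 3rd object maps back to that object. */
	printf("0x%lx -> 0x%lx\n",
	       (unsigned long)(slab_base + 2 * OBJ_SIZE + 10),
	       (unsigned long)nearest_obj(slab_base, slab_base + 2 * OBJ_SIZE + 10));

	/* A stray pointer beyond the slab clamps to the last object. */
	printf("0x%lx -> 0x%lx\n",
	       (unsigned long)(slab_base + NR_OBJECTS * OBJ_SIZE + 5),
	       (unsigned long)nearest_obj(slab_base, slab_base + NR_OBJECTS * OBJ_SIZE + 5));
	return 0;
}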
@ -157,15 +157,6 @@ enum {
|
||||
#define SWAP_CLUSTER_MAX 32UL
|
||||
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
|
||||
|
||||
/*
|
||||
* Ratio between zone->managed_pages and the "gap" that above the per-zone
|
||||
* "high_wmark". While balancing nodes, We allow kswapd to shrink zones that
|
||||
* do not meet the (high_wmark + gap) watermark, even which already met the
|
||||
* high_wmark, in order to provide better per-zone lru behavior. We are ok to
|
||||
* spend not more than 1% of the memory for this zone balancing "gap".
|
||||
*/
|
||||
#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100
|
||||
|
||||
#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */
|
||||
#define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */
|
||||
#define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */
|
||||
@@ -317,6 +308,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,

/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
					gfp_t gfp_mask, nodemask_t *mask);
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
@@ -324,9 +316,9 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
						  unsigned long nr_pages,
						  gfp_t gfp_mask,
						  bool may_swap);
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
						gfp_t gfp_mask, bool noswap,
						struct zone *zone,
						pg_data_t *pgdat,
						unsigned long *nr_scanned);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
@@ -334,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long vm_total_pages;

#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
#define node_reclaim_mode 0
static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
				unsigned int order)
{
	return 0;
}

@@ -54,7 +54,7 @@ int arch_update_cpu_topology(void);
/*
 * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
 * (in whatever arch specific measurement units returned by node_distance())
 * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
 * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
 * on nodes within this distance.
 */
#define RECLAIM_DISTANCE 30

@@ -23,21 +23,23 @@

enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
		FOR_ALL_ZONES(PGALLOC),
		FOR_ALL_ZONES(ALLOCSTALL),
		FOR_ALL_ZONES(PGSCAN_SKIP),
		PGFREE, PGACTIVATE, PGDEACTIVATE,
		PGFAULT, PGMAJFAULT,
		PGLAZYFREED,
		FOR_ALL_ZONES(PGREFILL),
		FOR_ALL_ZONES(PGSTEAL_KSWAPD),
		FOR_ALL_ZONES(PGSTEAL_DIRECT),
		FOR_ALL_ZONES(PGSCAN_KSWAPD),
		FOR_ALL_ZONES(PGSCAN_DIRECT),
		PGREFILL,
		PGSTEAL_KSWAPD,
		PGSTEAL_DIRECT,
		PGSCAN_KSWAPD,
		PGSCAN_DIRECT,
		PGSCAN_DIRECT_THROTTLE,
#ifdef CONFIG_NUMA
		PGSCAN_ZONE_RECLAIM_FAILED,
#endif
		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
		PAGEOUTRUN, PGROTATED,
		DROP_PAGECACHE, DROP_SLAB,
#ifdef CONFIG_NUMA_BALANCING
		NUMA_PTE_UPDATES,

@ -101,25 +101,42 @@ static inline void vm_events_fold_cpu(int cpu)
|
||||
#define count_vm_vmacache_event(x) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define __count_zone_vm_events(item, zone, delta) \
	__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
	zone_idx(zone), delta)
#define __count_zid_vm_events(item, zid, delta) \
	__count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta)
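
The item##_NORMAL - ZONE_NORMAL + zid trick above just turns a per-zone event family laid out contiguously by FOR_ALL_ZONES() into an array index. A minimal standalone model (the three-zone layout and names here are illustrative, not the kernel's exact configuration):

#include <stdio.h>

/* Toy zone layout and a contiguous per-zone event family, mirroring FOR_ALL_ZONES(). */
enum zone_id { ZONE_DMA, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };
enum event   { PGALLOC_DMA, PGALLOC_NORMAL, PGALLOC_MOVABLE, NR_EVENTS };

static unsigned long events[NR_EVENTS];

/* Same arithmetic as __count_zid_vm_events(): offset from the _NORMAL member. */
#define count_zid_event(item, zid, delta) \
	(events[item##_NORMAL - ZONE_NORMAL + (zid)] += (delta))

int main(void)
{
	count_zid_event(PGALLOC, ZONE_MOVABLE, 3);
	printf("PGALLOC_MOVABLE = %lu\n", events[PGALLOC_MOVABLE]);	/* prints 3 */
	return 0;
}
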
|
||||
|
||||
/*
|
||||
* Zone based page accounting with per cpu differentials.
|
||||
* Zone and node-based page accounting with per cpu differentials.
|
||||
*/
|
||||
extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
|
||||
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
|
||||
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
|
||||
|
||||
static inline void zone_page_state_add(long x, struct zone *zone,
|
||||
enum zone_stat_item item)
|
||||
{
|
||||
atomic_long_add(x, &zone->vm_stat[item]);
|
||||
atomic_long_add(x, &vm_stat[item]);
|
||||
atomic_long_add(x, &vm_zone_stat[item]);
|
||||
}
|
||||
|
||||
static inline void node_page_state_add(long x, struct pglist_data *pgdat,
|
||||
enum node_stat_item item)
|
||||
{
|
||||
atomic_long_add(x, &pgdat->vm_stat[item]);
|
||||
atomic_long_add(x, &vm_node_stat[item]);
|
||||
}
|
||||
|
||||
static inline unsigned long global_page_state(enum zone_stat_item item)
|
||||
{
|
||||
long x = atomic_long_read(&vm_stat[item]);
|
||||
long x = atomic_long_read(&vm_zone_stat[item]);
|
||||
#ifdef CONFIG_SMP
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
#endif
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline unsigned long global_node_page_state(enum node_stat_item item)
|
||||
{
|
||||
long x = atomic_long_read(&vm_node_stat[item]);
|
||||
#ifdef CONFIG_SMP
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
@ -160,32 +177,61 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
					enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);

#ifdef CONFIG_SMP
	int cpu;
	for_each_online_cpu(cpu)
		x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];

	if (x < 0)
		x = 0;
#endif
	return x;
}
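
The snapshot helper above adds the not-yet-folded per-cpu deltas to the global atomic counter and clamps at zero, because the intermediate sum can be transiently negative while updates are in flight. A small model of the same idea (the CPU count and values are made up for illustration):

#include <stdio.h>

#define NR_CPUS 4

/* Model: one already-folded global counter plus unfolded per-cpu deltas. */
static long global_count = 10;
static long cpu_diff[NR_CPUS] = { -6, -7, 2, 0 };

static unsigned long page_state_snapshot(void)
{
	long x = global_count;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		x += cpu_diff[cpu];

	return x < 0 ? 0 : x;	/* clamp a transiently negative sum, as the kernel does */
}

int main(void)
{
	printf("snapshot = %lu\n", page_state_snapshot());	/* prints 0, not a wrapped huge value */
	return 0;
}
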
|
||||
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
|
||||
extern unsigned long node_page_state(int node, enum zone_stat_item item);
|
||||
|
||||
extern unsigned long sum_zone_node_page_state(int node,
|
||||
enum zone_stat_item item);
|
||||
extern unsigned long node_page_state(struct pglist_data *pgdat,
|
||||
enum node_stat_item item);
|
||||
#else
|
||||
|
||||
#define node_page_state(node, item) global_page_state(item)
|
||||
|
||||
#define sum_zone_node_page_state(node, item) global_page_state(item)
|
||||
#define node_page_state(node, item) global_node_page_state(item)
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
|
||||
#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
|
||||
#define add_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, __d)
|
||||
#define sub_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, -(__d))
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
|
||||
void __inc_zone_page_state(struct page *, enum zone_stat_item);
|
||||
void __dec_zone_page_state(struct page *, enum zone_stat_item);
|
||||
|
||||
void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long);
|
||||
void __inc_node_page_state(struct page *, enum node_stat_item);
|
||||
void __dec_node_page_state(struct page *, enum node_stat_item);
|
||||
|
||||
void mod_zone_page_state(struct zone *, enum zone_stat_item, long);
|
||||
void inc_zone_page_state(struct page *, enum zone_stat_item);
|
||||
void dec_zone_page_state(struct page *, enum zone_stat_item);
|
||||
|
||||
extern void inc_zone_state(struct zone *, enum zone_stat_item);
|
||||
void mod_node_page_state(struct pglist_data *, enum node_stat_item, long);
|
||||
void inc_node_page_state(struct page *, enum node_stat_item);
|
||||
void dec_node_page_state(struct page *, enum node_stat_item);
|
||||
|
||||
extern void inc_node_state(struct pglist_data *, enum node_stat_item);
|
||||
extern void __inc_zone_state(struct zone *, enum zone_stat_item);
|
||||
extern void __inc_node_state(struct pglist_data *, enum node_stat_item);
|
||||
extern void dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
extern void __dec_node_state(struct pglist_data *, enum node_stat_item);
|
||||
|
||||
void quiet_vmstat(void);
|
||||
void cpu_vm_stats_fold(int cpu);
|
||||
@ -213,16 +259,34 @@ static inline void __mod_zone_page_state(struct zone *zone,
|
||||
zone_page_state_add(delta, zone, item);
|
||||
}
|
||||
|
||||
static inline void __mod_node_page_state(struct pglist_data *pgdat,
|
||||
enum node_stat_item item, int delta)
|
||||
{
|
||||
node_page_state_add(delta, pgdat, item);
|
||||
}
|
||||
|
||||
static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
{
|
||||
atomic_long_inc(&zone->vm_stat[item]);
|
||||
atomic_long_inc(&vm_stat[item]);
|
||||
atomic_long_inc(&vm_zone_stat[item]);
|
||||
}
|
||||
|
||||
static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
atomic_long_inc(&pgdat->vm_stat[item]);
|
||||
atomic_long_inc(&vm_node_stat[item]);
|
||||
}
|
||||
|
||||
static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
{
|
||||
atomic_long_dec(&zone->vm_stat[item]);
|
||||
atomic_long_dec(&vm_stat[item]);
|
||||
atomic_long_dec(&vm_zone_stat[item]);
|
||||
}
|
||||
|
||||
static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
atomic_long_dec(&pgdat->vm_stat[item]);
|
||||
atomic_long_dec(&vm_node_stat[item]);
|
||||
}
|
||||
|
||||
static inline void __inc_zone_page_state(struct page *page,
|
||||
@ -231,12 +295,26 @@ static inline void __inc_zone_page_state(struct page *page,
|
||||
__inc_zone_state(page_zone(page), item);
|
||||
}
|
||||
|
||||
static inline void __inc_node_page_state(struct page *page,
|
||||
enum node_stat_item item)
|
||||
{
|
||||
__inc_node_state(page_pgdat(page), item);
|
||||
}
|
||||
|
||||
|
||||
static inline void __dec_zone_page_state(struct page *page,
|
||||
enum zone_stat_item item)
|
||||
{
|
||||
__dec_zone_state(page_zone(page), item);
|
||||
}
|
||||
|
||||
static inline void __dec_node_page_state(struct page *page,
|
||||
enum node_stat_item item)
|
||||
{
|
||||
__dec_node_state(page_pgdat(page), item);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* We only use atomic operations to update counters. So there is no need to
|
||||
* disable interrupts.
|
||||
@ -245,7 +323,12 @@ static inline void __dec_zone_page_state(struct page *page,
|
||||
#define dec_zone_page_state __dec_zone_page_state
|
||||
#define mod_zone_page_state __mod_zone_page_state
|
||||
|
||||
#define inc_node_page_state __inc_node_page_state
|
||||
#define dec_node_page_state __dec_node_page_state
|
||||
#define mod_node_page_state __mod_node_page_state
|
||||
|
||||
#define inc_zone_state __inc_zone_state
|
||||
#define inc_node_state __inc_node_state
|
||||
#define dec_zone_state __dec_zone_state
|
||||
|
||||
#define set_pgdat_percpu_threshold(pgdat, callback) { }
|
||||
|
@ -320,7 +320,7 @@ void laptop_mode_timer_fn(unsigned long data);
|
||||
static inline void laptop_sync_completion(void) { }
|
||||
#endif
|
||||
void throttle_vm_writeout(gfp_t gfp_mask);
|
||||
bool zone_dirty_ok(struct zone *zone);
|
||||
bool node_dirty_ok(struct pglist_data *pgdat);
|
||||
int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
void wb_domain_exit(struct wb_domain *dom);
|
||||
|
@ -226,26 +226,26 @@ TRACE_EVENT(mm_compaction_try_to_compact_pages,
|
||||
TP_PROTO(
|
||||
int order,
|
||||
gfp_t gfp_mask,
|
||||
enum migrate_mode mode),
|
||||
int prio),
|
||||
|
||||
TP_ARGS(order, gfp_mask, mode),
|
||||
TP_ARGS(order, gfp_mask, prio),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, order)
|
||||
__field(gfp_t, gfp_mask)
|
||||
__field(enum migrate_mode, mode)
|
||||
__field(int, prio)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->order = order;
|
||||
__entry->gfp_mask = gfp_mask;
|
||||
__entry->mode = mode;
|
||||
__entry->prio = prio;
|
||||
),
|
||||
|
||||
TP_printk("order=%d gfp_mask=0x%x mode=%d",
|
||||
TP_printk("order=%d gfp_mask=0x%x priority=%d",
|
||||
__entry->order,
|
||||
__entry->gfp_mask,
|
||||
(int)__entry->mode)
|
||||
__entry->prio)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#define __def_gfpflag_names \
|
||||
{(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \
|
||||
{(unsigned long)GFP_TRANSHUGE_LIGHT, "GFP_TRANSHUGE_LIGHT"}, \
|
||||
{(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\
|
||||
{(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \
|
||||
{(unsigned long)GFP_USER, "GFP_USER"}, \
|
||||
|
@ -55,21 +55,23 @@ TRACE_EVENT(mm_vmscan_kswapd_sleep,
|
||||
|
||||
TRACE_EVENT(mm_vmscan_kswapd_wake,
|
||||
|
||||
TP_PROTO(int nid, int order),
|
||||
TP_PROTO(int nid, int zid, int order),
|
||||
|
||||
TP_ARGS(nid, order),
|
||||
TP_ARGS(nid, zid, order),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, nid )
|
||||
__field( int, zid )
|
||||
__field( int, order )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nid = nid;
|
||||
__entry->zid = zid;
|
||||
__entry->order = order;
|
||||
),
|
||||
|
||||
TP_printk("nid=%d order=%d", __entry->nid, __entry->order)
|
||||
TP_printk("nid=%d zid=%d order=%d", __entry->nid, __entry->zid, __entry->order)
|
||||
);
|
||||
|
||||
TRACE_EVENT(mm_vmscan_wakeup_kswapd,
|
||||
@ -98,47 +100,50 @@ TRACE_EVENT(mm_vmscan_wakeup_kswapd,
|
||||
|
||||
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
|
||||
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
|
||||
|
||||
TP_ARGS(order, may_writepage, gfp_flags),
|
||||
TP_ARGS(order, may_writepage, gfp_flags, classzone_idx),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, order )
|
||||
__field( int, may_writepage )
|
||||
__field( gfp_t, gfp_flags )
|
||||
__field( int, classzone_idx )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->order = order;
|
||||
__entry->may_writepage = may_writepage;
|
||||
__entry->gfp_flags = gfp_flags;
|
||||
__entry->classzone_idx = classzone_idx;
|
||||
),
|
||||
|
||||
TP_printk("order=%d may_writepage=%d gfp_flags=%s",
|
||||
TP_printk("order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d",
|
||||
__entry->order,
|
||||
__entry->may_writepage,
|
||||
show_gfp_flags(__entry->gfp_flags))
|
||||
show_gfp_flags(__entry->gfp_flags),
|
||||
__entry->classzone_idx)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
|
||||
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
|
||||
|
||||
TP_ARGS(order, may_writepage, gfp_flags)
|
||||
TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
|
||||
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
|
||||
|
||||
TP_ARGS(order, may_writepage, gfp_flags)
|
||||
TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
|
||||
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
|
||||
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags, int classzone_idx),
|
||||
|
||||
TP_ARGS(order, may_writepage, gfp_flags)
|
||||
TP_ARGS(order, may_writepage, gfp_flags, classzone_idx)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
|
||||
@ -266,16 +271,18 @@ TRACE_EVENT(mm_shrink_slab_end,
|
||||
|
||||
DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
|
||||
|
||||
TP_PROTO(int order,
|
||||
TP_PROTO(int classzone_idx,
|
||||
int order,
|
||||
unsigned long nr_requested,
|
||||
unsigned long nr_scanned,
|
||||
unsigned long nr_taken,
|
||||
isolate_mode_t isolate_mode,
|
||||
int file),
|
||||
|
||||
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
|
||||
TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, classzone_idx)
|
||||
__field(int, order)
|
||||
__field(unsigned long, nr_requested)
|
||||
__field(unsigned long, nr_scanned)
|
||||
@ -285,6 +292,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->classzone_idx = classzone_idx;
|
||||
__entry->order = order;
|
||||
__entry->nr_requested = nr_requested;
|
||||
__entry->nr_scanned = nr_scanned;
|
||||
@ -293,8 +301,9 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
|
||||
__entry->file = file;
|
||||
),
|
||||
|
||||
TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
|
||||
TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
|
||||
__entry->isolate_mode,
|
||||
__entry->classzone_idx,
|
||||
__entry->order,
|
||||
__entry->nr_requested,
|
||||
__entry->nr_scanned,
|
||||
@ -304,27 +313,29 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
|
||||
|
||||
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
|
||||
|
||||
TP_PROTO(int order,
|
||||
TP_PROTO(int classzone_idx,
|
||||
int order,
|
||||
unsigned long nr_requested,
|
||||
unsigned long nr_scanned,
|
||||
unsigned long nr_taken,
|
||||
isolate_mode_t isolate_mode,
|
||||
int file),
|
||||
|
||||
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
|
||||
TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
|
||||
|
||||
);
|
||||
|
||||
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
|
||||
|
||||
TP_PROTO(int order,
|
||||
TP_PROTO(int classzone_idx,
|
||||
int order,
|
||||
unsigned long nr_requested,
|
||||
unsigned long nr_scanned,
|
||||
unsigned long nr_taken,
|
||||
isolate_mode_t isolate_mode,
|
||||
int file),
|
||||
|
||||
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
|
||||
TP_ARGS(classzone_idx, order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
|
||||
|
||||
);
|
||||
|
||||
@ -352,15 +363,14 @@ TRACE_EVENT(mm_vmscan_writepage,
|
||||
|
||||
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
|
||||
|
||||
TP_PROTO(struct zone *zone,
|
||||
TP_PROTO(int nid,
|
||||
unsigned long nr_scanned, unsigned long nr_reclaimed,
|
||||
int priority, int file),
|
||||
|
||||
TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file),
|
||||
TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(int, nid)
|
||||
__field(int, zid)
|
||||
__field(unsigned long, nr_scanned)
|
||||
__field(unsigned long, nr_reclaimed)
|
||||
__field(int, priority)
|
||||
@ -368,16 +378,15 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nid = zone_to_nid(zone);
|
||||
__entry->zid = zone_idx(zone);
|
||||
__entry->nid = nid;
|
||||
__entry->nr_scanned = nr_scanned;
|
||||
__entry->nr_reclaimed = nr_reclaimed;
|
||||
__entry->priority = priority;
|
||||
__entry->reclaim_flags = trace_shrink_flags(file);
|
||||
),
|
||||
|
||||
TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
|
||||
__entry->nid, __entry->zid,
|
||||
TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
|
||||
__entry->nid,
|
||||
__entry->nr_scanned, __entry->nr_reclaimed,
|
||||
__entry->priority,
|
||||
show_reclaim_flags(__entry->reclaim_flags))
|
||||
|
@ -412,11 +412,11 @@ TRACE_EVENT(global_dirty_state,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nr_dirty = global_page_state(NR_FILE_DIRTY);
|
||||
__entry->nr_writeback = global_page_state(NR_WRITEBACK);
|
||||
__entry->nr_unstable = global_page_state(NR_UNSTABLE_NFS);
|
||||
__entry->nr_dirtied = global_page_state(NR_DIRTIED);
|
||||
__entry->nr_written = global_page_state(NR_WRITTEN);
|
||||
__entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY);
|
||||
__entry->nr_writeback = global_node_page_state(NR_WRITEBACK);
|
||||
__entry->nr_unstable = global_node_page_state(NR_UNSTABLE_NFS);
|
||||
__entry->nr_dirtied = global_node_page_state(NR_DIRTIED);
|
||||
__entry->nr_written = global_node_page_state(NR_WRITTEN);
|
||||
__entry->background_thresh = background_thresh;
|
||||
__entry->dirty_thresh = dirty_thresh;
|
||||
__entry->dirty_limit = global_wb_domain.dirty_limit;
|
||||
|
@ -1034,15 +1034,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
|
||||
{
|
||||
bool need_loop;
|
||||
|
||||
/*
|
||||
* Allow tasks that have access to memory reserves because they have
|
||||
* been OOM killed to get memory anywhere.
|
||||
*/
|
||||
if (unlikely(test_thread_flag(TIF_MEMDIE)))
|
||||
return;
|
||||
if (current->flags & PF_EXITING) /* Let dying task have memory */
|
||||
return;
|
||||
|
||||
task_lock(tsk);
|
||||
/*
|
||||
* Determine if a loop is necessary if another thread is doing
|
||||
|
@ -165,20 +165,12 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
|
||||
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
|
||||
THREAD_SIZE_ORDER);
|
||||
|
||||
if (page)
|
||||
memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
|
||||
1 << THREAD_SIZE_ORDER);
|
||||
|
||||
return page ? page_address(page) : NULL;
|
||||
}
|
||||
|
||||
static inline void free_thread_stack(unsigned long *stack)
|
||||
{
|
||||
struct page *page = virt_to_page(stack);
|
||||
|
||||
memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
|
||||
-(1 << THREAD_SIZE_ORDER));
|
||||
__free_pages(page, THREAD_SIZE_ORDER);
|
||||
__free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
|
||||
}
|
||||
# else
|
||||
static struct kmem_cache *thread_stack_cache;
|
||||
@ -223,9 +215,15 @@ static struct kmem_cache *mm_cachep;
|
||||
|
||||
static void account_kernel_stack(unsigned long *stack, int account)
|
||||
{
|
||||
struct zone *zone = page_zone(virt_to_page(stack));
|
||||
/* All stack pages are in the same zone and belong to the same memcg. */
|
||||
struct page *first_page = virt_to_page(stack);
|
||||
|
||||
mod_zone_page_state(zone, NR_KERNEL_STACK, account);
|
||||
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
|
||||
THREAD_SIZE / 1024 * account);
|
||||
|
||||
memcg_kmem_update_page_stat(
|
||||
first_page, MEMCG_KERNEL_STACK_KB,
|
||||
account * (THREAD_SIZE / 1024));
|
||||
}
|
||||
|
||||
void free_task(struct task_struct *tsk)
|
||||
|
@ -42,7 +42,7 @@ bool freezing_slow_path(struct task_struct *p)
|
||||
if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
|
||||
return false;
|
||||
|
||||
if (test_thread_flag(TIF_MEMDIE))
|
||||
if (test_tsk_thread_flag(p, TIF_MEMDIE))
|
||||
return false;
|
||||
|
||||
if (pm_nosig_freezing || cgroup_freezing(p))
|
||||
|
@ -308,12 +308,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
|
||||
if (is_ram == REGION_INTERSECTS)
|
||||
return __va(res->start);
|
||||
|
||||
if (altmap && !IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) {
|
||||
dev_err(dev, "%s: altmap requires CONFIG_SPARSEMEM_VMEMMAP=y\n",
|
||||
__func__);
|
||||
return ERR_PTR(-ENXIO);
|
||||
}
|
||||
|
||||
if (!ref)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
@ -401,7 +395,6 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
|
||||
altmap->alloc -= nr_pfns;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
|
||||
{
|
||||
/*
|
||||
@ -427,5 +420,4 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
|
||||
|
||||
return pgmap ? pgmap->altmap : NULL;
|
||||
}
|
||||
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
|
||||
#endif /* CONFIG_ZONE_DEVICE */
|
||||
|
@ -1627,11 +1627,11 @@ static unsigned long minimum_image_size(unsigned long saveable)
|
||||
unsigned long size;
|
||||
|
||||
size = global_page_state(NR_SLAB_RECLAIMABLE)
|
||||
+ global_page_state(NR_ACTIVE_ANON)
|
||||
+ global_page_state(NR_INACTIVE_ANON)
|
||||
+ global_page_state(NR_ACTIVE_FILE)
|
||||
+ global_page_state(NR_INACTIVE_FILE)
|
||||
- global_page_state(NR_FILE_MAPPED);
|
||||
+ global_node_page_state(NR_ACTIVE_ANON)
|
||||
+ global_node_page_state(NR_INACTIVE_ANON)
|
||||
+ global_node_page_state(NR_ACTIVE_FILE)
|
||||
+ global_node_page_state(NR_INACTIVE_FILE)
|
||||
- global_node_page_state(NR_FILE_MAPPED);
|
||||
|
||||
return saveable <= size ? 0 : saveable - size;
|
||||
}
|
||||
|
@ -3177,9 +3177,8 @@ void show_regs_print_info(const char *log_lvl)
|
||||
{
|
||||
dump_stack_print_info(log_lvl);
|
||||
|
||||
printk("%stask: %p ti: %p task.ti: %p\n",
|
||||
log_lvl, current, current_thread_info(),
|
||||
task_thread_info(current));
|
||||
printk("%stask: %p task.stack: %p\n",
|
||||
log_lvl, current, task_stack_page(current));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1508,8 +1508,8 @@ static struct ctl_table vm_table[] = {
|
||||
#ifdef CONFIG_NUMA
|
||||
{
|
||||
.procname = "zone_reclaim_mode",
|
||||
.data = &zone_reclaim_mode,
|
||||
.maxlen = sizeof(zone_reclaim_mode),
|
||||
.data = &node_reclaim_mode,
|
||||
.maxlen = sizeof(node_reclaim_mode),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.extra1 = &zero,
|
||||
|
@ -5,9 +5,9 @@ if HAVE_ARCH_KASAN
|
||||
|
||||
config KASAN
|
||||
bool "KASan: runtime memory debugger"
|
||||
depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
|
||||
depends on SLUB || (SLAB && !DEBUG_SLAB)
|
||||
select CONSTRUCTORS
|
||||
select STACKDEPOT if SLAB
|
||||
select STACKDEPOT
|
||||
help
|
||||
Enables kernel address sanitizer - runtime memory debugger,
|
||||
designed to find out-of-bounds accesses and use-after-free bugs.
|
||||
|
@ -144,7 +144,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
|
||||
buf = iov->iov_base + skip;
|
||||
copy = min(bytes, iov->iov_len - skip);
|
||||
|
||||
if (!fault_in_pages_writeable(buf, copy)) {
|
||||
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
|
||||
kaddr = kmap_atomic(page);
|
||||
from = kaddr + offset;
|
||||
|
||||
@ -175,6 +175,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
|
||||
copy = min(bytes, iov->iov_len - skip);
|
||||
}
|
||||
/* Too bad - revert to non-atomic kmap */
|
||||
|
||||
kaddr = kmap(page);
|
||||
from = kaddr + offset;
|
||||
left = __copy_to_user(buf, from, copy);
|
||||
@ -193,6 +194,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
|
||||
bytes -= copy;
|
||||
}
|
||||
kunmap(page);
|
||||
|
||||
done:
|
||||
if (skip == iov->iov_len) {
|
||||
iov++;
|
||||
@ -225,7 +227,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
|
||||
buf = iov->iov_base + skip;
|
||||
copy = min(bytes, iov->iov_len - skip);
|
||||
|
||||
if (!fault_in_pages_readable(buf, copy)) {
|
||||
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
|
||||
kaddr = kmap_atomic(page);
|
||||
to = kaddr + offset;
|
||||
|
||||
@ -256,6 +258,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
|
||||
copy = min(bytes, iov->iov_len - skip);
|
||||
}
|
||||
/* Too bad - revert to non-atomic kmap */
|
||||
|
||||
kaddr = kmap(page);
|
||||
to = kaddr + offset;
|
||||
left = __copy_from_user(to, buf, copy);
|
||||
@ -274,6 +277,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
|
||||
bytes -= copy;
|
||||
}
|
||||
kunmap(page);
|
||||
|
||||
done:
|
||||
if (skip == iov->iov_len) {
|
||||
iov++;
|
||||
|
@ -242,6 +242,7 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
|
||||
*/
|
||||
alloc_flags &= ~GFP_ZONEMASK;
|
||||
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
|
||||
alloc_flags |= __GFP_NOWARN;
|
||||
page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
|
||||
if (page)
|
||||
prealloc = page_address(page);
|
||||
|
@ -681,7 +681,7 @@ config IDLE_PAGE_TRACKING
|
||||
See Documentation/vm/idle_page_tracking.txt for more details.
|
||||
|
||||
config ZONE_DEVICE
|
||||
bool "Device memory (pmem, etc...) hotplug support" if EXPERT
|
||||
bool "Device memory (pmem, etc...) hotplug support"
|
||||
depends on MEMORY_HOTPLUG
|
||||
depends on MEMORY_HOTREMOVE
|
||||
depends on SPARSEMEM_VMEMMAP
|
||||
|
@ -947,24 +947,24 @@ long congestion_wait(int sync, long timeout)
|
||||
EXPORT_SYMBOL(congestion_wait);
|
||||
|
||||
/**
|
||||
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
|
||||
* @zone: A zone to check if it is heavily congested
|
||||
* wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
|
||||
* @pgdat: A pgdat to check if it is heavily congested
|
||||
* @sync: SYNC or ASYNC IO
|
||||
* @timeout: timeout in jiffies
|
||||
*
|
||||
* In the event of a congested backing_dev (any backing_dev) and the given
|
||||
* @zone has experienced recent congestion, this waits for up to @timeout
|
||||
* @pgdat has experienced recent congestion, this waits for up to @timeout
|
||||
* jiffies for either a BDI to exit congestion of the given @sync queue
|
||||
* or a write to complete.
|
||||
*
|
||||
* In the absence of zone congestion, cond_resched() is called to yield
|
||||
* In the absence of pgdat congestion, cond_resched() is called to yield
|
||||
* the processor if necessary but otherwise does not sleep.
|
||||
*
|
||||
* The return value is 0 if the sleep is for the full timeout. Otherwise,
|
||||
* it is the number of jiffies that were still remaining when the function
|
||||
* returned. return_value == timeout implies the function did not sleep.
|
||||
*/
|
||||
long wait_iff_congested(struct zone *zone, int sync, long timeout)
|
||||
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
|
||||
{
|
||||
long ret;
|
||||
unsigned long start = jiffies;
|
||||
@ -973,12 +973,13 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
|
||||
|
||||
/*
|
||||
* If there is no congestion, or heavy congestion is not being
|
||||
* encountered in the current zone, yield if necessary instead
|
||||
* encountered in the current pgdat, yield if necessary instead
|
||||
* of sleeping on the congestion queue
|
||||
*/
|
||||
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
|
||||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
|
||||
!test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
|
||||
cond_resched();
|
||||
|
||||
/* In case we scheduled, work out time remaining */
|
||||
ret = timeout - (jiffies - start);
|
||||
if (ret < 0)
|
||||
|
113	mm/compaction.c
@ -331,7 +331,7 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
|
||||
{
|
||||
if (cc->mode == MIGRATE_ASYNC) {
|
||||
if (!spin_trylock_irqsave(lock, *flags)) {
|
||||
cc->contended = COMPACT_CONTENDED_LOCK;
|
||||
cc->contended = true;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
@ -365,13 +365,13 @@ static bool compact_unlock_should_abort(spinlock_t *lock,
|
||||
}
|
||||
|
||||
if (fatal_signal_pending(current)) {
|
||||
cc->contended = COMPACT_CONTENDED_SCHED;
|
||||
cc->contended = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (need_resched()) {
|
||||
if (cc->mode == MIGRATE_ASYNC) {
|
||||
cc->contended = COMPACT_CONTENDED_SCHED;
|
||||
cc->contended = true;
|
||||
return true;
|
||||
}
|
||||
cond_resched();
|
||||
@ -394,7 +394,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
|
||||
/* async compaction aborts if contended */
|
||||
if (need_resched()) {
|
||||
if (cc->mode == MIGRATE_ASYNC) {
|
||||
cc->contended = COMPACT_CONTENDED_SCHED;
|
||||
cc->contended = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -646,8 +646,8 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
|
||||
list_for_each_entry(page, &cc->migratepages, lru)
|
||||
count[!!page_is_file_cache(page)]++;
|
||||
|
||||
mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
|
||||
mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
|
||||
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
|
||||
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
|
||||
}
|
||||
|
||||
/* Similar to reclaim, but different enough that they don't share logic */
|
||||
@ -655,12 +655,12 @@ static bool too_many_isolated(struct zone *zone)
|
||||
{
|
||||
unsigned long active, inactive, isolated;
|
||||
|
||||
inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
|
||||
zone_page_state(zone, NR_INACTIVE_ANON);
|
||||
active = zone_page_state(zone, NR_ACTIVE_FILE) +
|
||||
zone_page_state(zone, NR_ACTIVE_ANON);
|
||||
isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
|
||||
zone_page_state(zone, NR_ISOLATED_ANON);
|
||||
inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
|
||||
node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
|
||||
active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
|
||||
node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
|
||||
isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
|
||||
node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
|
||||
|
||||
return isolated > (inactive + active) / 2;
|
||||
}
|
||||
@ -752,7 +752,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
* if contended.
|
||||
*/
|
||||
if (!(low_pfn % SWAP_CLUSTER_MAX)
|
||||
&& compact_unlock_should_abort(&zone->lru_lock, flags,
|
||||
&& compact_unlock_should_abort(zone_lru_lock(zone), flags,
|
||||
&locked, cc))
|
||||
break;
|
||||
|
||||
@ -813,7 +813,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
if (unlikely(__PageMovable(page)) &&
|
||||
!PageIsolated(page)) {
|
||||
if (locked) {
|
||||
spin_unlock_irqrestore(&zone->lru_lock,
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone),
|
||||
flags);
|
||||
locked = false;
|
||||
}
|
||||
@ -836,7 +836,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
|
||||
/* If we already hold the lock, we can skip some rechecking */
|
||||
if (!locked) {
|
||||
locked = compact_trylock_irqsave(&zone->lru_lock,
|
||||
locked = compact_trylock_irqsave(zone_lru_lock(zone),
|
||||
&flags, cc);
|
||||
if (!locked)
|
||||
break;
|
||||
@ -856,7 +856,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
}
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
|
||||
/* Try isolate the page */
|
||||
if (__isolate_lru_page(page, isolate_mode) != 0)
|
||||
@ -899,7 +899,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
*/
|
||||
if (nr_isolated) {
|
||||
if (locked) {
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
locked = false;
|
||||
}
|
||||
acct_isolated(zone, cc);
|
||||
@ -927,7 +927,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
||||
low_pfn = end_pfn;
|
||||
|
||||
if (locked)
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
|
||||
/*
|
||||
* Update the pageblock-skip information and cached scanner pfn,
|
||||
@ -1200,7 +1200,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
|
||||
struct page *page;
|
||||
const isolate_mode_t isolate_mode =
|
||||
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
|
||||
(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
|
||||
(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
|
||||
|
||||
/*
|
||||
* Start at where we last stopped, or beginning of the zone as
|
||||
@ -1619,14 +1619,11 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
|
||||
trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
|
||||
cc->free_pfn, end_pfn, sync, ret);
|
||||
|
||||
if (ret == COMPACT_CONTENDED)
|
||||
ret = COMPACT_PARTIAL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum compact_result compact_zone_order(struct zone *zone, int order,
|
||||
gfp_t gfp_mask, enum migrate_mode mode, int *contended,
|
||||
gfp_t gfp_mask, enum compact_priority prio,
|
||||
unsigned int alloc_flags, int classzone_idx)
|
||||
{
|
||||
enum compact_result ret;
|
||||
@ -1636,7 +1633,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
|
||||
.order = order,
|
||||
.gfp_mask = gfp_mask,
|
||||
.zone = zone,
|
||||
.mode = mode,
|
||||
.mode = (prio == COMPACT_PRIO_ASYNC) ?
|
||||
MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
|
||||
.alloc_flags = alloc_flags,
|
||||
.classzone_idx = classzone_idx,
|
||||
.direct_compaction = true,
|
||||
@ -1649,7 +1647,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
|
||||
VM_BUG_ON(!list_empty(&cc.freepages));
|
||||
VM_BUG_ON(!list_empty(&cc.migratepages));
|
||||
|
||||
*contended = cc.contended;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1662,50 +1659,38 @@ int sysctl_extfrag_threshold = 500;
|
||||
* @alloc_flags: The allocation flags of the current allocation
|
||||
* @ac: The context of current allocation
|
||||
* @mode: The migration mode for async, sync light, or sync migration
|
||||
* @contended: Return value that determines if compaction was aborted due to
|
||||
* need_resched() or lock contention
|
||||
*
|
||||
* This is the main entry point for direct page compaction.
|
||||
*/
|
||||
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
|
||||
unsigned int alloc_flags, const struct alloc_context *ac,
|
||||
enum migrate_mode mode, int *contended)
|
||||
enum compact_priority prio)
|
||||
{
|
||||
int may_enter_fs = gfp_mask & __GFP_FS;
|
||||
int may_perform_io = gfp_mask & __GFP_IO;
|
||||
struct zoneref *z;
|
||||
struct zone *zone;
|
||||
enum compact_result rc = COMPACT_SKIPPED;
|
||||
int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
|
||||
|
||||
*contended = COMPACT_CONTENDED_NONE;
|
||||
|
||||
/* Check if the GFP flags allow compaction */
|
||||
if (!order || !may_enter_fs || !may_perform_io)
|
||||
if (!may_enter_fs || !may_perform_io)
|
||||
return COMPACT_SKIPPED;
|
||||
|
||||
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode);
|
||||
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
|
||||
|
||||
/* Compact each zone in the list */
|
||||
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
|
||||
ac->nodemask) {
|
||||
enum compact_result status;
|
||||
int zone_contended;
|
||||
|
||||
if (compaction_deferred(zone, order)) {
|
||||
rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
|
||||
continue;
|
||||
}
|
||||
|
||||
status = compact_zone_order(zone, order, gfp_mask, mode,
|
||||
&zone_contended, alloc_flags,
|
||||
ac_classzone_idx(ac));
|
||||
status = compact_zone_order(zone, order, gfp_mask, prio,
|
||||
alloc_flags, ac_classzone_idx(ac));
|
||||
rc = max(status, rc);
|
||||
/*
|
||||
* It takes at least one zone that wasn't lock contended
|
||||
* to clear all_zones_contended.
|
||||
*/
|
||||
all_zones_contended &= zone_contended;
|
||||
|
||||
/* If a normal allocation would succeed, stop compacting */
|
||||
if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
|
||||
@ -1717,59 +1702,29 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
|
||||
* succeeds in this zone.
|
||||
*/
|
||||
compaction_defer_reset(zone, order, false);
|
||||
/*
|
||||
* It is possible that async compaction aborted due to
|
||||
* need_resched() and the watermarks were ok thanks to
|
||||
* somebody else freeing memory. The allocation can
|
||||
* however still fail so we better signal the
|
||||
* need_resched() contention anyway (this will not
|
||||
* prevent the allocation attempt).
|
||||
*/
|
||||
if (zone_contended == COMPACT_CONTENDED_SCHED)
|
||||
*contended = COMPACT_CONTENDED_SCHED;
|
||||
|
||||
goto break_loop;
|
||||
break;
|
||||
}
|
||||
|
||||
if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE ||
|
||||
status == COMPACT_PARTIAL_SKIPPED)) {
|
||||
if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
|
||||
status == COMPACT_PARTIAL_SKIPPED))
|
||||
/*
|
||||
* We think that allocation won't succeed in this zone
|
||||
* so we defer compaction there. If it ends up
|
||||
* succeeding after all, it will be reset.
|
||||
*/
|
||||
defer_compaction(zone, order);
|
||||
}
|
||||
|
||||
/*
|
||||
* We might have stopped compacting due to need_resched() in
|
||||
* async compaction, or due to a fatal signal detected. In that
|
||||
* case do not try further zones and signal need_resched()
|
||||
* contention.
|
||||
* case do not try further zones
|
||||
*/
|
||||
if ((zone_contended == COMPACT_CONTENDED_SCHED)
|
||||
|| fatal_signal_pending(current)) {
|
||||
*contended = COMPACT_CONTENDED_SCHED;
|
||||
goto break_loop;
|
||||
}
|
||||
|
||||
continue;
|
||||
break_loop:
|
||||
/*
|
||||
* We might not have tried all the zones, so be conservative
|
||||
* and assume they are not all lock contended.
|
||||
*/
|
||||
all_zones_contended = 0;
|
||||
break;
|
||||
if ((prio == COMPACT_PRIO_ASYNC && need_resched())
|
||||
|| fatal_signal_pending(current))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If at least one zone wasn't deferred or skipped, we report if all
|
||||
* zones that were tried were lock contended.
|
||||
*/
|
||||
if (rc > COMPACT_INACTIVE && all_zones_contended)
|
||||
*contended = COMPACT_CONTENDED_LOCK;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
16	mm/filemap.c
@ -95,8 +95,8 @@
|
||||
* ->swap_lock (try_to_unmap_one)
|
||||
* ->private_lock (try_to_unmap_one)
|
||||
* ->tree_lock (try_to_unmap_one)
|
||||
* ->zone.lru_lock (follow_page->mark_page_accessed)
|
||||
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
|
||||
* ->zone_lru_lock(zone) (follow_page->mark_page_accessed)
|
||||
* ->zone_lru_lock(zone) (check_pte_range->isolate_lru_page)
|
||||
* ->private_lock (page_remove_rmap->set_page_dirty)
|
||||
* ->tree_lock (page_remove_rmap->set_page_dirty)
|
||||
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
|
||||
@ -218,11 +218,11 @@ void __delete_from_page_cache(struct page *page, void *shadow)
|
||||
|
||||
/* hugetlb pages do not participate in page cache accounting. */
|
||||
if (!PageHuge(page))
|
||||
__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
|
||||
if (PageSwapBacked(page)) {
|
||||
__mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
|
||||
if (PageTransHuge(page))
|
||||
__dec_zone_page_state(page, NR_SHMEM_THPS);
|
||||
__dec_node_page_state(page, NR_SHMEM_THPS);
|
||||
} else {
|
||||
VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
|
||||
}
|
||||
@ -568,9 +568,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
|
||||
* hugetlb pages do not participate in page cache accounting.
|
||||
*/
|
||||
if (!PageHuge(new))
|
||||
__inc_zone_page_state(new, NR_FILE_PAGES);
|
||||
__inc_node_page_state(new, NR_FILE_PAGES);
|
||||
if (PageSwapBacked(new))
|
||||
__inc_zone_page_state(new, NR_SHMEM);
|
||||
__inc_node_page_state(new, NR_SHMEM);
|
||||
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
||||
mem_cgroup_migrate(old, new);
|
||||
radix_tree_preload_end();
|
||||
@ -677,7 +677,7 @@ static int __add_to_page_cache_locked(struct page *page,
|
||||
|
||||
/* hugetlb pages do not participate in page cache accounting. */
|
||||
if (!huge)
|
||||
__inc_zone_page_state(page, NR_FILE_PAGES);
|
||||
__inc_node_page_state(page, NR_FILE_PAGES);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
if (!huge)
|
||||
mem_cgroup_commit_charge(page, memcg, false, false);
|
||||
|
@@ -539,23 +539,26 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
}

/*
 * If THP is set to always then directly reclaim/compact as necessary
 * If set to defer then do no reclaim and defer to khugepaged
 * If THP defrag is set to always then directly reclaim/compact as necessary
 * If set to defer then do only background reclaim/compact and defer to khugepaged
 * If set to madvise and the VMA is flagged then directly reclaim/compact
 * When direct reclaim/compact is allowed, don't retry except for flagged VMA's
 */
static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
{
	gfp_t reclaim_flags = 0;
	bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags) &&
	    (vma->vm_flags & VM_HUGEPAGE))
		reclaim_flags = __GFP_DIRECT_RECLAIM;
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
		reclaim_flags = __GFP_KSWAPD_RECLAIM;
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
		reclaim_flags = __GFP_DIRECT_RECLAIM;
	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
				&transparent_hugepage_flags) && vma_madvised)
		return GFP_TRANSHUGE;
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
						&transparent_hugepage_flags))
		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
						&transparent_hugepage_flags))
		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);

	return GFP_TRANSHUGE | reclaim_flags;
	return GFP_TRANSHUGE_LIGHT;
}

/* Caller must hold page table lock. */

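
For readers untangling the nested tests in the new alloc_hugepage_direct_gfpmask(), a standalone model of the same decision table may help. The flag bit values and the enum below are stand-ins, not the kernel's; only the mapping (always / defer / madvise / off, plus whether the VMA was madvised) is taken from the hunk above.

#include <stdio.h>
#include <stdbool.h>

/* Stand-in flag bits, for illustration only. */
#define GFP_TRANSHUGE		0x1
#define GFP_TRANSHUGE_LIGHT	0x2
#define __GFP_KSWAPD_RECLAIM	0x4
#define __GFP_NORETRY		0x8

enum defrag_setting { DEFRAG_ALWAYS, DEFRAG_DEFER, DEFRAG_MADVISE, DEFRAG_OFF };

/* Mirrors the decision order of the rewritten alloc_hugepage_direct_gfpmask(). */
static unsigned int thp_gfpmask(enum defrag_setting defrag, bool vma_madvised)
{
	if (defrag == DEFRAG_MADVISE && vma_madvised)
		return GFP_TRANSHUGE;				/* direct reclaim/compaction */
	if (defrag == DEFRAG_DEFER)
		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; /* background only */
	if (defrag == DEFRAG_ALWAYS)
		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
	return GFP_TRANSHUGE_LIGHT;				/* no reclaim at all */
}

int main(void)
{
	printf("always, not madvised -> %#x\n", thp_gfpmask(DEFRAG_ALWAYS, false));
	return 0;
}
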
@ -1249,25 +1252,26 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
/*
|
||||
* Return true if we do MADV_FREE successfully on entire pmd page.
|
||||
* Otherwise, return false.
|
||||
*/
|
||||
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
pmd_t *pmd, unsigned long addr, unsigned long next)
|
||||
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
pmd_t orig_pmd;
|
||||
struct page *page;
|
||||
struct mm_struct *mm = tlb->mm;
|
||||
int ret = 0;
|
||||
bool ret = false;
|
||||
|
||||
ptl = pmd_trans_huge_lock(pmd, vma);
|
||||
if (!ptl)
|
||||
goto out_unlocked;
|
||||
|
||||
orig_pmd = *pmd;
|
||||
if (is_huge_zero_pmd(orig_pmd)) {
|
||||
ret = 1;
|
||||
if (is_huge_zero_pmd(orig_pmd))
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = pmd_page(orig_pmd);
|
||||
/*
|
||||
@ -1309,7 +1313,7 @@ int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
set_pmd_at(mm, addr, pmd, orig_pmd);
|
||||
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
|
||||
}
|
||||
ret = 1;
|
||||
ret = true;
|
||||
out:
|
||||
spin_unlock(ptl);
|
||||
out_unlocked:
|
||||
@ -1586,7 +1590,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
|
||||
if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
|
||||
/* Last compound_mapcount is gone. */
|
||||
__dec_zone_page_state(page, NR_ANON_THPS);
|
||||
__dec_node_page_state(page, NR_ANON_THPS);
|
||||
if (TestClearPageDoubleMap(page)) {
|
||||
/* No need in mapcount reference anymore */
|
||||
for (i = 0; i < HPAGE_PMD_NR; i++)
|
||||
@ -1818,7 +1822,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
pgoff_t end = -1;
|
||||
int i;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(head, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
|
||||
|
||||
/* complete memcg works before add pages to LRU */
|
||||
mem_cgroup_split_huge_fixup(head);
|
||||
@ -1848,7 +1852,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
spin_unlock(&head->mapping->tree_lock);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&page_zone(head)->lru_lock, flags);
|
||||
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
||||
|
||||
unfreeze_page(head);
|
||||
|
||||
@ -2034,7 +2038,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
lru_add_drain();
|
||||
|
||||
/* prevent PageLRU to go away from under us, and freeze lru stats */
|
||||
spin_lock_irqsave(&page_zone(head)->lru_lock, flags);
|
||||
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
|
||||
|
||||
if (mapping) {
|
||||
void **pslot;
|
||||
@ -2061,7 +2065,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
list_del(page_deferred_list(head));
|
||||
}
|
||||
if (mapping)
|
||||
__dec_zone_page_state(page, NR_SHMEM_THPS);
|
||||
__dec_node_page_state(page, NR_SHMEM_THPS);
|
||||
spin_unlock(&pgdata->split_queue_lock);
|
||||
__split_huge_page(page, list, flags);
|
||||
ret = 0;
|
||||
@ -2077,7 +2081,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
|
||||
spin_unlock(&pgdata->split_queue_lock);
|
||||
fail: if (mapping)
|
||||
spin_unlock(&mapping->tree_lock);
|
||||
spin_unlock_irqrestore(&page_zone(head)->lru_lock, flags);
|
||||
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
|
||||
unfreeze_page(head);
|
||||
ret = -EBUSY;
|
||||
}
|
||||
|
@ -4391,7 +4391,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
|
||||
/*
|
||||
* This function is called from memory failure code.
|
||||
* Assume the caller holds page lock of the head page.
|
||||
*/
|
||||
int dequeue_hwpoisoned_huge_page(struct page *hpage)
|
||||
{
|
||||
|
@ -78,7 +78,7 @@ extern unsigned long highest_memmap_pfn;
|
||||
*/
|
||||
extern int isolate_lru_page(struct page *page);
|
||||
extern void putback_lru_page(struct page *page);
|
||||
extern bool zone_reclaimable(struct zone *zone);
|
||||
extern bool pgdat_reclaimable(struct pglist_data *pgdat);
|
||||
|
||||
/*
|
||||
* in mm/rmap.c:
|
||||
@ -185,10 +185,7 @@ struct compact_control {
|
||||
const unsigned int alloc_flags; /* alloc flags of a direct compactor */
|
||||
const int classzone_idx; /* zone index of a direct compactor */
|
||||
struct zone *zone;
|
||||
int contended; /* Signal need_sched() or lock
|
||||
* contention detected during
|
||||
* compaction
|
||||
*/
|
||||
bool contended; /* Signal lock or sched contention */
|
||||
};
|
||||
|
||||
unsigned long
|
||||
@ -433,10 +430,10 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
|
||||
}
|
||||
#endif /* CONFIG_SPARSEMEM */
|
||||
|
||||
#define ZONE_RECLAIM_NOSCAN -2
|
||||
#define ZONE_RECLAIM_FULL -1
|
||||
#define ZONE_RECLAIM_SOME 0
|
||||
#define ZONE_RECLAIM_SUCCESS 1
|
||||
#define NODE_RECLAIM_NOSCAN -2
|
||||
#define NODE_RECLAIM_FULL -1
|
||||
#define NODE_RECLAIM_SOME 0
|
||||
#define NODE_RECLAIM_SUCCESS 1
|
||||
|
||||
extern int hwpoison_filter(struct page *p);
|
||||
|
||||
@ -467,7 +464,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
|
||||
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
|
||||
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
|
||||
#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
|
||||
#define ALLOC_FAIR 0x100 /* fair zone allocation */
|
||||
|
||||
enum ttu_flags;
|
||||
struct tlbflush_unmap_batch;
|
||||
|
@ -7,5 +7,4 @@ CFLAGS_REMOVE_kasan.o = -pg
|
||||
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
|
||||
CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
|
||||
|
||||
obj-y := kasan.o report.o kasan_init.o
|
||||
obj-$(CONFIG_SLAB) += quarantine.o
|
||||
obj-y := kasan.o report.o kasan_init.o quarantine.o
|
||||
|
@ -351,7 +351,6 @@ void kasan_free_pages(struct page *page, unsigned int order)
|
||||
KASAN_FREE_PAGE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
/*
|
||||
* Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
|
||||
* For larger allocations larger redzones are used.
|
||||
@ -373,16 +372,8 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
|
||||
unsigned long *flags)
|
||||
{
|
||||
int redzone_adjust;
|
||||
/* Make sure the adjusted size is still less than
|
||||
* KMALLOC_MAX_CACHE_SIZE.
|
||||
* TODO: this check is only useful for SLAB, but not SLUB. We'll need
|
||||
* to skip it for SLUB when it starts using kasan_cache_create().
|
||||
*/
|
||||
if (*size > KMALLOC_MAX_CACHE_SIZE -
|
||||
sizeof(struct kasan_alloc_meta) -
|
||||
sizeof(struct kasan_free_meta))
|
||||
return;
|
||||
*flags |= SLAB_KASAN;
|
||||
int orig_size = *size;
|
||||
|
||||
/* Add alloc meta. */
|
||||
cache->kasan_info.alloc_meta_offset = *size;
|
||||
*size += sizeof(struct kasan_alloc_meta);
|
||||
@ -395,14 +386,26 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
|
||||
}
|
||||
redzone_adjust = optimal_redzone(cache->object_size) -
|
||||
(*size - cache->object_size);
|
||||
|
||||
if (redzone_adjust > 0)
|
||||
*size += redzone_adjust;
|
||||
*size = min(KMALLOC_MAX_CACHE_SIZE,
|
||||
max(*size,
|
||||
cache->object_size +
|
||||
optimal_redzone(cache->object_size)));
|
||||
|
||||
*size = min(KMALLOC_MAX_SIZE, max(*size, cache->object_size +
|
||||
optimal_redzone(cache->object_size)));
|
||||
|
||||
/*
|
||||
* If the metadata doesn't fit, don't enable KASAN at all.
|
||||
*/
|
||||
if (*size <= cache->kasan_info.alloc_meta_offset ||
|
||||
*size <= cache->kasan_info.free_meta_offset) {
|
||||
cache->kasan_info.alloc_meta_offset = 0;
|
||||
cache->kasan_info.free_meta_offset = 0;
|
||||
*size = orig_size;
|
||||
return;
|
||||
}
|
||||
|
||||
*flags |= SLAB_KASAN;
|
||||
}
|
||||
#endif
|
||||
|
||||
void kasan_cache_shrink(struct kmem_cache *cache)
|
||||
{
|
||||
@ -414,6 +417,14 @@ void kasan_cache_destroy(struct kmem_cache *cache)
|
||||
quarantine_remove_cache(cache);
|
||||
}
|
||||
|
||||
size_t kasan_metadata_size(struct kmem_cache *cache)
|
||||
{
|
||||
return (cache->kasan_info.alloc_meta_offset ?
|
||||
sizeof(struct kasan_alloc_meta) : 0) +
|
||||
(cache->kasan_info.free_meta_offset ?
|
||||
sizeof(struct kasan_free_meta) : 0);
|
||||
}
|
||||
|
||||
void kasan_poison_slab(struct page *page)
|
||||
{
|
||||
kasan_poison_shadow(page_address(page),
|
||||
@ -431,16 +442,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
|
||||
kasan_poison_shadow(object,
|
||||
round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
|
||||
KASAN_KMALLOC_REDZONE);
|
||||
#ifdef CONFIG_SLAB
|
||||
if (cache->flags & SLAB_KASAN) {
|
||||
struct kasan_alloc_meta *alloc_info =
|
||||
get_alloc_info(cache, object);
|
||||
alloc_info->state = KASAN_STATE_INIT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
static inline int in_irqentry_text(unsigned long ptr)
|
||||
{
|
||||
return (ptr >= (unsigned long)&__irqentry_text_start &&
|
||||
@ -501,7 +509,6 @@ struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
|
||||
BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
|
||||
return (void *)object + cache->kasan_info.free_meta_offset;
|
||||
}
|
||||
#endif
|
||||
|
||||
void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
|
||||
{
|
||||
@ -522,16 +529,16 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
|
||||
|
||||
bool kasan_slab_free(struct kmem_cache *cache, void *object)
|
||||
{
|
||||
#ifdef CONFIG_SLAB
|
||||
/* RCU slabs could be legally used after free within the RCU period */
|
||||
if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
|
||||
return false;
|
||||
|
||||
if (likely(cache->flags & SLAB_KASAN)) {
|
||||
struct kasan_alloc_meta *alloc_info =
|
||||
get_alloc_info(cache, object);
|
||||
struct kasan_free_meta *free_info =
|
||||
get_free_info(cache, object);
|
||||
struct kasan_alloc_meta *alloc_info;
|
||||
struct kasan_free_meta *free_info;
|
||||
|
||||
alloc_info = get_alloc_info(cache, object);
|
||||
free_info = get_free_info(cache, object);
|
||||
|
||||
switch (alloc_info->state) {
|
||||
case KASAN_STATE_ALLOC:
|
||||
@ -550,10 +557,6 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object)
|
||||
}
|
||||
}
|
||||
return false;
|
||||
#else
|
||||
kasan_poison_slab_free(cache, object);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
|
||||
@ -576,7 +579,6 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
|
||||
kasan_unpoison_shadow(object, size);
|
||||
kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
|
||||
KASAN_KMALLOC_REDZONE);
|
||||
#ifdef CONFIG_SLAB
|
||||
if (cache->flags & SLAB_KASAN) {
|
||||
struct kasan_alloc_meta *alloc_info =
|
||||
get_alloc_info(cache, object);
|
||||
@ -585,7 +587,6 @@ void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
|
||||
alloc_info->alloc_size = size;
|
||||
set_track(&alloc_info->track, flags);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(kasan_kmalloc);
|
||||
|
||||
|
@ -95,7 +95,6 @@ struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
|
||||
struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
|
||||
const void *object);
|
||||
|
||||
|
||||
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
|
||||
{
|
||||
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
|
||||
@ -110,7 +109,7 @@ static inline bool kasan_report_enabled(void)
|
||||
void kasan_report(unsigned long addr, size_t size,
|
||||
bool is_write, unsigned long ip);
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB)
|
||||
void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
|
||||
void quarantine_reduce(void);
|
||||
void quarantine_remove_cache(struct kmem_cache *cache);
|
||||
|
@ -116,7 +116,6 @@ static inline bool init_task_stack_addr(const void *addr)
|
||||
sizeof(init_thread_union.stack));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
static void print_track(struct kasan_track *track)
|
||||
{
|
||||
pr_err("PID = %u\n", track->pid);
|
||||
@ -130,8 +129,8 @@ static void print_track(struct kasan_track *track)
|
||||
}
|
||||
}
|
||||
|
||||
static void object_err(struct kmem_cache *cache, struct page *page,
|
||||
void *object, char *unused_reason)
|
||||
static void kasan_object_err(struct kmem_cache *cache, struct page *page,
|
||||
void *object, char *unused_reason)
|
||||
{
|
||||
struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
|
||||
struct kasan_free_meta *free_info;
|
||||
@ -162,7 +161,6 @@ static void object_err(struct kmem_cache *cache, struct page *page,
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void print_address_description(struct kasan_access_info *info)
|
||||
{
|
||||
@ -177,7 +175,7 @@ static void print_address_description(struct kasan_access_info *info)
|
||||
struct kmem_cache *cache = page->slab_cache;
|
||||
object = nearest_obj(cache, page,
|
||||
(void *)info->access_addr);
|
||||
object_err(cache, page, object,
|
||||
kasan_object_err(cache, page, object,
|
||||
"kasan: bad access detected");
|
||||
return;
|
||||
}
|
||||
|
@ -480,7 +480,7 @@ void __khugepaged_exit(struct mm_struct *mm)
|
||||
static void release_pte_page(struct page *page)
|
||||
{
|
||||
/* 0 stands for page_is_file_cache(page) == false */
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON + 0);
|
||||
dec_node_page_state(page, NR_ISOLATED_ANON + 0);
|
||||
unlock_page(page);
|
||||
putback_lru_page(page);
|
||||
}
|
||||
@ -576,7 +576,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
||||
goto out;
|
||||
}
|
||||
/* 0 stands for page_is_file_cache(page) == false */
|
||||
inc_zone_page_state(page, NR_ISOLATED_ANON + 0);
|
||||
inc_node_page_state(page, NR_ISOLATED_ANON + 0);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
|
||||
@ -672,10 +672,10 @@ static bool khugepaged_scan_abort(int nid)
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If zone_reclaim_mode is disabled, then no extra effort is made to
|
||||
* If node_reclaim_mode is disabled, then no extra effort is made to
|
||||
* allocate memory locally.
|
||||
*/
|
||||
if (!zone_reclaim_mode)
|
||||
if (!node_reclaim_mode)
|
||||
return false;
|
||||
|
||||
/* If there is a count for this node already, it must be acceptable */
|
||||
@ -694,7 +694,7 @@ static bool khugepaged_scan_abort(int nid)
|
||||
/* Defrag for khugepaged will enter direct reclaim/compaction if necessary */
|
||||
static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
|
||||
{
|
||||
return GFP_TRANSHUGE | (khugepaged_defrag() ? __GFP_DIRECT_RECLAIM : 0);
|
||||
return khugepaged_defrag() ? GFP_TRANSHUGE : GFP_TRANSHUGE_LIGHT;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@ -1483,10 +1483,10 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
}
|
||||
|
||||
local_irq_save(flags);
|
||||
__inc_zone_page_state(new_page, NR_SHMEM_THPS);
|
||||
__inc_node_page_state(new_page, NR_SHMEM_THPS);
|
||||
if (nr_none) {
|
||||
__mod_zone_page_state(zone, NR_FILE_PAGES, nr_none);
|
||||
__mod_zone_page_state(zone, NR_SHMEM, nr_none);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
|
@ -1485,8 +1485,10 @@ static int kmemleak_scan_thread(void *arg)
	 * Wait before the first scan to allow the system to fully initialize.
	 */
	if (first_run) {
		signed long timeout = msecs_to_jiffies(SECS_FIRST_SCAN * 1000);
		first_run = 0;
		ssleep(SECS_FIRST_SCAN);
		while (timeout && !kthread_should_stop())
			timeout = schedule_timeout_interruptible(timeout);
	}

	while (!kthread_should_stop()) {

@ -20,7 +20,7 @@
#include <linux/seq_file.h>
#include <linux/memblock.h>

#include <asm-generic/sections.h>
#include <asm/sections.h>
#include <linux/io.h>

#include "internal.h"
@ -1027,7 +1027,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
			*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			idx_a--;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}
@ -1465,15 +1465,16 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}

void __init memblock_enforce_memory_limit(phys_addr_t limit)
static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
{
	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
	struct memblock_region *r;

	if (!limit)
		return;

	/* find out max address */
	/*
	 * translate the memory @limit size into the max address within one of
	 * the memory memblock regions, if the @limit exceeds the total size
	 * of those regions, max_addr will keep original value ULLONG_MAX
	 */
	for_each_memblock(memory, r) {
		if (limit <= r->size) {
			max_addr = r->base + limit;
@ -1482,6 +1483,22 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
		limit -= r->size;
	}

	return max_addr;
}

void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;

	if (!limit)
		return;

	max_addr = __find_max_addr(limit);

	/* @limit exceeds the total size of the memory, do nothing */
	if (max_addr == (phys_addr_t)ULLONG_MAX)
		return;

	/* truncate both memory and reserved regions */
	memblock_remove_range(&memblock.memory, max_addr,
			      (phys_addr_t)ULLONG_MAX);
@ -1489,6 +1506,36 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
			      (phys_addr_t)ULLONG_MAX);
}

void __init memblock_mem_limit_remove_map(phys_addr_t limit)
{
	struct memblock_type *type = &memblock.memory;
	phys_addr_t max_addr;
	int i, ret, start_rgn, end_rgn;

	if (!limit)
		return;

	max_addr = __find_max_addr(limit);

	/* @limit exceeds the total size of the memory, do nothing */
	if (max_addr == (phys_addr_t)ULLONG_MAX)
		return;

	ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
				     &start_rgn, &end_rgn);
	if (ret)
		return;

	/* remove all the MAP regions above the limit */
	for (i = end_rgn - 1; i >= start_rgn; i--) {
		if (!memblock_is_nomap(&type->regions[i]))
			memblock_remove_region(type, i);
	}
	/* truncate the reserved regions */
	memblock_remove_range(&memblock.reserved, max_addr,
			      (phys_addr_t)ULLONG_MAX);
}

static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int left = 0, right = type->cnt;

mm/memcontrol.c
@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = {
|
||||
* their hierarchy representation
|
||||
*/
|
||||
|
||||
struct mem_cgroup_tree_per_zone {
|
||||
struct mem_cgroup_tree_per_node {
|
||||
struct rb_root rb_root;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
struct mem_cgroup_tree_per_node {
|
||||
struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
|
||||
};
|
||||
|
||||
struct mem_cgroup_tree {
|
||||
struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
|
||||
};
|
||||
@ -323,15 +319,6 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
|
||||
|
||||
#endif /* !CONFIG_SLOB */
|
||||
|
||||
static struct mem_cgroup_per_zone *
|
||||
mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
|
||||
{
|
||||
int nid = zone_to_nid(zone);
|
||||
int zid = zone_idx(zone);
|
||||
|
||||
return &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_css_from_page - css of the memcg associated with a page
|
||||
* @page: page of interest
|
||||
@ -383,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page)
|
||||
return ino;
|
||||
}
|
||||
|
||||
static struct mem_cgroup_per_zone *
|
||||
mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
|
||||
static struct mem_cgroup_per_node *
|
||||
mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
|
||||
{
|
||||
int nid = page_to_nid(page);
|
||||
int zid = page_zonenum(page);
|
||||
|
||||
return &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
return memcg->nodeinfo[nid];
|
||||
}
|
||||
|
||||
static struct mem_cgroup_tree_per_zone *
|
||||
soft_limit_tree_node_zone(int nid, int zid)
|
||||
static struct mem_cgroup_tree_per_node *
|
||||
soft_limit_tree_node(int nid)
|
||||
{
|
||||
return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
|
||||
return soft_limit_tree.rb_tree_per_node[nid];
|
||||
}
|
||||
|
||||
static struct mem_cgroup_tree_per_zone *
|
||||
static struct mem_cgroup_tree_per_node *
|
||||
soft_limit_tree_from_page(struct page *page)
|
||||
{
|
||||
int nid = page_to_nid(page);
|
||||
int zid = page_zonenum(page);
|
||||
|
||||
return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
|
||||
return soft_limit_tree.rb_tree_per_node[nid];
|
||||
}
|
||||
|
||||
static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
struct mem_cgroup_tree_per_zone *mctz,
|
||||
static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
|
||||
struct mem_cgroup_tree_per_node *mctz,
|
||||
unsigned long new_usage_in_excess)
|
||||
{
|
||||
struct rb_node **p = &mctz->rb_root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct mem_cgroup_per_zone *mz_node;
|
||||
struct mem_cgroup_per_node *mz_node;
|
||||
|
||||
if (mz->on_tree)
|
||||
return;
|
||||
@ -423,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
return;
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
|
||||
mz_node = rb_entry(parent, struct mem_cgroup_per_node,
|
||||
tree_node);
|
||||
if (mz->usage_in_excess < mz_node->usage_in_excess)
|
||||
p = &(*p)->rb_left;
|
||||
@ -439,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
mz->on_tree = true;
|
||||
}
|
||||
|
||||
static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
struct mem_cgroup_tree_per_zone *mctz)
|
||||
static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
|
||||
struct mem_cgroup_tree_per_node *mctz)
|
||||
{
|
||||
if (!mz->on_tree)
|
||||
return;
|
||||
@ -448,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
mz->on_tree = false;
|
||||
}
|
||||
|
||||
static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
|
||||
struct mem_cgroup_tree_per_zone *mctz)
|
||||
static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
|
||||
struct mem_cgroup_tree_per_node *mctz)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@ -473,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
|
||||
static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
|
||||
{
|
||||
unsigned long excess;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_tree_per_zone *mctz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
|
||||
mctz = soft_limit_tree_from_page(page);
|
||||
/*
|
||||
@ -482,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
|
||||
* because their event counter is not touched.
|
||||
*/
|
||||
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
|
||||
mz = mem_cgroup_page_zoneinfo(memcg, page);
|
||||
mz = mem_cgroup_page_nodeinfo(memcg, page);
|
||||
excess = soft_limit_excess(memcg);
|
||||
/*
|
||||
* We have to update the tree if mz is on RB-tree or
|
||||
@ -507,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
|
||||
|
||||
static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct mem_cgroup_tree_per_zone *mctz;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
int nid, zid;
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
int nid;
|
||||
|
||||
for_each_node(nid) {
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
|
||||
mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
mctz = soft_limit_tree_node_zone(nid, zid);
|
||||
mem_cgroup_remove_exceeded(mz, mctz);
|
||||
}
|
||||
mz = mem_cgroup_nodeinfo(memcg, nid);
|
||||
mctz = soft_limit_tree_node(nid);
|
||||
mem_cgroup_remove_exceeded(mz, mctz);
|
||||
}
|
||||
}
|
||||
|
||||
static struct mem_cgroup_per_zone *
|
||||
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
|
||||
static struct mem_cgroup_per_node *
|
||||
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
|
||||
{
|
||||
struct rb_node *rightmost = NULL;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
retry:
|
||||
mz = NULL;
|
||||
@ -532,7 +515,7 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
|
||||
if (!rightmost)
|
||||
goto done; /* Nothing to reclaim from */
|
||||
|
||||
mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
|
||||
mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
|
||||
/*
|
||||
* Remove the node now but someone else can add it back,
|
||||
* we will to add it back at the end of reclaim to its correct
|
||||
@ -546,10 +529,10 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
|
||||
return mz;
|
||||
}
|
||||
|
||||
static struct mem_cgroup_per_zone *
|
||||
mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
|
||||
static struct mem_cgroup_per_node *
|
||||
mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
|
||||
{
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
spin_lock_irq(&mctz->lock);
|
||||
mz = __mem_cgroup_largest_soft_limit_node(mctz);
|
||||
@ -643,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
int nid, unsigned int lru_mask)
|
||||
{
|
||||
unsigned long nr = 0;
|
||||
int zid;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
enum lru_list lru;
|
||||
|
||||
VM_BUG_ON((unsigned)nid >= nr_node_ids);
|
||||
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
enum lru_list lru;
|
||||
|
||||
for_each_lru(lru) {
|
||||
if (!(BIT(lru) & lru_mask))
|
||||
continue;
|
||||
mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
nr += mz->lru_size[lru];
|
||||
}
|
||||
for_each_lru(lru) {
|
||||
if (!(BIT(lru) & lru_mask))
|
||||
continue;
|
||||
mz = mem_cgroup_nodeinfo(memcg, nid);
|
||||
nr += mz->lru_size[lru];
|
||||
}
|
||||
return nr;
|
||||
}
|
||||
@ -809,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
||||
rcu_read_lock();
|
||||
|
||||
if (reclaim) {
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
|
||||
mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
|
||||
iter = &mz->iter[reclaim->priority];
|
||||
|
||||
if (prev && reclaim->generation != iter->generation)
|
||||
@ -910,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
|
||||
{
|
||||
struct mem_cgroup *memcg = dead_memcg;
|
||||
struct mem_cgroup_reclaim_iter *iter;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
int nid, zid;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
int nid;
|
||||
int i;
|
||||
|
||||
while ((memcg = parent_mem_cgroup(memcg))) {
|
||||
for_each_node(nid) {
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
|
||||
mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
for (i = 0; i <= DEF_PRIORITY; i++) {
|
||||
iter = &mz->iter[i];
|
||||
cmpxchg(&iter->position,
|
||||
dead_memcg, NULL);
|
||||
}
|
||||
mz = mem_cgroup_nodeinfo(memcg, nid);
|
||||
for (i = 0; i <= DEF_PRIORITY; i++) {
|
||||
iter = &mz->iter[i];
|
||||
cmpxchg(&iter->position,
|
||||
dead_memcg, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -943,39 +920,6 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
|
||||
iter != NULL; \
|
||||
iter = mem_cgroup_iter(NULL, iter, NULL))
|
||||
|
||||
/**
|
||||
* mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
|
||||
* @zone: zone of the wanted lruvec
|
||||
* @memcg: memcg of the wanted lruvec
|
||||
*
|
||||
* Returns the lru list vector holding pages for the given @zone and
|
||||
* @mem. This can be the global zone lruvec, if the memory controller
|
||||
* is disabled.
|
||||
*/
|
||||
struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
|
||||
struct mem_cgroup *memcg)
|
||||
{
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
if (mem_cgroup_disabled()) {
|
||||
lruvec = &zone->lruvec;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mz = mem_cgroup_zone_zoneinfo(memcg, zone);
|
||||
lruvec = &mz->lruvec;
|
||||
out:
|
||||
/*
|
||||
* Since a node can be onlined after the mem_cgroup was created,
|
||||
* we have to be prepared to initialize lruvec->zone here;
|
||||
* and if offlined then reonlined, we need to reinitialize it.
|
||||
*/
|
||||
if (unlikely(lruvec->zone != zone))
|
||||
lruvec->zone = zone;
|
||||
return lruvec;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
|
||||
* @page: the page
|
||||
@ -985,14 +929,14 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
|
||||
* and putback protocol: the LRU lock must be held, and the page must
|
||||
* either be PageLRU() or the caller must have isolated/allocated it.
|
||||
*/
|
||||
struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
|
||||
struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
|
||||
{
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
if (mem_cgroup_disabled()) {
|
||||
lruvec = &zone->lruvec;
|
||||
lruvec = &pgdat->lruvec;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1004,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
|
||||
if (!memcg)
|
||||
memcg = root_mem_cgroup;
|
||||
|
||||
mz = mem_cgroup_page_zoneinfo(memcg, page);
|
||||
mz = mem_cgroup_page_nodeinfo(memcg, page);
|
||||
lruvec = &mz->lruvec;
|
||||
out:
|
||||
/*
|
||||
@ -1012,8 +956,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
|
||||
* we have to be prepared to initialize lruvec->zone here;
|
||||
* and if offlined then reonlined, we need to reinitialize it.
|
||||
*/
|
||||
if (unlikely(lruvec->zone != zone))
|
||||
lruvec->zone = zone;
|
||||
if (unlikely(lruvec->pgdat != pgdat))
|
||||
lruvec->pgdat = pgdat;
|
||||
return lruvec;
|
||||
}
|
||||
|
||||
@ -1030,17 +974,15 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
|
||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||
int nr_pages)
|
||||
{
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
unsigned long *lru_size;
|
||||
long size;
|
||||
bool empty;
|
||||
|
||||
__update_lru_size(lruvec, lru, nr_pages);
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
lru_size = mz->lru_size + lru;
|
||||
empty = list_empty(lruvec->lists + lru);
|
||||
|
||||
@ -1276,9 +1218,9 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
* select it. The goal is to allow it to allocate so that it may
|
||||
* quickly exit and free its memory.
|
||||
*/
|
||||
if (fatal_signal_pending(current) || task_will_free_mem(current)) {
|
||||
if (task_will_free_mem(current)) {
|
||||
mark_oom_victim(current);
|
||||
try_oom_reaper(current);
|
||||
wake_oom_reaper(current);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
@ -1433,7 +1375,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
|
||||
#endif
|
||||
|
||||
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
||||
struct zone *zone,
|
||||
pg_data_t *pgdat,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
@ -1443,7 +1385,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
||||
unsigned long excess;
|
||||
unsigned long nr_scanned;
|
||||
struct mem_cgroup_reclaim_cookie reclaim = {
|
||||
.zone = zone,
|
||||
.pgdat = pgdat,
|
||||
.priority = 0,
|
||||
};
|
||||
|
||||
@ -1473,8 +1415,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
||||
}
|
||||
continue;
|
||||
}
|
||||
total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
|
||||
zone, &nr_scanned);
|
||||
total += mem_cgroup_shrink_node(victim, gfp_mask, false,
|
||||
pgdat, &nr_scanned);
|
||||
*total_scanned += nr_scanned;
|
||||
if (!soft_limit_excess(root_memcg))
|
||||
break;
|
||||
@ -2107,11 +2049,11 @@ static void lock_page_lru(struct page *page, int *isolated)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
|
||||
spin_lock_irq(&zone->lru_lock);
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
if (PageLRU(page)) {
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_lru(page));
|
||||
*isolated = 1;
|
||||
@ -2126,12 +2068,12 @@ static void unlock_page_lru(struct page *page, int isolated)
|
||||
if (isolated) {
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
VM_BUG_ON_PAGE(PageLRU(page), page);
|
||||
SetPageLRU(page);
|
||||
add_page_to_lru_list(page, lruvec, page_lru(page));
|
||||
}
|
||||
spin_unlock_irq(&zone->lru_lock);
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
}
|
||||
|
||||
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
|
||||
@ -2431,7 +2373,7 @@ void memcg_kmem_uncharge(struct page *page, int order)
|
||||
|
||||
/*
|
||||
* Because tail pages are not marked as "used", set it. We're under
|
||||
* zone->lru_lock and migration entries setup in all page mappings.
|
||||
* zone_lru_lock and migration entries setup in all page mappings.
|
||||
*/
|
||||
void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
{
|
||||
@ -2601,22 +2543,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
unsigned long nr_reclaimed = 0;
|
||||
struct mem_cgroup_per_zone *mz, *next_mz = NULL;
|
||||
struct mem_cgroup_per_node *mz, *next_mz = NULL;
|
||||
unsigned long reclaimed;
|
||||
int loop = 0;
|
||||
struct mem_cgroup_tree_per_zone *mctz;
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
unsigned long excess;
|
||||
unsigned long nr_scanned;
|
||||
|
||||
if (order > 0)
|
||||
return 0;
|
||||
|
||||
mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
|
||||
mctz = soft_limit_tree_node(pgdat->node_id);
|
||||
/*
|
||||
* This loop can run a while, specially if mem_cgroup's continuously
|
||||
* keep exceeding their soft limit and putting the system under
|
||||
@ -2631,7 +2573,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
break;
|
||||
|
||||
nr_scanned = 0;
|
||||
reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
|
||||
reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
|
||||
gfp_mask, &nr_scanned);
|
||||
nr_reclaimed += reclaimed;
|
||||
*total_scanned += nr_scanned;
|
||||
@ -3252,22 +3194,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
{
|
||||
int nid, zid;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
pg_data_t *pgdat;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
struct zone_reclaim_stat *rstat;
|
||||
unsigned long recent_rotated[2] = {0, 0};
|
||||
unsigned long recent_scanned[2] = {0, 0};
|
||||
|
||||
for_each_online_node(nid)
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
|
||||
mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
|
||||
rstat = &mz->lruvec.reclaim_stat;
|
||||
for_each_online_pgdat(pgdat) {
|
||||
mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
|
||||
rstat = &mz->lruvec.reclaim_stat;
|
||||
|
||||
recent_rotated[0] += rstat->recent_rotated[0];
|
||||
recent_rotated[1] += rstat->recent_rotated[1];
|
||||
recent_scanned[0] += rstat->recent_scanned[0];
|
||||
recent_scanned[1] += rstat->recent_scanned[1];
|
||||
}
|
||||
recent_rotated[0] += rstat->recent_rotated[0];
|
||||
recent_rotated[1] += rstat->recent_rotated[1];
|
||||
recent_scanned[0] += rstat->recent_scanned[0];
|
||||
recent_scanned[1] += rstat->recent_scanned[1];
|
||||
}
|
||||
seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
|
||||
seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
|
||||
seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
|
||||
@ -4147,11 +4088,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
|
||||
return idr_find(&mem_cgroup_idr, id);
|
||||
}
|
||||
|
||||
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
|
||||
static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
|
||||
{
|
||||
struct mem_cgroup_per_node *pn;
|
||||
struct mem_cgroup_per_zone *mz;
|
||||
int zone, tmp = node;
|
||||
int tmp = node;
|
||||
/*
|
||||
* This routine is called against possible nodes.
|
||||
* But it's BUG to call kmalloc() against offline node.
|
||||
@ -4166,18 +4106,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
|
||||
if (!pn)
|
||||
return 1;
|
||||
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
mz = &pn->zoneinfo[zone];
|
||||
lruvec_init(&mz->lruvec);
|
||||
mz->usage_in_excess = 0;
|
||||
mz->on_tree = false;
|
||||
mz->memcg = memcg;
|
||||
}
|
||||
lruvec_init(&pn->lruvec);
|
||||
pn->usage_in_excess = 0;
|
||||
pn->on_tree = false;
|
||||
pn->memcg = memcg;
|
||||
|
||||
memcg->nodeinfo[node] = pn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
|
||||
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
|
||||
{
|
||||
kfree(memcg->nodeinfo[node]);
|
||||
}
|
||||
@ -4188,7 +4126,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
|
||||
memcg_wb_domain_exit(memcg);
|
||||
for_each_node(node)
|
||||
free_mem_cgroup_per_zone_info(memcg, node);
|
||||
free_mem_cgroup_per_node_info(memcg, node);
|
||||
free_percpu(memcg->stat);
|
||||
kfree(memcg);
|
||||
}
|
||||
@ -4217,7 +4155,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||
goto fail;
|
||||
|
||||
for_each_node(node)
|
||||
if (alloc_mem_cgroup_per_zone_info(memcg, node))
|
||||
if (alloc_mem_cgroup_per_node_info(memcg, node))
|
||||
goto fail;
|
||||
|
||||
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
|
||||
@ -5233,7 +5171,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
|
||||
seq_printf(m, "file %llu\n",
|
||||
(u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE);
|
||||
seq_printf(m, "kernel_stack %llu\n",
|
||||
(u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE);
|
||||
(u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
|
||||
seq_printf(m, "slab %llu\n",
|
||||
(u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
|
||||
stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
|
||||
@ -5820,18 +5758,12 @@ static int __init mem_cgroup_init(void)
|
||||
|
||||
for_each_node(node) {
|
||||
struct mem_cgroup_tree_per_node *rtpn;
|
||||
int zone;
|
||||
|
||||
rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
|
||||
node_online(node) ? node : NUMA_NO_NODE);
|
||||
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
struct mem_cgroup_tree_per_zone *rtpz;
|
||||
|
||||
rtpz = &rtpn->rb_tree_per_zone[zone];
|
||||
rtpz->rb_root = RB_ROOT;
|
||||
spin_lock_init(&rtpz->lock);
|
||||
}
|
||||
rtpn->rb_root = RB_ROOT;
|
||||
spin_lock_init(&rtpn->lock);
|
||||
soft_limit_tree.rb_tree_per_node[node] = rtpn;
|
||||
}
|
||||
|
||||
|
@ -741,8 +741,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
|
||||
* page->lru because it can be used in other hugepage operations,
|
||||
* such as __unmap_hugepage_range() and gather_surplus_pages().
|
||||
* So instead we use page_mapping() and PageAnon().
|
||||
* We assume that this function is called with page lock held,
|
||||
* so there is no race between isolation and mapping/unmapping.
|
||||
*/
|
||||
if (!(page_mapping(hpage) || PageAnon(hpage))) {
|
||||
res = dequeue_hwpoisoned_huge_page(hpage);
|
||||
@ -1663,7 +1661,7 @@ static int __soft_offline_page(struct page *page, int flags)
|
||||
put_hwpoison_page(page);
|
||||
if (!ret) {
|
||||
LIST_HEAD(pagelist);
|
||||
inc_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
inc_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
list_add(&page->lru, &pagelist);
|
||||
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
|
||||
@ -1671,7 +1669,7 @@ static int __soft_offline_page(struct page *page, int flags)
|
||||
if (ret) {
|
||||
if (!list_empty(&pagelist)) {
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
dec_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
putback_lru_page(page);
|
||||
}
|
||||
|
@ -1209,9 +1209,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
|
||||
|
||||
arch_refresh_nodedata(nid, pgdat);
|
||||
} else {
|
||||
/* Reset the nr_zones and classzone_idx to 0 before reuse */
|
||||
/* Reset the nr_zones, order and classzone_idx before reuse */
|
||||
pgdat->nr_zones = 0;
|
||||
pgdat->classzone_idx = 0;
|
||||
pgdat->kswapd_order = 0;
|
||||
pgdat->kswapd_classzone_idx = 0;
|
||||
}
|
||||
|
||||
/* we can use NODE_DATA(nid) from here */
|
||||
@ -1547,6 +1548,37 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct page *new_node_page(struct page *page, unsigned long private,
|
||||
int **result)
|
||||
{
|
||||
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
|
||||
int nid = page_to_nid(page);
|
||||
nodemask_t nmask = node_online_map;
|
||||
struct page *new_page;
|
||||
|
||||
/*
|
||||
* TODO: allocate a destination hugepage from a nearest neighbor node,
|
||||
* accordance with memory policy of the user process if possible. For
|
||||
* now as a simple work-around, we use the next node for destination.
|
||||
*/
|
||||
if (PageHuge(page))
|
||||
return alloc_huge_page_node(page_hstate(compound_head(page)),
|
||||
next_node_in(nid, nmask));
|
||||
|
||||
node_clear(nid, nmask);
|
||||
if (PageHighMem(page)
|
||||
|| (zone_idx(page_zone(page)) == ZONE_MOVABLE))
|
||||
gfp_mask |= __GFP_HIGHMEM;
|
||||
|
||||
new_page = __alloc_pages_nodemask(gfp_mask, 0,
|
||||
node_zonelist(nid, gfp_mask), &nmask);
|
||||
if (!new_page)
|
||||
new_page = __alloc_pages(gfp_mask, 0,
|
||||
node_zonelist(nid, gfp_mask));
|
||||
|
||||
return new_page;
|
||||
}
|
||||
|
||||
#define NR_OFFLINE_AT_ONCE_PAGES (256)
|
||||
static int
|
||||
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
@ -1586,7 +1618,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
put_page(page);
|
||||
list_add_tail(&page->lru, &source);
|
||||
move_pages--;
|
||||
inc_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
inc_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
|
||||
} else {
|
||||
@ -1610,11 +1642,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* alloc_migrate_target should be improooooved!!
|
||||
* migrate_pages returns # of failed pages.
|
||||
*/
|
||||
ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
|
||||
/* Allocate a new page from the nearest neighbor node */
|
||||
ret = migrate_pages(&source, new_node_page, NULL, 0,
|
||||
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
|
||||
if (ret)
|
||||
putback_movable_pages(&source);
|
||||
|
@ -962,7 +962,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
		if (!isolate_lru_page(page)) {
			list_add_tail(&page->lru, pagelist);
			inc_zone_page_state(page, NR_ISOLATED_ANON +
			inc_node_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));
		}
	}

mm/mempool.c
@ -306,7 +306,7 @@ EXPORT_SYMBOL(mempool_resize);
 * returns NULL. Note that due to preallocation, this function
 * *never* fails when called from process contexts. (it might
 * fail if called from an IRQ context.)
 * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported.
 * Note: using __GFP_ZERO is not supported.
 */
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
@ -315,27 +315,16 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
	wait_queue_t wait;
	gfp_t gfp_temp;

	/* If oom killed, memory reserves are essential to prevent livelock */
	VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC);
	/* No element size to zero on allocation */
	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);

	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);

	gfp_mask |= __GFP_NOMEMALLOC;	/* don't allocate emergency reserves */
	gfp_mask |= __GFP_NORETRY;	/* don't loop in __alloc_pages */
	gfp_mask |= __GFP_NOWARN;	/* failures are OK */

	gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);

repeat_alloc:
	if (likely(pool->curr_nr)) {
		/*
		 * Don't allocate from emergency reserves if there are
		 * elements available. This check is racy, but it will
		 * be rechecked each loop.
		 */
		gfp_temp |= __GFP_NOMEMALLOC;
	}

	element = pool->alloc(gfp_temp, pool->pool_data);
	if (likely(element != NULL))
@ -359,12 +348,11 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
	 * We use gfp mask w/o direct reclaim or IO for the first round. If
	 * alloc failed with that and @pool was empty, retry immediately.
	 */
	if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) {
	if (gfp_temp != gfp_mask) {
		spin_unlock_irqrestore(&pool->lock, flags);
		gfp_temp = gfp_mask;
		goto repeat_alloc;
	}
	gfp_temp = gfp_mask;

	/* We must not sleep if !__GFP_DIRECT_RECLAIM */
	if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {

mm/migrate.c
@ -168,7 +168,7 @@ void putback_movable_pages(struct list_head *l)
|
||||
continue;
|
||||
}
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
dec_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
/*
|
||||
* We isolated non-lru movable page so here we can use
|
||||
@ -501,19 +501,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
|
||||
* new page and drop references to the old page.
|
||||
*
|
||||
* Note that anonymous pages are accounted for
|
||||
* via NR_FILE_PAGES and NR_ANON_PAGES if they
|
||||
* via NR_FILE_PAGES and NR_ANON_MAPPED if they
|
||||
* are mapped to swap space.
|
||||
*/
|
||||
if (newzone != oldzone) {
|
||||
__dec_zone_state(oldzone, NR_FILE_PAGES);
|
||||
__inc_zone_state(newzone, NR_FILE_PAGES);
|
||||
__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
|
||||
__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
|
||||
if (PageSwapBacked(page) && !PageSwapCache(page)) {
|
||||
__dec_zone_state(oldzone, NR_SHMEM);
|
||||
__inc_zone_state(newzone, NR_SHMEM);
|
||||
__dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
|
||||
__inc_node_state(newzone->zone_pgdat, NR_SHMEM);
|
||||
}
|
||||
if (dirty && mapping_cap_account_dirty(mapping)) {
|
||||
__dec_zone_state(oldzone, NR_FILE_DIRTY);
|
||||
__inc_zone_state(newzone, NR_FILE_DIRTY);
|
||||
__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
|
||||
__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
|
||||
__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
|
||||
__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
|
||||
}
|
||||
}
|
||||
local_irq_enable();
|
||||
@ -1119,7 +1121,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
|
||||
* restored.
|
||||
*/
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
dec_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
}
|
||||
|
||||
@ -1460,7 +1462,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
|
||||
err = isolate_lru_page(page);
|
||||
if (!err) {
|
||||
list_add_tail(&page->lru, &pagelist);
|
||||
inc_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
inc_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
}
|
||||
put_and_set:
|
||||
@ -1726,15 +1728,16 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
|
||||
unsigned long nr_migrate_pages)
|
||||
{
|
||||
int z;
|
||||
|
||||
if (!pgdat_reclaimable(pgdat))
|
||||
return false;
|
||||
|
||||
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
|
||||
struct zone *zone = pgdat->node_zones + z;
|
||||
|
||||
if (!populated_zone(zone))
|
||||
continue;
|
||||
|
||||
if (!zone_reclaimable(zone))
|
||||
continue;
|
||||
|
||||
/* Avoid waking kswapd by allocating pages_to_migrate pages. */
|
||||
if (!zone_watermark_ok(zone, 0,
|
||||
high_wmark_pages(zone) +
|
||||
@ -1828,7 +1831,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
|
||||
}
|
||||
|
||||
page_lru = page_is_file_cache(page);
|
||||
mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
|
||||
mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
|
||||
hpage_nr_pages(page));
|
||||
|
||||
/*
|
||||
@ -1886,7 +1889,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
|
||||
if (nr_remaining) {
|
||||
if (!list_empty(&migratepages)) {
|
||||
list_del(&page->lru);
|
||||
dec_zone_page_state(page, NR_ISOLATED_ANON +
|
||||
dec_node_page_state(page, NR_ISOLATED_ANON +
|
||||
page_is_file_cache(page));
|
||||
putback_lru_page(page);
|
||||
}
|
||||
@ -1931,7 +1934,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||
goto out_dropref;
|
||||
|
||||
new_page = alloc_pages_node(node,
|
||||
(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
|
||||
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
|
||||
HPAGE_PMD_ORDER);
|
||||
if (!new_page)
|
||||
goto out_fail;
|
||||
@ -1979,7 +1982,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||
/* Retake the callers reference and putback on LRU */
|
||||
get_page(page);
|
||||
putback_lru_page(page);
|
||||
mod_zone_page_state(page_zone(page),
|
||||
mod_node_page_state(page_pgdat(page),
|
||||
NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
|
||||
|
||||
goto out_unlock;
|
||||
@ -2030,7 +2033,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
|
||||
count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
|
||||
count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
|
||||
|
||||
mod_zone_page_state(page_zone(page),
|
||||
mod_node_page_state(page_pgdat(page),
|
||||
NR_ISOLATED_ANON + page_lru,
|
||||
-HPAGE_PMD_NR);
|
||||
return isolated;
|
||||
|
mm/mlock.c
@ -103,7 +103,7 @@ static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
	if (PageLRU(page)) {
		struct lruvec *lruvec;

		lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
		if (getpage)
			get_page(page);
		ClearPageLRU(page);
@ -188,7 +188,7 @@ unsigned int munlock_vma_page(struct page *page)
	 * might otherwise copy PageMlocked to part of the tail pages before
	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
	 */
	spin_lock_irq(&zone->lru_lock);
	spin_lock_irq(zone_lru_lock(zone));

	nr_pages = hpage_nr_pages(page);
	if (!TestClearPageMlocked(page))
@ -197,14 +197,14 @@ unsigned int munlock_vma_page(struct page *page)
	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);

	if (__munlock_isolate_lru_page(page, true)) {
		spin_unlock_irq(&zone->lru_lock);
		spin_unlock_irq(zone_lru_lock(zone));
		__munlock_isolated_page(page);
		goto out;
	}
	__munlock_isolation_failed(page);

unlock_out:
	spin_unlock_irq(&zone->lru_lock);
	spin_unlock_irq(zone_lru_lock(zone));

out:
	return nr_pages - 1;
@ -289,7 +289,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
	pagevec_init(&pvec_putback, 0);

	/* Phase 1: page isolation */
	spin_lock_irq(&zone->lru_lock);
	spin_lock_irq(zone_lru_lock(zone));
	for (i = 0; i < nr; i++) {
		struct page *page = pvec->pages[i];

@ -315,7 +315,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
	}
	delta_munlocked = -nr + pagevec_count(&pvec_putback);
	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
	spin_unlock_irq(&zone->lru_lock);
	spin_unlock_irq(zone_lru_lock(zone));

	/* Now we can release pins of pages that we are not munlocking */
	pagevec_release(&pvec_putback);

mm/mmap.c
@ -621,7 +621,6 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct rb_root *root = NULL;
	struct anon_vma *anon_vma = NULL;
@ -631,17 +630,25 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL;
		struct vm_area_struct *exporter = NULL, *importer = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mprotect case 6).
			 */
again:			remove_next = 1 + (end > next->vm_end);
			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			exporter = next;
			importer = vma;

			/*
			 * If next doesn't have anon_vma, import from vma after
			 * next, if the vma overlaps with it.
			 */
			if (remove_next == 2 && next && !next->anon_vma)
				exporter = next->vm_next;

		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
@ -675,7 +682,7 @@ again: remove_next = 1 + (end > next->vm_end);
				return error;
		}
	}

again:
	vma_adjust_trans_huge(vma, start, end, adjust_next);

	if (file) {
@ -796,8 +803,11 @@ again: remove_next = 1 + (end > next->vm_end);
	 * up the code too much to do both in one go.
	 */
	next = vma->vm_next;
	if (remove_next == 2)
	if (remove_next == 2) {
		remove_next = 1;
		end = next->vm_end;
		goto again;
	}
	else if (next)
		vma_gap_update(next);
	else

mm/oom_kill.c
@ -176,11 +176,13 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
|
||||
|
||||
/*
|
||||
* Do not even consider tasks which are explicitly marked oom
|
||||
* unkillable or have been already oom reaped.
|
||||
* unkillable or have been already oom reaped or the are in
|
||||
* the middle of vfork
|
||||
*/
|
||||
adj = (long)p->signal->oom_score_adj;
|
||||
if (adj == OOM_SCORE_ADJ_MIN ||
|
||||
test_bit(MMF_OOM_REAPED, &p->mm->flags)) {
|
||||
test_bit(MMF_OOM_REAPED, &p->mm->flags) ||
|
||||
in_vfork(p)) {
|
||||
task_unlock(p);
|
||||
return 0;
|
||||
}
|
||||
@ -281,10 +283,22 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
|
||||
|
||||
/*
|
||||
* This task already has access to memory reserves and is being killed.
|
||||
* Don't allow any other task to have access to the reserves.
|
||||
* Don't allow any other task to have access to the reserves unless
|
||||
* the task has MMF_OOM_REAPED because chances that it would release
|
||||
* any memory is quite low.
|
||||
*/
|
||||
if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims))
|
||||
return OOM_SCAN_ABORT;
|
||||
if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
|
||||
struct task_struct *p = find_lock_task_mm(task);
|
||||
enum oom_scan_t ret = OOM_SCAN_ABORT;
|
||||
|
||||
if (p) {
|
||||
if (test_bit(MMF_OOM_REAPED, &p->mm->flags))
|
||||
ret = OOM_SCAN_CONTINUE;
|
||||
task_unlock(p);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If task is allocating a lot of memory and has been marked to be
|
||||
@ -415,7 +429,7 @@ bool oom_killer_disabled __read_mostly;
|
||||
* task's threads: if one of those is using this mm then this task was also
|
||||
* using it.
|
||||
*/
|
||||
static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
|
||||
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
@ -554,8 +568,27 @@ static void oom_reap_task(struct task_struct *tsk)
|
||||
schedule_timeout_idle(HZ/10);
|
||||
|
||||
if (attempts > MAX_OOM_REAP_RETRIES) {
|
||||
struct task_struct *p;
|
||||
|
||||
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
|
||||
task_pid_nr(tsk), tsk->comm);
|
||||
|
||||
/*
|
||||
* If we've already tried to reap this task in the past and
|
||||
* failed it probably doesn't make much sense to try yet again
|
||||
* so hide the mm from the oom killer so that it can move on
|
||||
* to another task with a different mm struct.
|
||||
*/
|
||||
p = find_lock_task_mm(tsk);
|
||||
if (p) {
|
||||
if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &p->mm->flags)) {
|
||||
pr_info("oom_reaper: giving up pid:%d (%s)\n",
|
||||
task_pid_nr(tsk), tsk->comm);
|
||||
set_bit(MMF_OOM_REAPED, &p->mm->flags);
|
||||
}
|
||||
task_unlock(p);
|
||||
}
|
||||
|
||||
debug_show_all_locks();
|
||||
}
|
||||
|
||||
@ -594,7 +627,7 @@ static int oom_reaper(void *unused)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void wake_oom_reaper(struct task_struct *tsk)
|
||||
void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
if (!oom_reaper_th)
|
||||
return;
|
||||
@ -612,46 +645,6 @@ static void wake_oom_reaper(struct task_struct *tsk)
|
||||
wake_up(&oom_reaper_wait);
|
||||
}
|
||||
|
||||
/* Check if we can reap the given task. This has to be called with stable
|
||||
* tsk->mm
|
||||
*/
|
||||
void try_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
struct mm_struct *mm = tsk->mm;
|
||||
struct task_struct *p;
|
||||
|
||||
if (!mm)
|
||||
return;
|
||||
|
||||
/*
|
||||
* There might be other threads/processes which are either not
|
||||
* dying or even not killable.
|
||||
*/
|
||||
if (atomic_read(&mm->mm_users) > 1) {
|
||||
rcu_read_lock();
|
||||
for_each_process(p) {
|
||||
if (!process_shares_mm(p, mm))
|
||||
continue;
|
||||
if (fatal_signal_pending(p))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the task is exiting make sure the whole thread group
|
||||
* is exiting and cannot acces mm anymore.
|
||||
*/
|
||||
if (signal_group_exit(p->signal))
|
||||
continue;
|
||||
|
||||
/* Give up */
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
wake_oom_reaper(tsk);
|
||||
}
|
||||
|
||||
static int __init oom_init(void)
|
||||
{
|
||||
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
|
||||
@ -663,10 +656,6 @@ static int __init oom_init(void)
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(oom_init)
|
||||
#else
|
||||
static void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
@ -743,6 +732,80 @@ void oom_killer_enable(void)
|
||||
oom_killer_disabled = false;
|
||||
}
|
||||
|
||||
static inline bool __task_will_free_mem(struct task_struct *task)
|
||||
{
|
||||
struct signal_struct *sig = task->signal;
|
||||
|
||||
/*
|
||||
* A coredumping process may sleep for an extended period in exit_mm(),
|
||||
* so the oom killer cannot assume that the process will promptly exit
|
||||
* and release memory.
|
||||
*/
|
||||
if (sig->flags & SIGNAL_GROUP_COREDUMP)
|
||||
return false;
|
||||
|
||||
if (sig->flags & SIGNAL_GROUP_EXIT)
|
||||
return true;
|
||||
|
||||
if (thread_group_empty(task) && (task->flags & PF_EXITING))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks whether the given task is dying or exiting and likely to
|
||||
* release its address space. This means that all threads and processes
|
||||
* sharing the same mm have to be killed or exiting.
|
||||
* Caller has to make sure that task->mm is stable (hold task_lock or
|
||||
* it operates on the current).
|
||||
*/
|
||||
bool task_will_free_mem(struct task_struct *task)
|
||||
{
|
||||
struct mm_struct *mm = task->mm;
|
||||
struct task_struct *p;
|
||||
bool ret;
|
||||
|
||||
/*
|
||||
* Skip tasks without mm because it might have passed its exit_mm and
|
||||
* exit_oom_victim. oom_reaper could have rescued that but do not rely
|
||||
* on that for now. We can consider find_lock_task_mm in future.
|
||||
*/
|
||||
if (!mm)
|
||||
return false;
|
||||
|
||||
if (!__task_will_free_mem(task))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* This task has already been drained by the oom reaper so there are
|
||||
* only small chances it will free some more
|
||||
*/
|
||||
if (test_bit(MMF_OOM_REAPED, &mm->flags))
|
||||
return false;
|
||||
|
||||
if (atomic_read(&mm->mm_users) <= 1)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* This is really pessimistic but we do not have any reliable way
|
||||
* to check that external processes share with our mm
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for_each_process(p) {
|
||||
if (!process_shares_mm(p, mm))
|
||||
continue;
|
||||
if (same_thread_group(task, p))
|
||||
continue;
|
||||
ret = __task_will_free_mem(p);
|
||||
if (!ret)
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called while holding a reference to p, which will be released upon
|
||||
* returning.
|
||||
@ -765,9 +828,9 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
* its children or threads, just set TIF_MEMDIE so it can die quickly
|
||||
*/
|
||||
task_lock(p);
|
||||
if (p->mm && task_will_free_mem(p)) {
|
||||
if (task_will_free_mem(p)) {
|
||||
mark_oom_victim(p);
|
||||
try_oom_reaper(p);
|
||||
wake_oom_reaper(p);
|
||||
task_unlock(p);
|
||||
put_task_struct(p);
|
||||
return;
|
||||
@ -850,14 +913,18 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
|
||||
continue;
|
||||
if (same_thread_group(p, victim))
|
||||
continue;
|
||||
if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) ||
|
||||
p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
|
||||
if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p)) {
|
||||
/*
|
||||
* We cannot use oom_reaper for the mm shared by this
|
||||
* process because it wouldn't get killed and so the
|
||||
* memory might be still used.
|
||||
* memory might be still used. Hide the mm from the oom
|
||||
* killer to guarantee OOM forward progress.
|
||||
*/
|
||||
can_oom_reap = false;
|
||||
set_bit(MMF_OOM_REAPED, &mm->flags);
|
||||
pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
|
||||
task_pid_nr(victim), victim->comm,
|
||||
task_pid_nr(p), p->comm);
|
||||
continue;
|
||||
}
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
|
||||
@ -939,14 +1006,10 @@ bool out_of_memory(struct oom_control *oc)
|
||||
* If current has a pending SIGKILL or is exiting, then automatically
|
||||
* select it. The goal is to allow it to allocate so that it may
|
||||
* quickly exit and free its memory.
|
||||
*
|
||||
* But don't select if current has already released its mm and cleared
|
||||
* TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur.
|
||||
*/
|
||||
if (current->mm &&
|
||||
(fatal_signal_pending(current) || task_will_free_mem(current))) {
|
||||
if (task_will_free_mem(current)) {
|
||||
mark_oom_victim(current);
|
||||
try_oom_reaper(current);
|
||||
wake_oom_reaper(current);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -267,26 +267,35 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
*/

/**
* zone_dirtyable_memory - number of dirtyable pages in a zone
* @zone: the zone
* node_dirtyable_memory - number of dirtyable pages in a node
* @pgdat: the node
*
* Returns the zone's number of pages potentially available for dirty
* page cache. This is the base value for the per-zone dirty limits.
* Returns the node's number of pages potentially available for dirty
* page cache. This is the base value for the per-node dirty limits.
*/
static unsigned long zone_dirtyable_memory(struct zone *zone)
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
unsigned long nr_pages;
unsigned long nr_pages = 0;
int z;

for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = pgdat->node_zones + z;

if (!populated_zone(zone))
continue;

nr_pages += zone_page_state(zone, NR_FREE_PAGES);
}

nr_pages = zone_page_state(zone, NR_FREE_PAGES);
/*
* Pages reserved for the kernel should not be considered
* dirtyable, to prevent a situation where reclaim has to
* clean pages in order to balance the zones.
*/
nr_pages -= min(nr_pages, zone->totalreserve_pages);
nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

return nr_pages;
}
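node_dirtyable_memory() above builds a per-node figure out of per-zone free counts plus node-level file LRU counters. A standalone model of that summation, with made-up structures and numbers purely for illustration:

/* Standalone model, not kernel code. */
#include <stdio.h>

#define MAX_ZONES 3

struct zone_model { unsigned long free_pages; int populated; };

struct node_model {
	struct zone_model zones[MAX_ZONES];
	unsigned long totalreserve_pages;
	unsigned long inactive_file, active_file;   /* node-level counters */
};

static unsigned long node_dirtyable(const struct node_model *n)
{
	unsigned long nr = 0;

	for (int z = 0; z < MAX_ZONES; z++) {
		if (!n->zones[z].populated)
			continue;
		nr += n->zones[z].free_pages;       /* still per-zone state */
	}
	if (nr > n->totalreserve_pages)             /* reserves are not dirtyable */
		nr -= n->totalreserve_pages;
	else
		nr = 0;
	return nr + n->inactive_file + n->active_file;  /* node-level state */
}

int main(void)
{
	struct node_model n = {
		.zones = { {1000, 1}, {4000, 1}, {0, 0} },
		.totalreserve_pages = 500,
		.inactive_file = 2000, .active_file = 1000,
	};
	printf("dirtyable pages: %lu\n", node_dirtyable(&n));  /* 1000+4000-500+3000 = 7500 */
	return 0;
}
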
@@ -299,13 +308,26 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
int i;

for_each_node_state(node, N_HIGH_MEMORY) {
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *z = &NODE_DATA(node)->node_zones[i];
for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
struct zone *z;
unsigned long nr_pages;

if (is_highmem(z))
x += zone_dirtyable_memory(z);
if (!is_highmem_idx(i))
continue;

z = &NODE_DATA(node)->node_zones[i];
if (!populated_zone(z))
continue;

nr_pages = zone_page_state(z, NR_FREE_PAGES);
/* watch for underflows */
nr_pages -= min(nr_pages, high_wmark_pages(z));
nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
x += nr_pages;
}
}

/*
* Unreclaimable memory (kernel memory or anonymous memory
* without swap) can bring down the dirtyable pages below
@@ -348,8 +370,8 @@ static unsigned long global_dirtyable_memory(void)
*/
x -= min(x, totalreserve_pages);

x += global_page_state(NR_INACTIVE_FILE);
x += global_page_state(NR_ACTIVE_FILE);
x += global_node_page_state(NR_INACTIVE_FILE);
x += global_node_page_state(NR_ACTIVE_FILE);

if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
@@ -445,23 +467,23 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
}

/**
* zone_dirty_limit - maximum number of dirty pages allowed in a zone
* @zone: the zone
* node_dirty_limit - maximum number of dirty pages allowed in a node
* @pgdat: the node
*
* Returns the maximum number of dirty pages allowed in a zone, based
* on the zone's dirtyable memory.
* Returns the maximum number of dirty pages allowed in a node, based
* on the node's dirtyable memory.
*/
static unsigned long zone_dirty_limit(struct zone *zone)
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
unsigned long zone_memory = zone_dirtyable_memory(zone);
unsigned long node_memory = node_dirtyable_memory(pgdat);
struct task_struct *tsk = current;
unsigned long dirty;

if (vm_dirty_bytes)
dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
zone_memory / global_dirtyable_memory();
node_memory / global_dirtyable_memory();
else
dirty = vm_dirty_ratio * zone_memory / 100;
dirty = vm_dirty_ratio * node_memory / 100;

if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
dirty += dirty / 4;
@@ -470,19 +492,22 @@ static unsigned long zone_dirty_limit(struct zone *zone)
}

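node_dirty_limit() above scales either vm_dirty_bytes (by the node's share of global dirtyable memory) or vm_dirty_ratio (against the node's own dirtyable pages). A standalone sketch of that arithmetic with sample numbers; PAGE_SIZE and the inputs are assumptions for illustration:

/* Standalone sketch, not kernel code. */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long model_node_dirty_limit(unsigned long node_memory,
					    unsigned long global_memory,
					    unsigned long vm_dirty_bytes,
					    unsigned long vm_dirty_ratio)
{
	if (vm_dirty_bytes)
		return DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_memory;
	return vm_dirty_ratio * node_memory / 100;
}

int main(void)
{
	/* 20% ratio on a node with 100000 dirtyable pages -> 20000 pages. */
	printf("%lu\n", model_node_dirty_limit(100000, 400000, 0, 20));
	/* 64MB byte budget, node holds 1/4 of dirtyable memory -> 4096 pages. */
	printf("%lu\n", model_node_dirty_limit(100000, 400000, 64UL << 20, 20));
	return 0;
}
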
/**
* zone_dirty_ok - tells whether a zone is within its dirty limits
* @zone: the zone to check
* node_dirty_ok - tells whether a node is within its dirty limits
* @pgdat: the node to check
*
* Returns %true when the dirty pages in @zone are within the zone's
* Returns %true when the dirty pages in @pgdat are within the node's
* dirty limit, %false if the limit is exceeded.
*/
bool zone_dirty_ok(struct zone *zone)
bool node_dirty_ok(struct pglist_data *pgdat)
{
unsigned long limit = zone_dirty_limit(zone);
unsigned long limit = node_dirty_limit(pgdat);
unsigned long nr_pages = 0;

return zone_page_state(zone, NR_FILE_DIRTY) +
zone_page_state(zone, NR_UNSTABLE_NFS) +
zone_page_state(zone, NR_WRITEBACK) <= limit;
nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
nr_pages += node_page_state(pgdat, NR_WRITEBACK);

return nr_pages <= limit;
}

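node_dirty_ok() above is the per-node form of the old per-zone test, and the get_page_from_freelist() hunk later in this diff only re-evaluates it when the node changes. A minimal sketch of that predicate plus the skip-last-failed-node pattern, using stand-in types:

/* Standalone sketch, not kernel code. */
#include <stdbool.h>
#include <stddef.h>

struct dirty_node { unsigned long dirty, unstable_nfs, writeback, dirty_limit; };

static bool model_node_dirty_ok(const struct dirty_node *n)
{
	return n->dirty + n->unstable_nfs + n->writeback <= n->dirty_limit;
}

struct dirty_zone { struct dirty_node *node; };

/* Returns the first zone whose node is still under its dirty limit. */
static struct dirty_zone *pick_zone(struct dirty_zone *zones, int nr)
{
	struct dirty_node *last_failed = NULL;

	for (int i = 0; i < nr; i++) {
		struct dirty_node *n = zones[i].node;

		if (n == last_failed)          /* already known to be over the limit */
			continue;
		if (!model_node_dirty_ok(n)) {
			last_failed = n;
			continue;
		}
		return &zones[i];
	}
	return NULL;
}
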
int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -1570,10 +1595,10 @@ static void balance_dirty_pages(struct address_space *mapping,
* written to the server's write cache, but has not yet
* been flushed to permanent storage.
*/
nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS);
nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS);
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

domain_dirty_limits(gdtc);

@@ -1910,8 +1935,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
* as we're trying to decide whether to put more under writeback.
*/
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS);
gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS);
domain_dirty_limits(gdtc);

if (gdtc->dirty > gdtc->bg_thresh)
@@ -1955,8 +1980,8 @@ void throttle_vm_writeout(gfp_t gfp_mask)
*/
dirty_thresh += dirty_thresh / 10; /* wheeee... */

if (global_page_state(NR_UNSTABLE_NFS) +
global_page_state(NR_WRITEBACK) <= dirty_thresh)
if (global_node_page_state(NR_UNSTABLE_NFS) +
global_node_page_state(NR_WRITEBACK) <= dirty_thresh)
break;
congestion_wait(BLK_RW_ASYNC, HZ/10);

@@ -1984,8 +2009,8 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
void laptop_mode_timer_fn(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
int nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS);
int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
global_node_page_state(NR_UNSTABLE_NFS);
struct bdi_writeback *wb;

/*
@@ -2436,8 +2461,9 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
wb = inode_to_wb(inode);

mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
__inc_zone_page_state(page, NR_FILE_DIRTY);
__inc_zone_page_state(page, NR_DIRTIED);
__inc_node_page_state(page, NR_FILE_DIRTY);
__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
__inc_node_page_state(page, NR_DIRTIED);
__inc_wb_stat(wb, WB_RECLAIMABLE);
__inc_wb_stat(wb, WB_DIRTIED);
task_io_account_write(PAGE_SIZE);
@@ -2457,7 +2483,8 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
{
if (mapping_cap_account_dirty(mapping)) {
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
task_io_account_cancelled_write(PAGE_SIZE);
}
@@ -2525,7 +2552,7 @@ void account_page_redirty(struct page *page)

wb = unlocked_inode_to_wb_begin(inode, &locked);
current->nr_dirtied--;
dec_zone_page_state(page, NR_DIRTIED);
dec_node_page_state(page, NR_DIRTIED);
dec_wb_stat(wb, WB_DIRTIED);
unlocked_inode_to_wb_end(inode, locked);
}
@@ -2713,7 +2740,8 @@ int clear_page_dirty_for_io(struct page *page)
wb = unlocked_inode_to_wb_begin(inode, &locked);
if (TestClearPageDirty(page)) {
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
ret = 1;
}
@@ -2759,8 +2787,9 @@ int test_clear_page_writeback(struct page *page)
}
if (ret) {
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN);
dec_node_page_state(page, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
inc_node_page_state(page, NR_WRITTEN);
}
unlock_page_memcg(page);
return ret;
@@ -2813,7 +2842,8 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
}
if (!ret) {
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
inc_node_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
return ret;

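The accounting hunks above split the bookkeeping: dirty, writeback and written totals move to node counters while each zone keeps a single NR_ZONE_WRITE_PENDING count. A simplified model of the paired updates, with stand-in structures:

/* Standalone sketch, not kernel code. */
struct node_stats { unsigned long nr_dirty, nr_writeback, nr_written; };
struct zone_stats { unsigned long nr_write_pending; };

static void model_account_page_dirtied(struct node_stats *node, struct zone_stats *zone)
{
	node->nr_dirty++;           /* node-level NR_FILE_DIRTY */
	zone->nr_write_pending++;   /* zone-level NR_ZONE_WRITE_PENDING */
}

static void model_clear_page_writeback(struct node_stats *node, struct zone_stats *zone)
{
	node->nr_writeback--;       /* node-level NR_WRITEBACK */
	zone->nr_write_pending--;   /* the page no longer has I/O pending */
	node->nr_written++;         /* node-level NR_WRITTEN */
}
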
559 mm/page_alloc.c
@ -295,14 +295,6 @@ static inline bool __meminit early_page_uninitialised(unsigned long pfn)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
|
||||
{
|
||||
if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false when the remaining initialisation should be deferred until
|
||||
* later in the boot cycle when it can be parallelised.
|
||||
@ -342,11 +334,6 @@ static inline bool early_page_uninitialised(unsigned long pfn)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool update_defer_init(pg_data_t *pgdat,
|
||||
unsigned long pfn, unsigned long zone_end,
|
||||
unsigned long *nr_initialised)
|
||||
@ -1091,9 +1078,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
|
||||
|
||||
spin_lock(&zone->lock);
|
||||
isolated_pageblocks = has_isolate_pageblock(zone);
|
||||
nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
|
||||
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
|
||||
if (nr_scanned)
|
||||
__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
|
||||
|
||||
while (count) {
|
||||
struct page *page;
|
||||
@ -1148,9 +1135,9 @@ static void free_one_page(struct zone *zone,
|
||||
{
|
||||
unsigned long nr_scanned;
|
||||
spin_lock(&zone->lock);
|
||||
nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
|
||||
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
|
||||
if (nr_scanned)
|
||||
__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
|
||||
|
||||
if (unlikely(has_isolate_pageblock(zone) ||
|
||||
is_migrate_isolate(migratetype))) {
|
||||
@ -2517,7 +2504,10 @@ int __isolate_free_page(struct page *page, unsigned int order)
|
||||
zone->free_area[order].nr_free--;
|
||||
rmv_page_order(page);
|
||||
|
||||
/* Set the pageblock if the isolated page is at least a pageblock */
|
||||
/*
|
||||
* Set the pageblock if the isolated page is at least half of a
|
||||
* pageblock
|
||||
*/
|
||||
if (order >= pageblock_order - 1) {
|
||||
struct page *endpage = page + (1 << order) - 1;
|
||||
for (; page < endpage; page += pageblock_nr_pages) {
|
||||
@ -2597,7 +2587,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
|
||||
else
|
||||
page = list_first_entry(list, struct page, lru);
|
||||
|
||||
__dec_zone_state(zone, NR_ALLOC_BATCH);
|
||||
list_del(&page->lru);
|
||||
pcp->count--;
|
||||
|
||||
@ -2623,16 +2612,11 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
|
||||
spin_unlock(&zone->lock);
|
||||
if (!page)
|
||||
goto failed;
|
||||
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
|
||||
__mod_zone_freepage_state(zone, -(1 << order),
|
||||
get_pcppage_migratetype(page));
|
||||
}
|
||||
|
||||
if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
|
||||
!test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
|
||||
set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
|
||||
|
||||
__count_zone_vm_events(PGALLOC, zone, 1 << order);
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone, gfp_flags);
|
||||
local_irq_restore(flags);
|
||||
|
||||
@ -2842,40 +2826,18 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static bool zone_local(struct zone *local_zone, struct zone *zone)
|
||||
{
|
||||
return local_zone->node == zone->node;
|
||||
}
|
||||
|
||||
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
|
||||
{
|
||||
return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
|
||||
RECLAIM_DISTANCE;
|
||||
}
|
||||
#else /* CONFIG_NUMA */
|
||||
static bool zone_local(struct zone *local_zone, struct zone *zone)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
static void reset_alloc_batches(struct zone *preferred_zone)
|
||||
{
|
||||
struct zone *zone = preferred_zone->zone_pgdat->node_zones;
|
||||
|
||||
do {
|
||||
mod_zone_page_state(zone, NR_ALLOC_BATCH,
|
||||
high_wmark_pages(zone) - low_wmark_pages(zone) -
|
||||
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
|
||||
clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
|
||||
} while (zone++ != preferred_zone);
|
||||
}
|
||||
|
||||
/*
|
||||
* get_page_from_freelist goes through the zonelist trying to allocate
|
||||
* a page.
|
||||
@ -2886,10 +2848,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
|
||||
{
|
||||
struct zoneref *z = ac->preferred_zoneref;
|
||||
struct zone *zone;
|
||||
bool fair_skipped = false;
|
||||
bool apply_fair = (alloc_flags & ALLOC_FAIR);
|
||||
struct pglist_data *last_pgdat_dirty_limit = NULL;
|
||||
|
||||
zonelist_scan:
|
||||
/*
|
||||
* Scan zonelist, looking for a zone with enough free.
|
||||
* See also __cpuset_node_allowed() comment in kernel/cpuset.c.
|
||||
@ -2903,51 +2863,34 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
|
||||
(alloc_flags & ALLOC_CPUSET) &&
|
||||
!__cpuset_zone_allowed(zone, gfp_mask))
|
||||
continue;
|
||||
/*
|
||||
* Distribute pages in proportion to the individual
|
||||
* zone size to ensure fair page aging. The zone a
|
||||
* page was allocated in should have no effect on the
|
||||
* time the page has in memory before being reclaimed.
|
||||
*/
|
||||
if (apply_fair) {
|
||||
if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
|
||||
fair_skipped = true;
|
||||
continue;
|
||||
}
|
||||
if (!zone_local(ac->preferred_zoneref->zone, zone)) {
|
||||
if (fair_skipped)
|
||||
goto reset_fair;
|
||||
apply_fair = false;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* When allocating a page cache page for writing, we
|
||||
* want to get it from a zone that is within its dirty
|
||||
* limit, such that no single zone holds more than its
|
||||
* want to get it from a node that is within its dirty
|
||||
* limit, such that no single node holds more than its
|
||||
* proportional share of globally allowed dirty pages.
|
||||
* The dirty limits take into account the zone's
|
||||
* The dirty limits take into account the node's
|
||||
* lowmem reserves and high watermark so that kswapd
|
||||
* should be able to balance it without having to
|
||||
* write pages from its LRU list.
|
||||
*
|
||||
* This may look like it could increase pressure on
|
||||
* lower zones by failing allocations in higher zones
|
||||
* before they are full. But the pages that do spill
|
||||
* over are limited as the lower zones are protected
|
||||
* by this very same mechanism. It should not become
|
||||
* a practical burden to them.
|
||||
*
|
||||
* XXX: For now, allow allocations to potentially
|
||||
* exceed the per-zone dirty limit in the slowpath
|
||||
* exceed the per-node dirty limit in the slowpath
|
||||
* (spread_dirty_pages unset) before going into reclaim,
|
||||
* which is important when on a NUMA setup the allowed
|
||||
* zones are together not big enough to reach the
|
||||
* nodes are together not big enough to reach the
|
||||
* global limit. The proper fix for these situations
|
||||
* will require awareness of zones in the
|
||||
* will require awareness of nodes in the
|
||||
* dirty-throttling and the flusher threads.
|
||||
*/
|
||||
if (ac->spread_dirty_pages && !zone_dirty_ok(zone))
|
||||
continue;
|
||||
if (ac->spread_dirty_pages) {
|
||||
if (last_pgdat_dirty_limit == zone->zone_pgdat)
|
||||
continue;
|
||||
|
||||
if (!node_dirty_ok(zone->zone_pgdat)) {
|
||||
last_pgdat_dirty_limit = zone->zone_pgdat;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
|
||||
if (!zone_watermark_fast(zone, order, mark,
|
||||
@ -2959,16 +2902,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
|
||||
if (alloc_flags & ALLOC_NO_WATERMARKS)
|
||||
goto try_this_zone;
|
||||
|
||||
if (zone_reclaim_mode == 0 ||
|
||||
if (node_reclaim_mode == 0 ||
|
||||
!zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
|
||||
continue;
|
||||
|
||||
ret = zone_reclaim(zone, gfp_mask, order);
|
||||
ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
|
||||
switch (ret) {
|
||||
case ZONE_RECLAIM_NOSCAN:
|
||||
case NODE_RECLAIM_NOSCAN:
|
||||
/* did not scan */
|
||||
continue;
|
||||
case ZONE_RECLAIM_FULL:
|
||||
case NODE_RECLAIM_FULL:
|
||||
/* scanned but unreclaimable */
|
||||
continue;
|
||||
default:
|
||||
@ -2998,23 +2941,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The first pass makes sure allocations are spread fairly within the
|
||||
* local node. However, the local node might have free pages left
|
||||
* after the fairness batches are exhausted, and remote zones haven't
|
||||
* even been considered yet. Try once more without fairness, and
|
||||
* include remote zones now, before entering the slowpath and waking
|
||||
* kswapd: prefer spilling to a remote zone over swapping locally.
|
||||
*/
|
||||
if (fair_skipped) {
|
||||
reset_fair:
|
||||
apply_fair = false;
|
||||
fair_skipped = false;
|
||||
reset_alloc_batches(ac->preferred_zoneref->zone);
|
||||
z = ac->preferred_zoneref;
|
||||
goto zonelist_scan;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -3159,7 +3085,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
|
||||
return page;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Maximum number of compaction retries wit a progress before OOM
|
||||
* killer is consider as the only way to move forward.
|
||||
@ -3171,17 +3096,16 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
|
||||
static struct page *
|
||||
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
unsigned int alloc_flags, const struct alloc_context *ac,
|
||||
enum migrate_mode mode, enum compact_result *compact_result)
|
||||
enum compact_priority prio, enum compact_result *compact_result)
|
||||
{
|
||||
struct page *page;
|
||||
int contended_compaction;
|
||||
|
||||
if (!order)
|
||||
return NULL;
|
||||
|
||||
current->flags |= PF_MEMALLOC;
|
||||
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
|
||||
mode, &contended_compaction);
|
||||
prio);
|
||||
current->flags &= ~PF_MEMALLOC;
|
||||
|
||||
if (*compact_result <= COMPACT_INACTIVE)
|
||||
@ -3193,8 +3117,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
*/
|
||||
count_vm_event(COMPACTSTALL);
|
||||
|
||||
page = get_page_from_freelist(gfp_mask, order,
|
||||
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
|
||||
page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
|
||||
|
||||
if (page) {
|
||||
struct zone *zone = page_zone(page);
|
||||
@ -3211,24 +3134,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
*/
|
||||
count_vm_event(COMPACTFAIL);
|
||||
|
||||
/*
|
||||
* In all zones where compaction was attempted (and not
|
||||
* deferred or skipped), lock contention has been detected.
|
||||
* For THP allocation we do not want to disrupt the others
|
||||
* so we fallback to base pages instead.
|
||||
*/
|
||||
if (contended_compaction == COMPACT_CONTENDED_LOCK)
|
||||
*compact_result = COMPACT_CONTENDED;
|
||||
|
||||
/*
|
||||
* If compaction was aborted due to need_resched(), we do not
|
||||
* want to further increase allocation latency, unless it is
|
||||
* khugepaged trying to collapse.
|
||||
*/
|
||||
if (contended_compaction == COMPACT_CONTENDED_SCHED
|
||||
&& !(current->flags & PF_KTHREAD))
|
||||
*compact_result = COMPACT_CONTENDED;
|
||||
|
||||
cond_resched();
|
||||
|
||||
return NULL;
|
||||
@ -3236,7 +3141,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
|
||||
static inline bool
|
||||
should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
|
||||
enum compact_result compact_result, enum migrate_mode *migrate_mode,
|
||||
enum compact_result compact_result,
|
||||
enum compact_priority *compact_priority,
|
||||
int compaction_retries)
|
||||
{
|
||||
int max_retries = MAX_COMPACT_RETRIES;
|
||||
@ -3247,11 +3153,11 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
|
||||
/*
|
||||
* compaction considers all the zone as desperately out of memory
|
||||
* so it doesn't really make much sense to retry except when the
|
||||
* failure could be caused by weak migration mode.
|
||||
* failure could be caused by insufficient priority
|
||||
*/
|
||||
if (compaction_failed(compact_result)) {
|
||||
if (*migrate_mode == MIGRATE_ASYNC) {
|
||||
*migrate_mode = MIGRATE_SYNC_LIGHT;
|
||||
if (*compact_priority > MIN_COMPACT_PRIORITY) {
|
||||
(*compact_priority)--;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -3285,7 +3191,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
|
||||
static inline struct page *
|
||||
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
unsigned int alloc_flags, const struct alloc_context *ac,
|
||||
enum migrate_mode mode, enum compact_result *compact_result)
|
||||
enum compact_priority prio, enum compact_result *compact_result)
|
||||
{
|
||||
*compact_result = COMPACT_SKIPPED;
|
||||
return NULL;
|
||||
@ -3294,7 +3200,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
|
||||
static inline bool
|
||||
should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
|
||||
enum compact_result compact_result,
|
||||
enum migrate_mode *migrate_mode,
|
||||
enum compact_priority *compact_priority,
|
||||
int compaction_retries)
|
||||
{
|
||||
struct zone *zone;
|
||||
@ -3362,8 +3268,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
|
||||
return NULL;
|
||||
|
||||
retry:
|
||||
page = get_page_from_freelist(gfp_mask, order,
|
||||
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
|
||||
page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
|
||||
|
||||
/*
|
||||
* If an allocation failed after direct reclaim, it could be because
|
||||
@@ -3384,10 +3289,14 @@ static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac)
{
struct zoneref *z;
struct zone *zone;
pg_data_t *last_pgdat = NULL;

for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
ac->high_zoneidx, ac->nodemask)
wakeup_kswapd(zone, order, ac_classzone_idx(ac));
ac->high_zoneidx, ac->nodemask) {
if (last_pgdat != zone->zone_pgdat)
wakeup_kswapd(zone, order, ac->high_zoneidx);
last_pgdat = zone->zone_pgdat;
}
}

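The wake_all_kswapds() hunk above wakes each node's kswapd once even though the iteration is still per zone, by remembering the last pgdat seen. A standalone sketch of that deduplication pattern; the types and the wake() callback are illustrative:

/* Standalone sketch, not kernel code. Relies on zones of the same node
 * appearing consecutively, as the kernel zonelist does for this purpose. */
#include <stddef.h>

struct node_ref { int id; };
struct zone_ref { struct node_ref *node; };

static void wake_once_per_node(struct zone_ref *zones, int nr,
			       void (*wake)(struct node_ref *))
{
	struct node_ref *last = NULL;

	for (int i = 0; i < nr; i++) {
		if (zones[i].node != last)   /* act only when the node changes */
			wake(zones[i].node);
		last = zones[i].node;
	}
}
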
static inline unsigned int
|
||||
@ -3421,16 +3330,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
|
||||
} else if (unlikely(rt_task(current)) && !in_interrupt())
|
||||
alloc_flags |= ALLOC_HARDER;
|
||||
|
||||
if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
|
||||
if (gfp_mask & __GFP_MEMALLOC)
|
||||
alloc_flags |= ALLOC_NO_WATERMARKS;
|
||||
else if (in_serving_softirq() && (current->flags & PF_MEMALLOC))
|
||||
alloc_flags |= ALLOC_NO_WATERMARKS;
|
||||
else if (!in_interrupt() &&
|
||||
((current->flags & PF_MEMALLOC) ||
|
||||
unlikely(test_thread_flag(TIF_MEMDIE))))
|
||||
alloc_flags |= ALLOC_NO_WATERMARKS;
|
||||
}
|
||||
#ifdef CONFIG_CMA
|
||||
if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
|
||||
alloc_flags |= ALLOC_CMA;
|
||||
@@ -3440,12 +3339,19 @@ gfp_to_alloc_flags(gfp_t gfp_mask)

bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
{
return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
}
if (unlikely(gfp_mask & __GFP_NOMEMALLOC))
return false;

static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
{
return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
if (gfp_mask & __GFP_MEMALLOC)
return true;
if (in_serving_softirq() && (current->flags & PF_MEMALLOC))
return true;
if (!in_interrupt() &&
((current->flags & PF_MEMALLOC) ||
unlikely(test_thread_flag(TIF_MEMDIE))))
return true;

return false;
}

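gfp_pfmemalloc_allowed() above now answers "may this context dip into reserves?" directly from the gfp mask and task state instead of building alloc_flags first. A simplified standalone sketch of that decision order (the softirq case is folded into the task flags here), with assumed flag values:

/* Standalone sketch, not kernel code; flag values are illustrative. */
#include <stdbool.h>

#define MODEL_GFP_NOMEMALLOC  0x1u
#define MODEL_GFP_MEMALLOC    0x2u

struct ctx_model { bool memalloc; bool oom_victim; bool in_interrupt; };

static bool model_pfmemalloc_allowed(unsigned int gfp_mask, const struct ctx_model *c)
{
	if (gfp_mask & MODEL_GFP_NOMEMALLOC)   /* caller explicitly opted out */
		return false;
	if (gfp_mask & MODEL_GFP_MEMALLOC)     /* caller explicitly opted in  */
		return true;
	/* process context that is itself reclaiming, or an OOM victim */
	if (!c->in_interrupt && (c->memalloc || c->oom_victim))
		return true;
	return false;
}
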
/*
|
||||
@ -3481,10 +3387,10 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Keep reclaiming pages while there is a chance this will lead somewhere.
|
||||
* If none of the target zones can satisfy our allocation request even
|
||||
* if all reclaimable pages are considered then we are screwed and have
|
||||
* to go OOM.
|
||||
* Keep reclaiming pages while there is a chance this will lead
|
||||
* somewhere. If none of the target zones can satisfy our allocation
|
||||
* request even if all reclaimable pages are considered then we are
|
||||
* screwed and have to go OOM.
|
||||
*/
|
||||
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
|
||||
ac->nodemask) {
|
||||
@ -3509,14 +3415,12 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
|
||||
* prevent from pre mature OOM
|
||||
*/
|
||||
if (!did_some_progress) {
|
||||
unsigned long writeback;
|
||||
unsigned long dirty;
|
||||
unsigned long write_pending;
|
||||
|
||||
writeback = zone_page_state_snapshot(zone,
|
||||
NR_WRITEBACK);
|
||||
dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
|
||||
write_pending = zone_page_state_snapshot(zone,
|
||||
NR_ZONE_WRITE_PENDING);
|
||||
|
||||
if (2*(writeback + dirty) > reclaimable) {
|
||||
if (2 * write_pending > reclaimable) {
|
||||
congestion_wait(BLK_RW_ASYNC, HZ/10);
|
||||
return true;
|
||||
}
|
||||
@ -3551,7 +3455,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
struct page *page = NULL;
|
||||
unsigned int alloc_flags;
|
||||
unsigned long did_some_progress;
|
||||
enum migrate_mode migration_mode = MIGRATE_ASYNC;
|
||||
enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
|
||||
enum compact_result compact_result;
|
||||
int compaction_retries = 0;
|
||||
int no_progress_loops = 0;
|
||||
@ -3575,42 +3479,88 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
|
||||
gfp_mask &= ~__GFP_ATOMIC;
|
||||
|
||||
retry:
|
||||
/*
|
||||
* The fast path uses conservative alloc_flags to succeed only until
|
||||
* kswapd needs to be woken up, and to avoid the cost of setting up
|
||||
* alloc_flags precisely. So we do that now.
|
||||
*/
|
||||
alloc_flags = gfp_to_alloc_flags(gfp_mask);
|
||||
|
||||
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
|
||||
wake_all_kswapds(order, ac);
|
||||
|
||||
/*
|
||||
* OK, we're below the kswapd watermark and have kicked background
|
||||
* reclaim. Now things get more complex, so set up alloc_flags according
|
||||
* to how we want to proceed.
|
||||
* The adjusted alloc_flags might result in immediate success, so try
|
||||
* that first
|
||||
*/
|
||||
alloc_flags = gfp_to_alloc_flags(gfp_mask);
|
||||
page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
/*
|
||||
* For costly allocations, try direct compaction first, as it's likely
|
||||
* that we have enough base pages and don't need to reclaim. Don't try
|
||||
* that for allocations that are allowed to ignore watermarks, as the
|
||||
* ALLOC_NO_WATERMARKS attempt didn't yet happen.
|
||||
*/
|
||||
if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
|
||||
!gfp_pfmemalloc_allowed(gfp_mask)) {
|
||||
page = __alloc_pages_direct_compact(gfp_mask, order,
|
||||
alloc_flags, ac,
|
||||
INIT_COMPACT_PRIORITY,
|
||||
&compact_result);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
/*
|
||||
* Checks for costly allocations with __GFP_NORETRY, which
|
||||
* includes THP page fault allocations
|
||||
*/
|
||||
if (gfp_mask & __GFP_NORETRY) {
|
||||
/*
|
||||
* If compaction is deferred for high-order allocations,
|
||||
* it is because sync compaction recently failed. If
|
||||
* this is the case and the caller requested a THP
|
||||
* allocation, we do not want to heavily disrupt the
|
||||
* system, so we fail the allocation instead of entering
|
||||
* direct reclaim.
|
||||
*/
|
||||
if (compact_result == COMPACT_DEFERRED)
|
||||
goto nopage;
|
||||
|
||||
/*
|
||||
* Looks like reclaim/compaction is worth trying, but
|
||||
* sync compaction could be very expensive, so keep
|
||||
* using async compaction.
|
||||
*/
|
||||
compact_priority = INIT_COMPACT_PRIORITY;
|
||||
}
|
||||
}
|
||||
|
||||
retry:
|
||||
/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
|
||||
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
|
||||
wake_all_kswapds(order, ac);
|
||||
|
||||
if (gfp_pfmemalloc_allowed(gfp_mask))
|
||||
alloc_flags = ALLOC_NO_WATERMARKS;
|
||||
|
||||
/*
|
||||
* Reset the zonelist iterators if memory policies can be ignored.
|
||||
* These allocations are high priority and system rather than user
|
||||
* orientated.
|
||||
*/
|
||||
if ((alloc_flags & ALLOC_NO_WATERMARKS) || !(alloc_flags & ALLOC_CPUSET)) {
|
||||
if (!(alloc_flags & ALLOC_CPUSET) || (alloc_flags & ALLOC_NO_WATERMARKS)) {
|
||||
ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
|
||||
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
|
||||
ac->high_zoneidx, ac->nodemask);
|
||||
}
|
||||
|
||||
/* This is the last chance, in general, before the goto nopage. */
|
||||
page = get_page_from_freelist(gfp_mask, order,
|
||||
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
|
||||
/* Attempt with potentially adjusted zonelist and alloc_flags */
|
||||
page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
/* Allocate without watermarks if the context allows */
|
||||
if (alloc_flags & ALLOC_NO_WATERMARKS) {
|
||||
page = get_page_from_freelist(gfp_mask, order,
|
||||
ALLOC_NO_WATERMARKS, ac);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
}
|
||||
|
||||
/* Caller is not willing to reclaim, we can't balance anything */
|
||||
if (!can_direct_reclaim) {
|
||||
/*
|
||||
@ -3640,38 +3590,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
|
||||
goto nopage;
|
||||
|
||||
/*
|
||||
* Try direct compaction. The first pass is asynchronous. Subsequent
|
||||
* attempts after direct reclaim are synchronous
|
||||
*/
|
||||
page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
|
||||
migration_mode,
|
||||
&compact_result);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
/* Checks for THP-specific high-order allocations */
|
||||
if (is_thp_gfp_mask(gfp_mask)) {
|
||||
/*
|
||||
* If compaction is deferred for high-order allocations, it is
|
||||
* because sync compaction recently failed. If this is the case
|
||||
* and the caller requested a THP allocation, we do not want
|
||||
* to heavily disrupt the system, so we fail the allocation
|
||||
* instead of entering direct reclaim.
|
||||
*/
|
||||
if (compact_result == COMPACT_DEFERRED)
|
||||
goto nopage;
|
||||
|
||||
/*
|
||||
* Compaction is contended so rather back off than cause
|
||||
* excessive stalls.
|
||||
*/
|
||||
if(compact_result == COMPACT_CONTENDED)
|
||||
goto nopage;
|
||||
}
|
||||
|
||||
if (order && compaction_made_progress(compact_result))
|
||||
compaction_retries++;
|
||||
|
||||
/* Try direct reclaim and then allocating */
|
||||
page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
|
||||
@ -3679,16 +3597,25 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
/* Try direct compaction and then allocating */
|
||||
page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
|
||||
compact_priority, &compact_result);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
|
||||
if (order && compaction_made_progress(compact_result))
|
||||
compaction_retries++;
|
||||
|
||||
/* Do not loop if specifically requested */
|
||||
if (gfp_mask & __GFP_NORETRY)
|
||||
goto noretry;
|
||||
goto nopage;
|
||||
|
||||
/*
|
||||
* Do not retry costly high order allocations unless they are
|
||||
* __GFP_REPEAT
|
||||
*/
|
||||
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
|
||||
goto noretry;
|
||||
goto nopage;
|
||||
|
||||
/*
|
||||
* Costly allocations might have made a progress but this doesn't mean
|
||||
@ -3712,7 +3639,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
*/
|
||||
if (did_some_progress > 0 &&
|
||||
should_compact_retry(ac, order, alloc_flags,
|
||||
compact_result, &migration_mode,
|
||||
compact_result, &compact_priority,
|
||||
compaction_retries))
|
||||
goto retry;
|
||||
|
||||
@ -3727,25 +3654,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
goto retry;
|
||||
}
|
||||
|
||||
noretry:
|
||||
/*
|
||||
* High-order allocations do not necessarily loop after direct reclaim
|
||||
* and reclaim/compaction depends on compaction being called after
|
||||
* reclaim so call directly if necessary.
|
||||
* It can become very expensive to allocate transparent hugepages at
|
||||
* fault, so use asynchronous memory compaction for THP unless it is
|
||||
* khugepaged trying to collapse. All other requests should tolerate
|
||||
* at least light sync migration.
|
||||
*/
|
||||
if (is_thp_gfp_mask(gfp_mask) && !(current->flags & PF_KTHREAD))
|
||||
migration_mode = MIGRATE_ASYNC;
|
||||
else
|
||||
migration_mode = MIGRATE_SYNC_LIGHT;
|
||||
page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags,
|
||||
ac, migration_mode,
|
||||
&compact_result);
|
||||
if (page)
|
||||
goto got_pg;
|
||||
nopage:
|
||||
warn_alloc_failed(gfp_mask, order, NULL);
|
||||
got_pg:
|
||||
@ -3761,7 +3669,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
|
||||
{
|
||||
struct page *page;
|
||||
unsigned int cpuset_mems_cookie;
|
||||
unsigned int alloc_flags = ALLOC_WMARK_LOW|ALLOC_FAIR;
|
||||
unsigned int alloc_flags = ALLOC_WMARK_LOW;
|
||||
gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
|
||||
struct alloc_context ac = {
|
||||
.high_zoneidx = gfp_zone(gfp_mask),
|
||||
@ -4192,7 +4100,7 @@ EXPORT_SYMBOL_GPL(si_mem_available);
|
||||
void si_meminfo(struct sysinfo *val)
|
||||
{
|
||||
val->totalram = totalram_pages;
|
||||
val->sharedram = global_page_state(NR_SHMEM);
|
||||
val->sharedram = global_node_page_state(NR_SHMEM);
|
||||
val->freeram = global_page_state(NR_FREE_PAGES);
|
||||
val->bufferram = nr_blockdev_pages();
|
||||
val->totalhigh = totalhigh_pages;
|
||||
@ -4214,8 +4122,8 @@ void si_meminfo_node(struct sysinfo *val, int nid)
|
||||
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
|
||||
managed_pages += pgdat->node_zones[zone_type].managed_pages;
|
||||
val->totalram = managed_pages;
|
||||
val->sharedram = node_page_state(nid, NR_SHMEM);
|
||||
val->freeram = node_page_state(nid, NR_FREE_PAGES);
|
||||
val->sharedram = node_page_state(pgdat, NR_SHMEM);
|
||||
val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
|
||||
struct zone *zone = &pgdat->node_zones[zone_type];
|
||||
@ -4298,6 +4206,7 @@ void show_free_areas(unsigned int filter)
|
||||
unsigned long free_pcp = 0;
|
||||
int cpu;
|
||||
struct zone *zone;
|
||||
pg_data_t *pgdat;
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
if (skip_free_areas_node(filter, zone_to_nid(zone)))
|
||||
@ -4312,35 +4221,74 @@ void show_free_areas(unsigned int filter)
|
||||
" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
|
||||
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
|
||||
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
" anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n"
|
||||
#endif
|
||||
" free:%lu free_pcp:%lu free_cma:%lu\n",
|
||||
global_page_state(NR_ACTIVE_ANON),
|
||||
global_page_state(NR_INACTIVE_ANON),
|
||||
global_page_state(NR_ISOLATED_ANON),
|
||||
global_page_state(NR_ACTIVE_FILE),
|
||||
global_page_state(NR_INACTIVE_FILE),
|
||||
global_page_state(NR_ISOLATED_FILE),
|
||||
global_page_state(NR_UNEVICTABLE),
|
||||
global_page_state(NR_FILE_DIRTY),
|
||||
global_page_state(NR_WRITEBACK),
|
||||
global_page_state(NR_UNSTABLE_NFS),
|
||||
global_node_page_state(NR_ACTIVE_ANON),
|
||||
global_node_page_state(NR_INACTIVE_ANON),
|
||||
global_node_page_state(NR_ISOLATED_ANON),
|
||||
global_node_page_state(NR_ACTIVE_FILE),
|
||||
global_node_page_state(NR_INACTIVE_FILE),
|
||||
global_node_page_state(NR_ISOLATED_FILE),
|
||||
global_node_page_state(NR_UNEVICTABLE),
|
||||
global_node_page_state(NR_FILE_DIRTY),
|
||||
global_node_page_state(NR_WRITEBACK),
|
||||
global_node_page_state(NR_UNSTABLE_NFS),
|
||||
global_page_state(NR_SLAB_RECLAIMABLE),
|
||||
global_page_state(NR_SLAB_UNRECLAIMABLE),
|
||||
global_page_state(NR_FILE_MAPPED),
|
||||
global_page_state(NR_SHMEM),
|
||||
global_node_page_state(NR_FILE_MAPPED),
|
||||
global_node_page_state(NR_SHMEM),
|
||||
global_page_state(NR_PAGETABLE),
|
||||
global_page_state(NR_BOUNCE),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
global_page_state(NR_ANON_THPS) * HPAGE_PMD_NR,
|
||||
global_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR,
|
||||
global_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR,
|
||||
#endif
|
||||
global_page_state(NR_FREE_PAGES),
|
||||
free_pcp,
|
||||
global_page_state(NR_FREE_CMA_PAGES));
|
||||
|
||||
for_each_online_pgdat(pgdat) {
|
||||
printk("Node %d"
|
||||
" active_anon:%lukB"
|
||||
" inactive_anon:%lukB"
|
||||
" active_file:%lukB"
|
||||
" inactive_file:%lukB"
|
||||
" unevictable:%lukB"
|
||||
" isolated(anon):%lukB"
|
||||
" isolated(file):%lukB"
|
||||
" mapped:%lukB"
|
||||
" dirty:%lukB"
|
||||
" writeback:%lukB"
|
||||
" shmem:%lukB"
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
" shmem_thp: %lukB"
|
||||
" shmem_pmdmapped: %lukB"
|
||||
" anon_thp: %lukB"
|
||||
#endif
|
||||
" writeback_tmp:%lukB"
|
||||
" unstable:%lukB"
|
||||
" pages_scanned:%lu"
|
||||
" all_unreclaimable? %s"
|
||||
"\n",
|
||||
pgdat->node_id,
|
||||
K(node_page_state(pgdat, NR_ACTIVE_ANON)),
|
||||
K(node_page_state(pgdat, NR_INACTIVE_ANON)),
|
||||
K(node_page_state(pgdat, NR_ACTIVE_FILE)),
|
||||
K(node_page_state(pgdat, NR_INACTIVE_FILE)),
|
||||
K(node_page_state(pgdat, NR_UNEVICTABLE)),
|
||||
K(node_page_state(pgdat, NR_ISOLATED_ANON)),
|
||||
K(node_page_state(pgdat, NR_ISOLATED_FILE)),
|
||||
K(node_page_state(pgdat, NR_FILE_MAPPED)),
|
||||
K(node_page_state(pgdat, NR_FILE_DIRTY)),
|
||||
K(node_page_state(pgdat, NR_WRITEBACK)),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
|
||||
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
|
||||
* HPAGE_PMD_NR),
|
||||
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
|
||||
#endif
|
||||
K(node_page_state(pgdat, NR_SHMEM)),
|
||||
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
|
||||
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
|
||||
node_page_state(pgdat, NR_PAGES_SCANNED),
|
||||
!pgdat_reclaimable(pgdat) ? "yes" : "no");
|
||||
}
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
int i;
|
||||
|
||||
@ -4362,72 +4310,41 @@ void show_free_areas(unsigned int filter)
|
||||
" active_file:%lukB"
|
||||
" inactive_file:%lukB"
|
||||
" unevictable:%lukB"
|
||||
" isolated(anon):%lukB"
|
||||
" isolated(file):%lukB"
|
||||
" writepending:%lukB"
|
||||
" present:%lukB"
|
||||
" managed:%lukB"
|
||||
" mlocked:%lukB"
|
||||
" dirty:%lukB"
|
||||
" writeback:%lukB"
|
||||
" mapped:%lukB"
|
||||
" shmem:%lukB"
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
" shmem_thp: %lukB"
|
||||
" shmem_pmdmapped: %lukB"
|
||||
" anon_thp: %lukB"
|
||||
#endif
|
||||
" slab_reclaimable:%lukB"
|
||||
" slab_unreclaimable:%lukB"
|
||||
" kernel_stack:%lukB"
|
||||
" pagetables:%lukB"
|
||||
" unstable:%lukB"
|
||||
" bounce:%lukB"
|
||||
" free_pcp:%lukB"
|
||||
" local_pcp:%ukB"
|
||||
" free_cma:%lukB"
|
||||
" writeback_tmp:%lukB"
|
||||
" pages_scanned:%lu"
|
||||
" all_unreclaimable? %s"
|
||||
"\n",
|
||||
zone->name,
|
||||
K(zone_page_state(zone, NR_FREE_PAGES)),
|
||||
K(min_wmark_pages(zone)),
|
||||
K(low_wmark_pages(zone)),
|
||||
K(high_wmark_pages(zone)),
|
||||
K(zone_page_state(zone, NR_ACTIVE_ANON)),
|
||||
K(zone_page_state(zone, NR_INACTIVE_ANON)),
|
||||
K(zone_page_state(zone, NR_ACTIVE_FILE)),
|
||||
K(zone_page_state(zone, NR_INACTIVE_FILE)),
|
||||
K(zone_page_state(zone, NR_UNEVICTABLE)),
|
||||
K(zone_page_state(zone, NR_ISOLATED_ANON)),
|
||||
K(zone_page_state(zone, NR_ISOLATED_FILE)),
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
|
||||
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
|
||||
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
|
||||
K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
|
||||
K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
|
||||
K(zone->present_pages),
|
||||
K(zone->managed_pages),
|
||||
K(zone_page_state(zone, NR_MLOCK)),
|
||||
K(zone_page_state(zone, NR_FILE_DIRTY)),
|
||||
K(zone_page_state(zone, NR_WRITEBACK)),
|
||||
K(zone_page_state(zone, NR_FILE_MAPPED)),
|
||||
K(zone_page_state(zone, NR_SHMEM)),
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
K(zone_page_state(zone, NR_SHMEM_THPS) * HPAGE_PMD_NR),
|
||||
K(zone_page_state(zone, NR_SHMEM_PMDMAPPED)
|
||||
* HPAGE_PMD_NR),
|
||||
K(zone_page_state(zone, NR_ANON_THPS) * HPAGE_PMD_NR),
|
||||
#endif
|
||||
K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
|
||||
K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
|
||||
zone_page_state(zone, NR_KERNEL_STACK) *
|
||||
THREAD_SIZE / 1024,
|
||||
zone_page_state(zone, NR_KERNEL_STACK_KB),
|
||||
K(zone_page_state(zone, NR_PAGETABLE)),
|
||||
K(zone_page_state(zone, NR_UNSTABLE_NFS)),
|
||||
K(zone_page_state(zone, NR_BOUNCE)),
|
||||
K(free_pcp),
|
||||
K(this_cpu_read(zone->pageset->pcp.count)),
|
||||
K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
|
||||
K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
|
||||
K(zone_page_state(zone, NR_PAGES_SCANNED)),
|
||||
(!zone_reclaimable(zone) ? "yes" : "no")
|
||||
);
|
||||
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
|
||||
printk("lowmem_reserve[]:");
|
||||
for (i = 0; i < MAX_NR_ZONES; i++)
|
||||
printk(" %ld", zone->lowmem_reserve[i]);
|
||||
@ -4469,7 +4386,7 @@ void show_free_areas(unsigned int filter)
|
||||
|
||||
hugetlb_show_meminfo();
|
||||
|
||||
printk("%ld total pagecache pages\n", global_page_state(NR_FILE_PAGES));
|
||||
printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
|
||||
|
||||
show_swap_cache_info();
|
||||
}
|
||||
@ -5340,6 +5257,11 @@ static void __meminit setup_zone_pageset(struct zone *zone)
|
||||
zone->pageset = alloc_percpu(struct per_cpu_pageset);
|
||||
for_each_possible_cpu(cpu)
|
||||
zone_pageset_init(zone, cpu);
|
||||
|
||||
if (!zone->zone_pgdat->per_cpu_nodestats) {
|
||||
zone->zone_pgdat->per_cpu_nodestats =
|
||||
alloc_percpu(struct per_cpu_nodestat);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5909,6 +5831,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
|
||||
init_waitqueue_head(&pgdat->kcompactd_wait);
|
||||
#endif
|
||||
pgdat_page_ext_init(pgdat);
|
||||
spin_lock_init(&pgdat->lru_lock);
|
||||
lruvec_init(node_lruvec(pgdat));
|
||||
|
||||
for (j = 0; j < MAX_NR_ZONES; j++) {
|
||||
struct zone *zone = pgdat->node_zones + j;
|
||||
@ -5958,21 +5882,16 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
|
||||
zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
|
||||
#ifdef CONFIG_NUMA
|
||||
zone->node = nid;
|
||||
zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
|
||||
pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
|
||||
/ 100;
|
||||
zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
|
||||
pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
|
||||
#endif
|
||||
zone->name = zone_names[j];
|
||||
spin_lock_init(&zone->lock);
|
||||
spin_lock_init(&zone->lru_lock);
|
||||
zone_seqlock_init(zone);
|
||||
zone->zone_pgdat = pgdat;
|
||||
spin_lock_init(&zone->lock);
|
||||
zone_seqlock_init(zone);
|
||||
zone_pcp_init(zone);
|
||||
|
||||
/* For bootup, initialized properly in watermark setup */
|
||||
mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
|
||||
|
||||
lruvec_init(&zone->lruvec);
|
||||
if (!size)
|
||||
continue;
|
||||
|
||||
@ -6038,11 +5957,12 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
|
||||
unsigned long end_pfn = 0;
|
||||
|
||||
/* pg_data_t should be reset to zero when it's allocated */
|
||||
WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
|
||||
WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
|
||||
|
||||
reset_deferred_meminit(pgdat);
|
||||
pgdat->node_id = nid;
|
||||
pgdat->node_start_pfn = node_start_pfn;
|
||||
pgdat->per_cpu_nodestats = NULL;
|
||||
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
|
||||
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
|
||||
pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
|
||||
@ -6699,6 +6619,9 @@ static void calculate_totalreserve_pages(void)
|
||||
enum zone_type i, j;
|
||||
|
||||
for_each_online_pgdat(pgdat) {
|
||||
|
||||
pgdat->totalreserve_pages = 0;
|
||||
|
||||
for (i = 0; i < MAX_NR_ZONES; i++) {
|
||||
struct zone *zone = pgdat->node_zones + i;
|
||||
long max = 0;
|
||||
@ -6715,7 +6638,7 @@ static void calculate_totalreserve_pages(void)
|
||||
if (max > zone->managed_pages)
|
||||
max = zone->managed_pages;
|
||||
|
||||
zone->totalreserve_pages = max;
|
||||
pgdat->totalreserve_pages += max;
|
||||
|
||||
reserve_pages += max;
|
||||
}
|
||||
@ -6816,10 +6739,6 @@ static void __setup_per_zone_wmarks(void)
|
||||
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
|
||||
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
|
||||
|
||||
__mod_zone_page_state(zone, NR_ALLOC_BATCH,
|
||||
high_wmark_pages(zone) - low_wmark_pages(zone) -
|
||||
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
|
||||
|
||||
spin_unlock_irqrestore(&zone->lock, flags);
|
||||
}
|
||||
|
||||
@ -6930,6 +6849,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
|
||||
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *length, loff_t *ppos)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
int rc;
|
||||
|
||||
@ -6937,8 +6857,11 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for_each_online_pgdat(pgdat)
|
||||
pgdat->min_slab_pages = 0;
|
||||
|
||||
for_each_zone(zone)
|
||||
zone->min_unmapped_pages = (zone->managed_pages *
|
||||
zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
|
||||
sysctl_min_unmapped_ratio) / 100;
|
||||
return 0;
|
||||
}
|
||||
@ -6946,6 +6869,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
|
||||
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *length, loff_t *ppos)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
int rc;
|
||||
|
||||
@ -6953,8 +6877,11 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for_each_online_pgdat(pgdat)
|
||||
pgdat->min_slab_pages = 0;
|
||||
|
||||
for_each_zone(zone)
|
||||
zone->min_slab_pages = (zone->managed_pages *
|
||||
zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
|
||||
sysctl_min_slab_ratio) / 100;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -41,12 +41,12 @@ static struct page *page_idle_get_page(unsigned long pfn)
return NULL;

zone = page_zone(page);
spin_lock_irq(&zone->lru_lock);
spin_lock_irq(zone_lru_lock(zone));
if (unlikely(!PageLRU(page))) {
put_page(page);
page = NULL;
}
spin_unlock_irq(&zone->lru_lock);
spin_unlock_irq(zone_lru_lock(zone));
return page;
}

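The page_idle hunk above switches to the zone_lru_lock() accessor, which this series uses to route zone-based callers to the node-level LRU lock. A minimal userspace model of that accessor pattern, with a pthread mutex standing in for the spinlock:

/* Standalone sketch, not kernel code. */
#include <pthread.h>

struct node_lru {
	pthread_mutex_t lru_lock;          /* one LRU lock per node */
};

struct zone_view {
	struct node_lru *node;             /* back-pointer, like zone->zone_pgdat */
};

static pthread_mutex_t *model_zone_lru_lock(struct zone_view *zone)
{
	return &zone->node->lru_lock;      /* all zones of a node share it */
}
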
@@ -166,6 +166,8 @@ int generic_swapfile_activate(struct swap_info_struct *sis,
unsigned block_in_page;
sector_t first_block;

cond_resched();

first_block = bmap(inode, probe_block);
if (first_block == 0)
goto bad_bmap;

26 mm/rmap.c
@ -27,7 +27,7 @@
|
||||
* mapping->i_mmap_rwsem
|
||||
* anon_vma->rwsem
|
||||
* mm->page_table_lock or pte_lock
|
||||
* zone->lru_lock (in mark_page_accessed, isolate_lru_page)
|
||||
* zone_lru_lock (in mark_page_accessed, isolate_lru_page)
|
||||
* swap_lock (in swap_duplicate, swap_info_get)
|
||||
* mmlist_lock (in mmput, drain_mmlist and others)
|
||||
* mapping->private_lock (in __set_page_dirty_buffers)
|
||||
@ -1213,8 +1213,8 @@ void do_page_add_anon_rmap(struct page *page,
|
||||
* disabled.
|
||||
*/
|
||||
if (compound)
|
||||
__inc_zone_page_state(page, NR_ANON_THPS);
|
||||
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
|
||||
__inc_node_page_state(page, NR_ANON_THPS);
|
||||
__mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
|
||||
}
|
||||
if (unlikely(PageKsm(page)))
|
||||
return;
|
||||
@ -1251,14 +1251,14 @@ void page_add_new_anon_rmap(struct page *page,
|
||||
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
|
||||
/* increment count (starts at -1) */
|
||||
atomic_set(compound_mapcount_ptr(page), 0);
|
||||
__inc_zone_page_state(page, NR_ANON_THPS);
|
||||
__inc_node_page_state(page, NR_ANON_THPS);
|
||||
} else {
|
||||
/* Anon THP always mapped first with PMD */
|
||||
VM_BUG_ON_PAGE(PageTransCompound(page), page);
|
||||
/* increment count (starts at -1) */
|
||||
atomic_set(&page->_mapcount, 0);
|
||||
}
|
||||
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
|
||||
__page_set_anon_rmap(page, vma, address, 1);
|
||||
}
|
||||
|
||||
@ -1282,7 +1282,7 @@ void page_add_file_rmap(struct page *page, bool compound)
|
||||
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
|
||||
goto out;
|
||||
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
|
||||
__inc_zone_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
} else {
|
||||
if (PageTransCompound(page)) {
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
@ -1293,7 +1293,7 @@ void page_add_file_rmap(struct page *page, bool compound)
|
||||
if (!atomic_inc_and_test(&page->_mapcount))
|
||||
goto out;
|
||||
}
|
||||
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
|
||||
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
|
||||
out:
|
||||
unlock_page_memcg(page);
|
||||
@ -1322,18 +1322,18 @@ static void page_remove_file_rmap(struct page *page, bool compound)
|
||||
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
|
||||
goto out;
|
||||
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
|
||||
__dec_zone_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
|
||||
} else {
|
||||
if (!atomic_add_negative(-1, &page->_mapcount))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
|
||||
* We use the irq-unsafe __{inc|mod}_zone_page_state because
|
||||
* these counters are not modified in interrupt context, and
|
||||
* pte lock(a spinlock) is held, which implies preemption disabled.
|
||||
*/
|
||||
__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, -nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
|
||||
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
|
||||
|
||||
if (unlikely(PageMlocked(page)))
|
||||
@ -1356,7 +1356,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
|
||||
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
|
||||
return;
|
||||
|
||||
__dec_zone_page_state(page, NR_ANON_THPS);
|
||||
__dec_node_page_state(page, NR_ANON_THPS);
|
||||
|
||||
if (TestClearPageDoubleMap(page)) {
|
||||
/*
|
||||
@ -1375,7 +1375,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
|
||||
clear_page_mlock(page);
|
||||
|
||||
if (nr) {
|
||||
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
|
||||
__mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
|
||||
deferred_split_huge_page(page);
|
||||
}
|
||||
}
|
||||
@ -1404,7 +1404,7 @@ void page_remove_rmap(struct page *page, bool compound)
|
||||
* these counters are not modified in interrupt context, and
|
||||
* pte lock(a spinlock) is held, which implies preemption disabled.
|
||||
*/
|
||||
__dec_zone_page_state(page, NR_ANON_PAGES);
|
||||
__dec_node_page_state(page, NR_ANON_MAPPED);
|
||||
|
||||
if (unlikely(PageMlocked(page)))
|
||||
clear_page_mlock(page);
|
||||
|
14 mm/shmem.c
@@ -575,9 +575,9 @@ static int shmem_add_to_page_cache(struct page *page,
if (!error) {
mapping->nrpages += nr;
if (PageTransHuge(page))
__inc_zone_page_state(page, NR_SHMEM_THPS);
__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
__mod_zone_page_state(page_zone(page), NR_SHMEM, nr);
__inc_node_page_state(page, NR_SHMEM_THPS);
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
spin_unlock_irq(&mapping->tree_lock);
} else {
page->mapping = NULL;
@@ -601,8 +601,8 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
page->mapping = NULL;
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
__dec_zone_page_state(page, NR_SHMEM);
__dec_node_page_state(page, NR_FILE_PAGES);
__dec_node_page_state(page, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
put_page(page);
BUG_ON(error);
@@ -1493,8 +1493,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
newpage);
if (!error) {
__inc_zone_page_state(newpage, NR_FILE_PAGES);
__dec_zone_page_state(oldpage, NR_FILE_PAGES);
__inc_node_page_state(newpage, NR_FILE_PAGES);
__dec_node_page_state(oldpage, NR_FILE_PAGES);
}
spin_unlock_irq(&swap_mapping->tree_lock);

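shmem_replace_page() above moves NR_FILE_PAGES accounting to the node of each page: the new page's node is incremented and the old page's node decremented. A tiny standalone model of that transfer, with stand-in structures:

/* Standalone sketch, not kernel code. */
struct node_vmstat { unsigned long nr_file_pages; };
struct page_view { struct node_vmstat *node; };

static void model_replace_accounting(struct page_view *oldpage, struct page_view *newpage)
{
	newpage->node->nr_file_pages++;   /* like __inc_node_page_state(newpage, NR_FILE_PAGES) */
	oldpage->node->nr_file_pages--;   /* like __dec_node_page_state(oldpage, NR_FILE_PAGES) */
}
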
@@ -369,6 +369,8 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->object_size;
# endif
if (s->flags & SLAB_KASAN)
return s->object_size;
/*
* If we have the need to store the freelist pointer
* back there or track user information then we can

59 mm/slub.c
@ -124,7 +124,7 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void *fixup_red_left(struct kmem_cache *s, void *p)
|
||||
inline void *fixup_red_left(struct kmem_cache *s, void *p)
|
||||
{
|
||||
if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
|
||||
p += s->red_left_pad;
|
||||
@ -454,8 +454,6 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p)
|
||||
*/
|
||||
#if defined(CONFIG_SLUB_DEBUG_ON)
|
||||
static int slub_debug = DEBUG_DEFAULT_FLAGS;
|
||||
#elif defined(CONFIG_KASAN)
|
||||
static int slub_debug = SLAB_STORE_USER;
|
||||
#else
|
||||
static int slub_debug;
|
||||
#endif
|
||||
@ -660,6 +658,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
|
||||
if (s->flags & SLAB_STORE_USER)
|
||||
off += 2 * sizeof(struct track);
|
||||
|
||||
off += kasan_metadata_size(s);
|
||||
|
||||
if (off != size_from_object(s))
|
||||
/* Beginning of the filler is the free pointer */
|
||||
print_section("Padding ", p + off, size_from_object(s) - off);
|
||||
@ -787,6 +787,8 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
|
||||
/* We also have user information there */
|
||||
off += 2 * sizeof(struct track);
|
||||
|
||||
off += kasan_metadata_size(s);
|
||||
|
||||
if (size_from_object(s) == off)
|
||||
return 1;
|
||||
|
||||
@ -1322,8 +1324,10 @@ static inline void kfree_hook(const void *x)
kasan_kfree_large(x);
}

static inline void slab_free_hook(struct kmem_cache *s, void *x)
static inline void *slab_free_hook(struct kmem_cache *s, void *x)
{
void *freeptr;

kmemleak_free_recursive(x, s->flags);

/*
@ -1344,7 +1348,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);

freeptr = get_freepointer(s, x);
/*
* kasan_slab_free() may put x into memory quarantine, delaying its
* reuse. In this case the object's freelist pointer is changed.
*/
kasan_slab_free(s, x);
return freeptr;
}

static inline void slab_free_freelist_hook(struct kmem_cache *s,
@ -1362,11 +1372,11 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,

void *object = head;
void *tail_obj = tail ? : head;
void *freeptr;

do {
slab_free_hook(s, object);
} while ((object != tail_obj) &&
(object = get_freepointer(s, object)));
freeptr = slab_free_hook(s, object);
} while ((object != tail_obj) && (object = freeptr));
#endif
}
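The reworked loop exists because kasan_slab_free() may redirect an object's freelist pointer when it quarantines the object, so slab_free_hook() now returns the successor captured before KASAN runs. A minimal userspace sketch of that ordering, with a hypothetical free_hook() standing in for the kernel hooks (this is not the SLUB API itself):

/* Sketch only, not part of the diff: walk a singly linked freelist where the
 * per-object hook may clobber the link, so the next pointer is saved first. */
#include <stdio.h>
#include <string.h>

struct obj { struct obj *next; char payload[24]; };

/* Hypothetical hook: returns the successor, then scribbles on the object,
 * standing in for kasan_slab_free() redirecting the freelist pointer. */
static struct obj *free_hook(struct obj *o)
{
	struct obj *next = o->next;	/* capture before the object is poisoned */
	memset(o, 0xa5, sizeof(*o));	/* quarantine/poison would corrupt o->next */
	return next;
}

static void free_list(struct obj *head, struct obj *tail)
{
	struct obj *object = head;
	struct obj *freeptr;

	do {
		freeptr = free_hook(object);
	} while (object != tail && (object = freeptr));
}

int main(void)
{
	struct obj a, b, c;
	a.next = &b; b.next = &c; c.next = NULL;
	free_list(&a, &c);
	puts("walked head..tail without touching poisoned links");
	return 0;
}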
@ -2878,16 +2888,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
|
||||
* same page) possible by specifying head and tail ptr, plus objects
|
||||
* count (cnt). Bulk free indicated by tail pointer being set.
|
||||
*/
|
||||
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
|
||||
void *head, void *tail, int cnt,
|
||||
unsigned long addr)
|
||||
static __always_inline void do_slab_free(struct kmem_cache *s,
|
||||
struct page *page, void *head, void *tail,
|
||||
int cnt, unsigned long addr)
|
||||
{
|
||||
void *tail_obj = tail ? : head;
|
||||
struct kmem_cache_cpu *c;
|
||||
unsigned long tid;
|
||||
|
||||
slab_free_freelist_hook(s, head, tail);
|
||||
|
||||
redo:
|
||||
/*
|
||||
* Determine the currently cpus per cpu slab.
|
||||
@ -2921,6 +2928,27 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
|
||||
|
||||
}
|
||||
|
||||
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
|
||||
void *head, void *tail, int cnt,
|
||||
unsigned long addr)
|
||||
{
|
||||
slab_free_freelist_hook(s, head, tail);
|
||||
/*
|
||||
* slab_free_freelist_hook() could have put the items into quarantine.
|
||||
* If so, no need to free them.
|
||||
*/
|
||||
if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
|
||||
return;
|
||||
do_slab_free(s, page, head, tail, cnt, addr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
|
||||
{
|
||||
do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
void kmem_cache_free(struct kmem_cache *s, void *x)
|
||||
{
|
||||
s = cache_from_obj(s, x);
|
||||
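The new slab_free()/do_slab_free() split above means the fast-path free is skipped entirely when the hook may have quarantined the objects. A rough userspace sketch of that gate, with made-up flag names standing in for SLAB_KASAN and SLAB_DESTROY_BY_RCU (assumptions, not the kernel flags):

/* Sketch only: a free path that bails out when a debugging hook may have
 * taken ownership of (quarantined) the objects, mirroring the check above. */
#include <stdio.h>

#define CACHE_QUARANTINE  0x1	/* stand-in for SLAB_KASAN */
#define CACHE_RCU_FREE    0x2	/* stand-in for SLAB_DESTROY_BY_RCU */

struct cache { unsigned flags; };

static void do_cache_free(struct cache *c, void *obj)
{
	printf("really freeing %p\n", obj);
}

/* Hook that may divert the object; here it only reports what it would do. */
static void free_hook(struct cache *c, void *obj)
{
	if (c->flags & CACHE_QUARANTINE)
		printf("object %p parked in quarantine\n", obj);
}

static void cache_free(struct cache *c, void *obj)
{
	free_hook(c, obj);
	/* Quarantined objects are released later by the quarantine drain,
	 * so only fall through when the hook could not have kept them. */
	if ((c->flags & CACHE_QUARANTINE) && !(c->flags & CACHE_RCU_FREE))
		return;
	do_cache_free(c, obj);
}

int main(void)
{
	struct cache plain = { 0 }, kasan = { CACHE_QUARANTINE };
	int x;
	cache_free(&plain, &x);	/* freed immediately */
	cache_free(&kasan, &x);	/* deferred to the quarantine drain */
	return 0;
}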
@ -3363,7 +3391,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
|
||||
static int calculate_sizes(struct kmem_cache *s, int forced_order)
|
||||
{
|
||||
unsigned long flags = s->flags;
|
||||
unsigned long size = s->object_size;
|
||||
size_t size = s->object_size;
|
||||
int order;
|
||||
|
||||
/*
|
||||
@ -3422,7 +3450,10 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
|
||||
* the object.
|
||||
*/
|
||||
size += 2 * sizeof(struct track);
|
||||
#endif
|
||||
|
||||
kasan_cache_create(s, &size, &s->flags);
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
if (flags & SLAB_RED_ZONE) {
|
||||
/*
|
||||
* Add some empty padding so that we can catch
|
||||
|
12 mm/sparse.c
@ -100,11 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
}
#endif

/*
* Although written for the SPARSEMEM_EXTREME case, this happens
* to also work for the flat array case because
* NR_SECTION_ROOTS==NR_MEM_SECTIONS.
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
@ -123,6 +119,12 @@ int __section_nr(struct mem_section* ms)

return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
int __section_nr(struct mem_section* ms)
{
return (int)(ms - mem_section[0]);
}
#endif

/*
* During early boot, before section_mem_map is used for an actual
76 mm/swap.c
@ -62,12 +62,12 @@ static void __page_cache_release(struct page *page)
|
||||
struct lruvec *lruvec;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
spin_lock_irqsave(zone_lru_lock(zone), flags);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
|
||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||
__ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
}
|
||||
mem_cgroup_uncharge(page);
|
||||
}
|
||||
@ -179,26 +179,26 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
|
||||
void *arg)
|
||||
{
|
||||
int i;
|
||||
struct zone *zone = NULL;
|
||||
struct pglist_data *pgdat = NULL;
|
||||
struct lruvec *lruvec;
|
||||
unsigned long flags = 0;
|
||||
|
||||
for (i = 0; i < pagevec_count(pvec); i++) {
|
||||
struct page *page = pvec->pages[i];
|
||||
struct zone *pagezone = page_zone(page);
|
||||
struct pglist_data *pagepgdat = page_pgdat(page);
|
||||
|
||||
if (pagezone != zone) {
|
||||
if (zone)
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
zone = pagezone;
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
if (pagepgdat != pgdat) {
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
pgdat = pagepgdat;
|
||||
spin_lock_irqsave(&pgdat->lru_lock, flags);
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
(*move_fn)(page, lruvec, arg);
|
||||
}
|
||||
if (zone)
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
if (pgdat)
|
||||
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
|
||||
release_pages(pvec->pages, pvec->nr, pvec->cold);
|
||||
pagevec_reinit(pvec);
|
||||
}
|
||||
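pagevec_lru_move_fn() now keys its lock on the pgdat rather than the zone, taking the lock lazily and re-taking it only when consecutive pages belong to a different node. A small sketch of that "hold the lock while the domain stays the same" pattern, with pthread mutexes standing in for pgdat->lru_lock (the domain count and names are illustrative):

/* Sketch only: process a batch, locking per domain and switching locks
 * only when the current item's domain differs from the previous one. */
#include <pthread.h>
#include <stdio.h>

#define NDOMAINS 2
static pthread_mutex_t domain_lock[NDOMAINS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

struct item { int domain; int value; };

static void process_batch(struct item *items, int n)
{
	int locked = -1;	/* no lock held yet, like pgdat == NULL */

	for (int i = 0; i < n; i++) {
		if (items[i].domain != locked) {
			if (locked >= 0)
				pthread_mutex_unlock(&domain_lock[locked]);
			locked = items[i].domain;
			pthread_mutex_lock(&domain_lock[locked]);
		}
		items[i].value++;	/* the per-item work done under the lock */
	}
	if (locked >= 0)
		pthread_mutex_unlock(&domain_lock[locked]);
}

int main(void)
{
	struct item batch[] = { {0, 0}, {0, 0}, {1, 0}, {1, 0}, {0, 0} };
	process_batch(batch, 5);
	printf("lock taken three times for five items\n");
	return 0;
}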
@ -318,9 +318,9 @@ void activate_page(struct page *page)
|
||||
struct zone *zone = page_zone(page);
|
||||
|
||||
page = compound_head(page);
|
||||
spin_lock_irq(&zone->lru_lock);
|
||||
__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
|
||||
spin_unlock_irq(&zone->lru_lock);
|
||||
spin_lock_irq(zone_lru_lock(zone));
|
||||
__activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
|
||||
spin_unlock_irq(zone_lru_lock(zone));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -445,16 +445,16 @@ void lru_cache_add(struct page *page)
|
||||
*/
|
||||
void add_page_to_unevictable_list(struct page *page)
|
||||
{
|
||||
struct zone *zone = page_zone(page);
|
||||
struct pglist_data *pgdat = page_pgdat(page);
|
||||
struct lruvec *lruvec;
|
||||
|
||||
spin_lock_irq(&zone->lru_lock);
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
spin_lock_irq(&pgdat->lru_lock);
|
||||
lruvec = mem_cgroup_page_lruvec(page, pgdat);
|
||||
ClearPageActive(page);
|
||||
SetPageUnevictable(page);
|
||||
SetPageLRU(page);
|
||||
add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
|
||||
spin_unlock_irq(&zone->lru_lock);
|
||||
spin_unlock_irq(&pgdat->lru_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -730,7 +730,7 @@ void release_pages(struct page **pages, int nr, bool cold)
|
||||
{
|
||||
int i;
|
||||
LIST_HEAD(pages_to_free);
|
||||
struct zone *zone = NULL;
|
||||
struct pglist_data *locked_pgdat = NULL;
|
||||
struct lruvec *lruvec;
|
||||
unsigned long uninitialized_var(flags);
|
||||
unsigned int uninitialized_var(lock_batch);
|
||||
@ -741,11 +741,11 @@ void release_pages(struct page **pages, int nr, bool cold)
|
||||
/*
|
||||
* Make sure the IRQ-safe lock-holding time does not get
|
||||
* excessive with a continuous string of pages from the
|
||||
* same zone. The lock is held only if zone != NULL.
|
||||
* same pgdat. The lock is held only if pgdat != NULL.
|
||||
*/
|
||||
if (zone && ++lock_batch == SWAP_CLUSTER_MAX) {
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
zone = NULL;
|
||||
if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
locked_pgdat = NULL;
|
||||
}
|
||||
|
||||
if (is_huge_zero_page(page)) {
|
||||
@ -758,27 +758,27 @@ void release_pages(struct page **pages, int nr, bool cold)
|
||||
continue;
|
||||
|
||||
if (PageCompound(page)) {
|
||||
if (zone) {
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
zone = NULL;
|
||||
if (locked_pgdat) {
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
locked_pgdat = NULL;
|
||||
}
|
||||
__put_compound_page(page);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (PageLRU(page)) {
|
||||
struct zone *pagezone = page_zone(page);
|
||||
struct pglist_data *pgdat = page_pgdat(page);
|
||||
|
||||
if (pagezone != zone) {
|
||||
if (zone)
|
||||
spin_unlock_irqrestore(&zone->lru_lock,
|
||||
if (pgdat != locked_pgdat) {
|
||||
if (locked_pgdat)
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock,
|
||||
flags);
|
||||
lock_batch = 0;
|
||||
zone = pagezone;
|
||||
spin_lock_irqsave(&zone->lru_lock, flags);
|
||||
locked_pgdat = pgdat;
|
||||
spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
|
||||
}
|
||||
|
||||
lruvec = mem_cgroup_page_lruvec(page, zone);
|
||||
lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
|
||||
VM_BUG_ON_PAGE(!PageLRU(page), page);
|
||||
__ClearPageLRU(page);
|
||||
del_page_from_lru_list(page, lruvec, page_off_lru(page));
|
||||
@ -789,8 +789,8 @@ void release_pages(struct page **pages, int nr, bool cold)
|
||||
|
||||
list_add(&page->lru, &pages_to_free);
|
||||
}
|
||||
if (zone)
|
||||
spin_unlock_irqrestore(&zone->lru_lock, flags);
|
||||
if (locked_pgdat)
|
||||
spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
|
||||
|
||||
mem_cgroup_uncharge_list(&pages_to_free);
|
||||
free_hot_cold_page_list(&pages_to_free, cold);
|
||||
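release_pages() layers a second guard on the same pattern: even when every page comes from one pgdat, the lock is dropped after SWAP_CLUSTER_MAX pages so the IRQ-disabled hold time stays bounded. A sketch of that batching cap (the limit value and helper names are illustrative only):

/* Sketch only: drop and re-take a lock after a fixed number of items so a
 * long run from one domain cannot hold it (and IRQs off) indefinitely. */
#include <pthread.h>
#include <stdio.h>

#define BATCH_LIMIT 32	/* stand-in for SWAP_CLUSTER_MAX */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void release_items(int nitems)
{
	int locked = 0;
	unsigned int lock_batch = 0;
	int acquired = 0;

	for (int i = 0; i < nitems; i++) {
		if (locked && ++lock_batch == BATCH_LIMIT) {
			pthread_mutex_unlock(&lock);
			locked = 0;	/* forces a fresh acquire below */
		}
		if (!locked) {
			pthread_mutex_lock(&lock);
			locked = 1;
			lock_batch = 0;
			acquired++;
		}
		/* per-item teardown work would go here */
	}
	if (locked)
		pthread_mutex_unlock(&lock);
	printf("%d items, lock taken %d times\n", nitems, acquired);
}

int main(void)
{
	release_items(100);	/* the lock is re-taken roughly every 32 items */
	return 0;
}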
@ -826,7 +826,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
|
||||
VM_BUG_ON_PAGE(PageCompound(page_tail), page);
|
||||
VM_BUG_ON_PAGE(PageLRU(page_tail), page);
|
||||
VM_BUG_ON(NR_CPUS != 1 &&
|
||||
!spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
|
||||
!spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock));
|
||||
|
||||
if (!list)
|
||||
SetPageLRU(page_tail);
|
||||
|
@ -95,7 +95,7 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
entry.val, page);
if (likely(!error)) {
address_space->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
__inc_node_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(add_total);
}
spin_unlock_irq(&address_space->tree_lock);
@ -147,7 +147,7 @@ void __delete_from_swap_cache(struct page *page)
set_page_private(page, 0);
ClearPageSwapCache(page);
address_space->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
__dec_node_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(del_total);
}

@ -528,7 +528,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)

if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
free = global_page_state(NR_FREE_PAGES);
free += global_page_state(NR_FILE_PAGES);
free += global_node_page_state(NR_FILE_PAGES);

/*
* shmem pages shouldn't be counted as free in this
@ -536,7 +536,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
* that won't affect the overall amount of available
* memory in the system.
*/
free -= global_page_state(NR_SHMEM);
free -= global_node_page_state(NR_SHMEM);

free += get_nr_swap_pages();
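Only the counter source changes in this hunk (node-based NR_FILE_PAGES and NR_SHMEM instead of the zone counters), but the heuristic is worth spelling out: the OVERCOMMIT_GUESS estimate starts from free pages, adds the page cache, subtracts shmem (which cannot simply be dropped to satisfy an allocation) and adds free swap. A worked sketch with made-up page counts (the full function applies further adjustments not shown in the hunk):

/* Sketch only: the OVERCOMMIT_GUESS estimate with illustrative numbers. */
#include <stdio.h>

int main(void)
{
	long free_pages  = 10000;	/* NR_FREE_PAGES        */
	long file_pages  = 50000;	/* NR_FILE_PAGES (node) */
	long shmem_pages = 8000;	/* NR_SHMEM (node)      */
	long swap_free   = 20000;	/* get_nr_swap_pages()  */

	long free = free_pages;
	free += file_pages;	/* page cache can mostly be reclaimed          */
	free -= shmem_pages;	/* ...but shmem in it cannot simply be dropped */
	free += swap_free;	/* anonymous pages can be pushed to swap       */

	printf("guess estimate: %ld pages available\n", free);	/* 72000 */
	return 0;
}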
1037 mm/vmscan.c (file diff suppressed because it is too large)
417 mm/vmstat.c
@ -86,8 +86,10 @@ void vm_events_fold_cpu(int cpu)
|
||||
*
|
||||
* vm_stat contains the global counters
|
||||
*/
|
||||
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
|
||||
EXPORT_SYMBOL(vm_stat);
|
||||
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
|
||||
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
|
||||
EXPORT_SYMBOL(vm_zone_stat);
|
||||
EXPORT_SYMBOL(vm_node_stat);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
@ -167,19 +169,36 @@ int calculate_normal_threshold(struct zone *zone)
|
||||
*/
|
||||
void refresh_zone_stat_thresholds(void)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
int cpu;
|
||||
int threshold;
|
||||
|
||||
/* Zero current pgdat thresholds */
|
||||
for_each_online_pgdat(pgdat) {
|
||||
for_each_online_cpu(cpu) {
|
||||
per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
struct pglist_data *pgdat = zone->zone_pgdat;
|
||||
unsigned long max_drift, tolerate_drift;
|
||||
|
||||
threshold = calculate_normal_threshold(zone);
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_online_cpu(cpu) {
|
||||
int pgdat_threshold;
|
||||
|
||||
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
= threshold;
|
||||
|
||||
/* Base nodestat threshold on the largest populated zone. */
|
||||
pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
|
||||
per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
|
||||
= max(threshold, pgdat_threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* Only set percpu_drift_mark if there is a danger that
|
||||
* NR_FREE_PAGES reports the low watermark is ok when in fact
|
||||
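The node-level threshold in the hunk above is not computed independently: it is zeroed first and then raised to the largest threshold of any populated zone on that node, so node counters never fold more eagerly than the busiest zone's. A tiny sketch of that max-accumulation (zone names and values are illustrative):

/* Sketch only: derive a per-node threshold as the max of its zones' thresholds. */
#include <stdio.h>

int main(void)
{
	int zone_threshold[3] = { 12, 70, 25 };	/* e.g. DMA, Normal, Movable */
	int node_threshold = 0;			/* zeroed first, as in the hunk */

	for (int i = 0; i < 3; i++) {
		int t = zone_threshold[i];
		node_threshold = t > node_threshold ? t : node_threshold;
	}
	printf("node stat_threshold = %d\n", node_threshold);	/* 70 */
	return 0;
}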
@ -238,6 +257,26 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_zone_page_state);
|
||||
|
||||
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
long delta)
|
||||
{
|
||||
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
long x;
|
||||
long t;
|
||||
|
||||
x = delta + __this_cpu_read(*p);
|
||||
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
|
||||
if (unlikely(x > t || x < -t)) {
|
||||
node_page_state_add(x, pgdat, item);
|
||||
x = 0;
|
||||
}
|
||||
__this_cpu_write(*p, x);
|
||||
}
|
||||
EXPORT_SYMBOL(__mod_node_page_state);
|
||||
|
||||
/*
|
||||
* Optimized increment and decrement functions.
|
||||
*
|
||||
@ -277,12 +316,34 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
}
|
||||
}
|
||||
|
||||
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
v = __this_cpu_inc_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v > t)) {
|
||||
s8 overstep = t >> 1;
|
||||
|
||||
node_page_state_add(v + overstep, pgdat, item);
|
||||
__this_cpu_write(*p, -overstep);
|
||||
}
|
||||
}
|
||||
|
||||
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
{
|
||||
__inc_zone_state(page_zone(page), item);
|
||||
}
|
||||
EXPORT_SYMBOL(__inc_zone_page_state);
|
||||
|
||||
void __inc_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
__inc_node_state(page_pgdat(page), item);
|
||||
}
|
||||
EXPORT_SYMBOL(__inc_node_page_state);
|
||||
|
||||
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
{
|
||||
struct per_cpu_pageset __percpu *pcp = zone->pageset;
|
||||
@ -299,12 +360,34 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
}
|
||||
}
|
||||
|
||||
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
s8 v, t;
|
||||
|
||||
v = __this_cpu_dec_return(*p);
|
||||
t = __this_cpu_read(pcp->stat_threshold);
|
||||
if (unlikely(v < - t)) {
|
||||
s8 overstep = t >> 1;
|
||||
|
||||
node_page_state_add(v - overstep, pgdat, item);
|
||||
__this_cpu_write(*p, overstep);
|
||||
}
|
||||
}
|
||||
|
||||
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
{
|
||||
__dec_zone_state(page_zone(page), item);
|
||||
}
|
||||
EXPORT_SYMBOL(__dec_zone_page_state);
|
||||
|
||||
void __dec_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
__dec_node_state(page_pgdat(page), item);
|
||||
}
|
||||
EXPORT_SYMBOL(__dec_node_page_state);
|
||||
|
||||
#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
|
||||
/*
|
||||
* If we have cmpxchg_local support then we do not need to incur the overhead
|
||||
@ -318,8 +401,8 @@ EXPORT_SYMBOL(__dec_zone_page_state);
|
||||
* 1 Overstepping half of threshold
|
||||
* -1 Overstepping minus half of threshold
|
||||
*/
|
||||
static inline void mod_state(struct zone *zone, enum zone_stat_item item,
|
||||
long delta, int overstep_mode)
|
||||
static inline void mod_zone_state(struct zone *zone,
|
||||
enum zone_stat_item item, long delta, int overstep_mode)
|
||||
{
|
||||
struct per_cpu_pageset __percpu *pcp = zone->pageset;
|
||||
s8 __percpu *p = pcp->vm_stat_diff + item;
|
||||
@ -359,26 +442,83 @@ static inline void mod_state(struct zone *zone, enum zone_stat_item item,
|
||||
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
long delta)
|
||||
{
|
||||
mod_state(zone, item, delta, 0);
|
||||
mod_zone_state(zone, item, delta, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(mod_zone_page_state);
|
||||
|
||||
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
{
|
||||
mod_state(zone, item, 1, 1);
|
||||
}
|
||||
|
||||
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
{
|
||||
mod_state(page_zone(page), item, 1, 1);
|
||||
mod_zone_state(page_zone(page), item, 1, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(inc_zone_page_state);
|
||||
|
||||
void dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
{
|
||||
mod_state(page_zone(page), item, -1, -1);
|
||||
mod_zone_state(page_zone(page), item, -1, -1);
|
||||
}
|
||||
EXPORT_SYMBOL(dec_zone_page_state);
|
||||
|
||||
static inline void mod_node_state(struct pglist_data *pgdat,
|
||||
enum node_stat_item item, int delta, int overstep_mode)
|
||||
{
|
||||
struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
|
||||
s8 __percpu *p = pcp->vm_node_stat_diff + item;
|
||||
long o, n, t, z;
|
||||
|
||||
do {
|
||||
z = 0; /* overflow to node counters */
|
||||
|
||||
/*
|
||||
* The fetching of the stat_threshold is racy. We may apply
|
||||
* a counter threshold to the wrong the cpu if we get
|
||||
* rescheduled while executing here. However, the next
|
||||
* counter update will apply the threshold again and
|
||||
* therefore bring the counter under the threshold again.
|
||||
*
|
||||
* Most of the time the thresholds are the same anyways
|
||||
* for all cpus in a node.
|
||||
*/
|
||||
t = this_cpu_read(pcp->stat_threshold);
|
||||
|
||||
o = this_cpu_read(*p);
|
||||
n = delta + o;
|
||||
|
||||
if (n > t || n < -t) {
|
||||
int os = overstep_mode * (t >> 1) ;
|
||||
|
||||
/* Overflow must be added to node counters */
|
||||
z = n + os;
|
||||
n = -os;
|
||||
}
|
||||
} while (this_cpu_cmpxchg(*p, o, n) != o);
|
||||
|
||||
if (z)
|
||||
node_page_state_add(z, pgdat, item);
|
||||
}
|
||||
|
||||
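mod_node_state() above is the lock-free variant of the threshold scheme: the per-CPU delta is updated with a cmpxchg loop and spills into the global atomic only once it crosses the per-CPU threshold, overstepping by half the threshold so back-to-back updates do not immediately spill again. A single-threaded userspace sketch of that fold, using the GCC/Clang __atomic builtins as stand-ins for this_cpu_cmpxchg and atomic_long_add (an assumption of the sketch, not the kernel primitives):

/* Sketch only: per-CPU differential counter that folds into a shared global
 * counter when |diff| exceeds a threshold, overstepping by threshold/2. */
#include <stdio.h>

static long global_counter;		/* plays the pgdat->vm_stat[] atomic     */
static signed char cpu_diff;		/* plays this CPU's vm_node_stat_diff[]  */
static const signed char threshold = 32;

static void mod_state(long delta, int overstep_mode)
{
	signed char o, n;
	long z;

	do {
		z = 0;				/* overflow to fold into the global */
		o = __atomic_load_n(&cpu_diff, __ATOMIC_RELAXED);
		n = (signed char)(o + delta);
		if (n > threshold || n < -threshold) {
			int os = overstep_mode * (threshold >> 1);
			z = n + os;		/* everything above -os goes global  */
			n = -os;		/* leave headroom for the next updates */
		}
	} while (!__atomic_compare_exchange_n(&cpu_diff, &o, n, 0,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));
	if (z)
		__atomic_fetch_add(&global_counter, z, __ATOMIC_RELAXED);
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		mod_state(1, 1);	/* increments, overstep_mode = 1 */
	printf("global=%ld cpu_diff=%d (sum=%ld)\n",
	       global_counter, cpu_diff, global_counter + cpu_diff);
	return 0;
}

The sum printed at the end always equals the number of updates: the global counter plus the per-CPU residue is conserved, only the split between them shifts at each fold.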
void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
long delta)
|
||||
{
|
||||
mod_node_state(pgdat, item, delta, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(mod_node_page_state);
|
||||
|
||||
void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
mod_node_state(pgdat, item, 1, 1);
|
||||
}
|
||||
|
||||
void inc_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
mod_node_state(page_pgdat(page), item, 1, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(inc_node_page_state);
|
||||
|
||||
void dec_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
mod_node_state(page_pgdat(page), item, -1, -1);
|
||||
}
|
||||
EXPORT_SYMBOL(dec_node_page_state);
|
||||
#else
|
||||
/*
|
||||
* Use interrupt disable to serialize counter updates
|
||||
@ -394,15 +534,6 @@ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
|
||||
}
|
||||
EXPORT_SYMBOL(mod_zone_page_state);
|
||||
|
||||
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__inc_zone_state(zone, item);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -424,21 +555,69 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(dec_zone_page_state);
|
||||
#endif
|
||||
|
||||
void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__inc_node_state(pgdat, item);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(inc_node_state);
|
||||
|
||||
void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
|
||||
long delta)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__mod_node_page_state(pgdat, item, delta);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(mod_node_page_state);
|
||||
|
||||
void inc_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct pglist_data *pgdat;
|
||||
|
||||
pgdat = page_pgdat(page);
|
||||
local_irq_save(flags);
|
||||
__inc_node_state(pgdat, item);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(inc_node_page_state);
|
||||
|
||||
void dec_node_page_state(struct page *page, enum node_stat_item item)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
__dec_node_page_state(page, item);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(dec_node_page_state);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Fold a differential into the global counters.
|
||||
* Returns the number of counters updated.
|
||||
*/
|
||||
static int fold_diff(int *diff)
|
||||
static int fold_diff(int *zone_diff, int *node_diff)
|
||||
{
|
||||
int i;
|
||||
int changes = 0;
|
||||
|
||||
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
|
||||
if (diff[i]) {
|
||||
atomic_long_add(diff[i], &vm_stat[i]);
|
||||
if (zone_diff[i]) {
|
||||
atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
|
||||
changes++;
|
||||
}
|
||||
|
||||
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
|
||||
if (node_diff[i]) {
|
||||
atomic_long_add(node_diff[i], &vm_node_stat[i]);
|
||||
changes++;
|
||||
}
|
||||
return changes;
|
||||
@ -462,9 +641,11 @@ static int fold_diff(int *diff)
|
||||
*/
|
||||
static int refresh_cpu_vm_stats(bool do_pagesets)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
int i;
|
||||
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
|
||||
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
|
||||
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
|
||||
int changes = 0;
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
@ -477,7 +658,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
|
||||
if (v) {
|
||||
|
||||
atomic_long_add(v, &zone->vm_stat[i]);
|
||||
global_diff[i] += v;
|
||||
global_zone_diff[i] += v;
|
||||
#ifdef CONFIG_NUMA
|
||||
/* 3 seconds idle till flush */
|
||||
__this_cpu_write(p->expire, 3);
|
||||
@ -516,7 +697,22 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
changes += fold_diff(global_diff);
|
||||
|
||||
for_each_online_pgdat(pgdat) {
|
||||
struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
|
||||
|
||||
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
|
||||
int v;
|
||||
|
||||
v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
|
||||
if (v) {
|
||||
atomic_long_add(v, &pgdat->vm_stat[i]);
|
||||
global_node_diff[i] += v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
changes += fold_diff(global_zone_diff, global_node_diff);
|
||||
return changes;
|
||||
}
|
||||
|
||||
@ -527,9 +723,11 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
|
||||
*/
|
||||
void cpu_vm_stats_fold(int cpu)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
int i;
|
||||
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
|
||||
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
|
||||
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
|
||||
|
||||
for_each_populated_zone(zone) {
|
||||
struct per_cpu_pageset *p;
|
||||
@ -543,11 +741,27 @@ void cpu_vm_stats_fold(int cpu)
|
||||
v = p->vm_stat_diff[i];
|
||||
p->vm_stat_diff[i] = 0;
|
||||
atomic_long_add(v, &zone->vm_stat[i]);
|
||||
global_diff[i] += v;
|
||||
global_zone_diff[i] += v;
|
||||
}
|
||||
}
|
||||
|
||||
fold_diff(global_diff);
|
||||
for_each_online_pgdat(pgdat) {
|
||||
struct per_cpu_nodestat *p;
|
||||
|
||||
p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
|
||||
|
||||
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
|
||||
if (p->vm_node_stat_diff[i]) {
|
||||
int v;
|
||||
|
||||
v = p->vm_node_stat_diff[i];
|
||||
p->vm_node_stat_diff[i] = 0;
|
||||
atomic_long_add(v, &pgdat->vm_stat[i]);
|
||||
global_node_diff[i] += v;
|
||||
}
|
||||
}
|
||||
|
||||
fold_diff(global_zone_diff, global_node_diff);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -563,16 +777,19 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
|
||||
int v = pset->vm_stat_diff[i];
|
||||
pset->vm_stat_diff[i] = 0;
|
||||
atomic_long_add(v, &zone->vm_stat[i]);
|
||||
atomic_long_add(v, &vm_stat[i]);
|
||||
atomic_long_add(v, &vm_zone_stat[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* Determine the per node value of a stat item.
|
||||
* Determine the per node value of a stat item. This function
|
||||
* is called frequently in a NUMA machine, so try to be as
|
||||
* frugal as possible.
|
||||
*/
|
||||
unsigned long node_page_state(int node, enum zone_stat_item item)
|
||||
unsigned long sum_zone_node_page_state(int node,
|
||||
enum zone_stat_item item)
|
||||
{
|
||||
struct zone *zones = NODE_DATA(node)->node_zones;
|
||||
int i;
|
||||
@ -584,6 +801,19 @@ unsigned long node_page_state(int node, enum zone_stat_item item)
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine the per node value of a stat item.
|
||||
*/
|
||||
unsigned long node_page_state(struct pglist_data *pgdat,
|
||||
enum node_stat_item item)
|
||||
{
|
||||
long x = atomic_long_read(&pgdat->vm_stat[item]);
|
||||
#ifdef CONFIG_SMP
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
#endif
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_COMPACTION
|
||||
@ -691,33 +921,18 @@ int fragmentation_index(struct zone *zone, unsigned int order)
|
||||
const char * const vmstat_text[] = {
|
||||
/* enum zone_stat_item countes */
|
||||
"nr_free_pages",
|
||||
"nr_alloc_batch",
|
||||
"nr_inactive_anon",
|
||||
"nr_active_anon",
|
||||
"nr_inactive_file",
|
||||
"nr_active_file",
|
||||
"nr_unevictable",
|
||||
"nr_zone_inactive_anon",
|
||||
"nr_zone_active_anon",
|
||||
"nr_zone_inactive_file",
|
||||
"nr_zone_active_file",
|
||||
"nr_zone_unevictable",
|
||||
"nr_zone_write_pending",
|
||||
"nr_mlock",
|
||||
"nr_anon_pages",
|
||||
"nr_mapped",
|
||||
"nr_file_pages",
|
||||
"nr_dirty",
|
||||
"nr_writeback",
|
||||
"nr_slab_reclaimable",
|
||||
"nr_slab_unreclaimable",
|
||||
"nr_page_table_pages",
|
||||
"nr_kernel_stack",
|
||||
"nr_unstable",
|
||||
"nr_bounce",
|
||||
"nr_vmscan_write",
|
||||
"nr_vmscan_immediate_reclaim",
|
||||
"nr_writeback_temp",
|
||||
"nr_isolated_anon",
|
||||
"nr_isolated_file",
|
||||
"nr_shmem",
|
||||
"nr_dirtied",
|
||||
"nr_written",
|
||||
"nr_pages_scanned",
|
||||
#if IS_ENABLED(CONFIG_ZSMALLOC)
|
||||
"nr_zspages",
|
||||
#endif
|
||||
@ -729,13 +944,35 @@ const char * const vmstat_text[] = {
|
||||
"numa_local",
|
||||
"numa_other",
|
||||
#endif
|
||||
"nr_free_cma",
|
||||
|
||||
/* Node-based counters */
|
||||
"nr_inactive_anon",
|
||||
"nr_active_anon",
|
||||
"nr_inactive_file",
|
||||
"nr_active_file",
|
||||
"nr_unevictable",
|
||||
"nr_isolated_anon",
|
||||
"nr_isolated_file",
|
||||
"nr_pages_scanned",
|
||||
"workingset_refault",
|
||||
"workingset_activate",
|
||||
"workingset_nodereclaim",
|
||||
"nr_anon_transparent_hugepages",
|
||||
"nr_anon_pages",
|
||||
"nr_mapped",
|
||||
"nr_file_pages",
|
||||
"nr_dirty",
|
||||
"nr_writeback",
|
||||
"nr_writeback_temp",
|
||||
"nr_shmem",
|
||||
"nr_shmem_hugepages",
|
||||
"nr_shmem_pmdmapped",
|
||||
"nr_free_cma",
|
||||
"nr_anon_transparent_hugepages",
|
||||
"nr_unstable",
|
||||
"nr_vmscan_write",
|
||||
"nr_vmscan_immediate_reclaim",
|
||||
"nr_dirtied",
|
||||
"nr_written",
|
||||
|
||||
/* enum writeback_stat_item counters */
|
||||
"nr_dirty_threshold",
|
||||
@ -749,6 +986,8 @@ const char * const vmstat_text[] = {
|
||||
"pswpout",
|
||||
|
||||
TEXTS_FOR_ZONES("pgalloc")
|
||||
TEXTS_FOR_ZONES("allocstall")
|
||||
TEXTS_FOR_ZONES("pgskip")
|
||||
|
||||
"pgfree",
|
||||
"pgactivate",
|
||||
@ -758,11 +997,11 @@ const char * const vmstat_text[] = {
|
||||
"pgmajfault",
|
||||
"pglazyfreed",
|
||||
|
||||
TEXTS_FOR_ZONES("pgrefill")
|
||||
TEXTS_FOR_ZONES("pgsteal_kswapd")
|
||||
TEXTS_FOR_ZONES("pgsteal_direct")
|
||||
TEXTS_FOR_ZONES("pgscan_kswapd")
|
||||
TEXTS_FOR_ZONES("pgscan_direct")
|
||||
"pgrefill",
|
||||
"pgsteal_kswapd",
|
||||
"pgsteal_direct",
|
||||
"pgscan_kswapd",
|
||||
"pgscan_direct",
|
||||
"pgscan_direct_throttle",
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@ -774,7 +1013,6 @@ const char * const vmstat_text[] = {
|
||||
"kswapd_low_wmark_hit_quickly",
|
||||
"kswapd_high_wmark_hit_quickly",
|
||||
"pageoutrun",
|
||||
"allocstall",
|
||||
|
||||
"pgrotated",
|
||||
|
||||
@ -1180,17 +1418,41 @@ static const struct file_operations pagetypeinfo_file_ops = {
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
|
||||
{
|
||||
int zid;
|
||||
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
|
||||
struct zone *compare = &pgdat->node_zones[zid];
|
||||
|
||||
if (populated_zone(compare))
|
||||
return zone == compare;
|
||||
}
|
||||
|
||||
/* The zone must be somewhere! */
|
||||
WARN_ON_ONCE(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
||||
struct zone *zone)
|
||||
{
|
||||
int i;
|
||||
seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
|
||||
if (is_zone_first_populated(pgdat, zone)) {
|
||||
seq_printf(m, "\n per-node stats");
|
||||
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
|
||||
seq_printf(m, "\n %-12s %lu",
|
||||
vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
|
||||
node_page_state(pgdat, i));
|
||||
}
|
||||
}
|
||||
seq_printf(m,
|
||||
"\n pages free %lu"
|
||||
"\n min %lu"
|
||||
"\n low %lu"
|
||||
"\n high %lu"
|
||||
"\n scanned %lu"
|
||||
"\n node_scanned %lu"
|
||||
"\n spanned %lu"
|
||||
"\n present %lu"
|
||||
"\n managed %lu",
|
||||
@ -1198,13 +1460,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
||||
min_wmark_pages(zone),
|
||||
low_wmark_pages(zone),
|
||||
high_wmark_pages(zone),
|
||||
zone_page_state(zone, NR_PAGES_SCANNED),
|
||||
node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED),
|
||||
zone->spanned_pages,
|
||||
zone->present_pages,
|
||||
zone->managed_pages);
|
||||
|
||||
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
|
||||
seq_printf(m, "\n %-12s %lu", vmstat_text[i],
|
||||
seq_printf(m, "\n %-12s %lu", vmstat_text[i],
|
||||
zone_page_state(zone, i));
|
||||
|
||||
seq_printf(m,
|
||||
@ -1234,12 +1496,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
||||
#endif
|
||||
}
|
||||
seq_printf(m,
|
||||
"\n all_unreclaimable: %u"
|
||||
"\n start_pfn: %lu"
|
||||
"\n inactive_ratio: %u",
|
||||
!zone_reclaimable(zone),
|
||||
"\n node_unreclaimable: %u"
|
||||
"\n start_pfn: %lu"
|
||||
"\n node_inactive_ratio: %u",
|
||||
!pgdat_reclaimable(zone->zone_pgdat),
|
||||
zone->zone_start_pfn,
|
||||
zone->inactive_ratio);
|
||||
zone->zone_pgdat->inactive_ratio);
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
@ -1287,6 +1549,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
|
||||
if (*pos >= ARRAY_SIZE(vmstat_text))
|
||||
return NULL;
|
||||
stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
|
||||
NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
|
||||
NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
|
||||
|
||||
#ifdef CONFIG_VM_EVENT_COUNTERS
|
||||
@ -1301,6 +1564,10 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
|
||||
v[i] = global_page_state(i);
|
||||
v += NR_VM_ZONE_STAT_ITEMS;
|
||||
|
||||
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
|
||||
v[i] = global_node_page_state(i);
|
||||
v += NR_VM_NODE_STAT_ITEMS;
|
||||
|
||||
global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
|
||||
v + NR_DIRTY_THRESHOLD);
|
||||
v += NR_VM_WRITEBACK_STAT_ITEMS;
|
||||
@ -1325,7 +1592,6 @@ static int vmstat_show(struct seq_file *m, void *arg)
|
||||
{
|
||||
unsigned long *l = arg;
|
||||
unsigned long off = l - (unsigned long *)m->private;
|
||||
|
||||
seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
|
||||
return 0;
|
||||
}
|
||||
@ -1390,13 +1656,12 @@ int vmstat_refresh(struct ctl_table *table, int write,
|
||||
if (err)
|
||||
return err;
|
||||
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
|
||||
val = atomic_long_read(&vm_stat[i]);
|
||||
val = atomic_long_read(&vm_zone_stat[i]);
|
||||
if (val < 0) {
|
||||
switch (i) {
|
||||
case NR_ALLOC_BATCH:
|
||||
case NR_PAGES_SCANNED:
|
||||
/*
|
||||
* These are often seen to go negative in
|
||||
* This is often seen to go negative in
|
||||
* recent kernels, but not to go permanently
|
||||
* negative. Whilst it would be nicer not to
|
||||
* have exceptions, rooting them out would be
|
||||
|
@ -16,7 +16,7 @@
|
||||
/*
|
||||
* Double CLOCK lists
|
||||
*
|
||||
* Per zone, two clock lists are maintained for file pages: the
|
||||
* Per node, two clock lists are maintained for file pages: the
|
||||
* inactive and the active list. Freshly faulted pages start out at
|
||||
* the head of the inactive list and page reclaim scans pages from the
|
||||
* tail. Pages that are accessed multiple times on the inactive list
|
||||
@ -141,11 +141,11 @@
|
||||
*
|
||||
* Implementation
|
||||
*
|
||||
* For each zone's file LRU lists, a counter for inactive evictions
|
||||
* and activations is maintained (zone->inactive_age).
|
||||
* For each node's file LRU lists, a counter for inactive evictions
|
||||
* and activations is maintained (node->inactive_age).
|
||||
*
|
||||
* On eviction, a snapshot of this counter (along with some bits to
|
||||
* identify the zone) is stored in the now empty page cache radix tree
|
||||
* identify the node) is stored in the now empty page cache radix tree
|
||||
* slot of the evicted page. This is called a shadow entry.
|
||||
*
|
||||
* On cache misses for which there are shadow entries, an eligible
|
||||
@ -153,7 +153,7 @@
|
||||
*/
|
||||
|
||||
#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \
|
||||
ZONES_SHIFT + NODES_SHIFT + \
|
||||
NODES_SHIFT + \
|
||||
MEM_CGROUP_ID_SHIFT)
|
||||
#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
|
||||
|
||||
@ -167,33 +167,30 @@
*/
static unsigned int bucket_order __read_mostly;

static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction)
static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
{
eviction >>= bucket_order;
eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone);
eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);

return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
unsigned long *evictionp)
{
unsigned long entry = (unsigned long)shadow;
int memcgid, nid, zid;
int memcgid, nid;

entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
zid = entry & ((1UL << ZONES_SHIFT) - 1);
entry >>= ZONES_SHIFT;
nid = entry & ((1UL << NODES_SHIFT) - 1);
entry >>= NODES_SHIFT;
memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
entry >>= MEM_CGROUP_ID_SHIFT;

*memcgidp = memcgid;
*zonep = NODE_DATA(nid)->node_zones + zid;
*pgdat = NODE_DATA(nid);
*evictionp = entry << bucket_order;
}
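pack_shadow() now folds only the memcg id, the node id and the eviction counter into the shadow word; the zone index is gone, which is also why EVICTION_SHIFT above loses its ZONES_SHIFT term. A standalone sketch of the same shift-and-or packing, with made-up field widths rather than the kernel's config-derived ones:

/* Sketch only: pack (memcgid, nid, eviction) into one word and unpack it,
 * using illustrative field widths, not the kernel constants. */
#include <stdio.h>

#define MEMCG_BITS 16
#define NODE_BITS  10
#define LOW_BITS   2		/* plays RADIX_TREE_EXCEPTIONAL_SHIFT/ENTRY */

static unsigned long pack_shadow(int memcgid, int nid, unsigned long eviction)
{
	eviction = (eviction << MEMCG_BITS) | memcgid;
	eviction = (eviction << NODE_BITS)  | nid;
	eviction = (eviction << LOW_BITS)   | 1;	/* tag bit in the low bits */
	return eviction;
}

static void unpack_shadow(unsigned long entry, int *memcgid, int *nid,
			  unsigned long *eviction)
{
	entry >>= LOW_BITS;
	*nid = entry & ((1UL << NODE_BITS) - 1);
	entry >>= NODE_BITS;
	*memcgid = entry & ((1UL << MEMCG_BITS) - 1);
	entry >>= MEMCG_BITS;
	*eviction = entry;
}

int main(void)
{
	int memcgid, nid;
	unsigned long eviction;
	unsigned long shadow = pack_shadow(42, 3, 123456);

	unpack_shadow(shadow, &memcgid, &nid, &eviction);
	printf("memcgid=%d nid=%d eviction=%lu\n", memcgid, nid, eviction);
	return 0;
}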
@ -208,7 +205,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
|
||||
void *workingset_eviction(struct address_space *mapping, struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg = page_memcg(page);
|
||||
struct zone *zone = page_zone(page);
|
||||
struct pglist_data *pgdat = page_pgdat(page);
|
||||
int memcgid = mem_cgroup_id(memcg);
|
||||
unsigned long eviction;
|
||||
struct lruvec *lruvec;
|
||||
@ -218,9 +215,9 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
|
||||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
|
||||
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
|
||||
lruvec = mem_cgroup_lruvec(pgdat, memcg);
|
||||
eviction = atomic_long_inc_return(&lruvec->inactive_age);
|
||||
return pack_shadow(memcgid, zone, eviction);
|
||||
return pack_shadow(memcgid, pgdat, eviction);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -228,7 +225,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
|
||||
* @shadow: shadow entry of the evicted page
|
||||
*
|
||||
* Calculates and evaluates the refault distance of the previously
|
||||
* evicted page in the context of the zone it was allocated in.
|
||||
* evicted page in the context of the node it was allocated in.
|
||||
*
|
||||
* Returns %true if the page should be activated, %false otherwise.
|
||||
*/
|
||||
@ -240,10 +237,10 @@ bool workingset_refault(void *shadow)
|
||||
unsigned long eviction;
|
||||
struct lruvec *lruvec;
|
||||
unsigned long refault;
|
||||
struct zone *zone;
|
||||
struct pglist_data *pgdat;
|
||||
int memcgid;
|
||||
|
||||
unpack_shadow(shadow, &memcgid, &zone, &eviction);
|
||||
unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
@ -267,7 +264,7 @@ bool workingset_refault(void *shadow)
|
||||
rcu_read_unlock();
|
||||
return false;
|
||||
}
|
||||
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
|
||||
lruvec = mem_cgroup_lruvec(pgdat, memcg);
|
||||
refault = atomic_long_read(&lruvec->inactive_age);
|
||||
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
|
||||
rcu_read_unlock();
|
||||
@ -290,10 +287,10 @@ bool workingset_refault(void *shadow)
|
||||
*/
|
||||
refault_distance = (refault - eviction) & EVICTION_MASK;
|
||||
|
||||
inc_zone_state(zone, WORKINGSET_REFAULT);
|
||||
inc_node_state(pgdat, WORKINGSET_REFAULT);
|
||||
|
||||
if (refault_distance <= active_file) {
|
||||
inc_zone_state(zone, WORKINGSET_ACTIVATE);
|
||||
inc_node_state(pgdat, WORKINGSET_ACTIVATE);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
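refault_distance is computed as (refault - eviction) & EVICTION_MASK, so the comparison against the active-list size stays meaningful even after the inactive_age counter wraps within its truncated width. A small worked example of that modular subtraction, using an 8-bit mask for readability in place of EVICTION_MASK:

/* Sketch only: distance between two monotonically increasing counters stored
 * truncated, handled with modular subtraction so wraparound is harmless. */
#include <stdio.h>

#define MASK 0xffUL	/* illustrative 8-bit eviction space */

int main(void)
{
	unsigned long eviction = 250;			/* counter when the page was evicted   */
	unsigned long refault  = (250 + 20) & MASK;	/* counter wrapped to 14 by refault time */
	unsigned long active_file = 64;			/* pages on the active file list        */

	unsigned long distance = (refault - eviction) & MASK;	/* 20, despite the wrap */

	printf("refault_distance=%lu -> %s\n", distance,
	       distance <= active_file ? "activate" : "leave inactive");
	return 0;
}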
@ -305,9 +302,10 @@ bool workingset_refault(void *shadow)
|
||||
*/
|
||||
void workingset_activation(struct page *page)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
lock_page_memcg(page);
|
||||
rcu_read_lock();
|
||||
/*
|
||||
* Filter non-memcg pages here, e.g. unmap can call
|
||||
* mark_page_accessed() on VDSO pages.
|
||||
@ -315,12 +313,13 @@ void workingset_activation(struct page *page)
|
||||
* XXX: See workingset_refault() - this should return
|
||||
* root_mem_cgroup even for !CONFIG_MEMCG.
|
||||
*/
|
||||
if (!mem_cgroup_disabled() && !page_memcg(page))
|
||||
memcg = page_memcg_rcu(page);
|
||||
if (!mem_cgroup_disabled() && !memcg)
|
||||
goto out;
|
||||
lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page));
|
||||
lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
|
||||
atomic_long_inc(&lruvec->inactive_age);
|
||||
out:
|
||||
unlock_page_memcg(page);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -349,12 +348,13 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
|
||||
shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
|
||||
local_irq_enable();
|
||||
|
||||
if (memcg_kmem_enabled())
|
||||
if (memcg_kmem_enabled()) {
|
||||
pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
|
||||
LRU_ALL_FILE);
|
||||
else
|
||||
pages = node_page_state(sc->nid, NR_ACTIVE_FILE) +
|
||||
node_page_state(sc->nid, NR_INACTIVE_FILE);
|
||||
} else {
|
||||
pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
|
||||
node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Active cache pages are limited to 50% of memory, and shadow
|
||||
@ -433,7 +433,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
|
||||
}
|
||||
}
|
||||
BUG_ON(node->count);
|
||||
inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM);
|
||||
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
|
||||
if (!__radix_tree_delete_node(&mapping->page_tree, node))
|
||||
BUG();
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
* page->freelist(index): links together all component pages of a zspage
|
||||
* For the huge page, this is always 0, so we use this field
|
||||
* to store handle.
|
||||
* page->units: first object offset in a subpage of zspage
|
||||
*
|
||||
* Usage of struct page flags:
|
||||
* PG_private: identifies the first component page
|
||||
@ -137,9 +138,6 @@
|
||||
*/
|
||||
#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> CLASS_BITS)
|
||||
|
||||
/*
|
||||
* We do not maintain any list for completely empty or full pages
|
||||
*/
|
||||
enum fullness_group {
|
||||
ZS_EMPTY,
|
||||
ZS_ALMOST_EMPTY,
|
||||
@ -467,11 +465,6 @@ static struct zpool_driver zs_zpool_driver = {
|
||||
MODULE_ALIAS("zpool-zsmalloc");
|
||||
#endif /* CONFIG_ZPOOL */
|
||||
|
||||
static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
|
||||
{
|
||||
return pages_per_zspage * PAGE_SIZE / size;
|
||||
}
|
||||
|
||||
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
|
||||
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
|
||||
|
||||
@ -635,8 +628,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
|
||||
freeable = zs_can_compact(class);
|
||||
spin_unlock(&class->lock);
|
||||
|
||||
objs_per_zspage = get_maxobj_per_zspage(class->size,
|
||||
class->pages_per_zspage);
|
||||
objs_per_zspage = class->objs_per_zspage;
|
||||
pages_used = obj_allocated / objs_per_zspage *
|
||||
class->pages_per_zspage;
|
||||
|
||||
@ -945,8 +937,8 @@ static void unpin_tag(unsigned long handle)
|
||||
static void reset_page(struct page *page)
|
||||
{
|
||||
__ClearPageMovable(page);
|
||||
clear_bit(PG_private, &page->flags);
|
||||
clear_bit(PG_private_2, &page->flags);
|
||||
ClearPagePrivate(page);
|
||||
ClearPagePrivate2(page);
|
||||
set_page_private(page, 0);
|
||||
page_mapcount_reset(page);
|
||||
ClearPageHugeObject(page);
|
||||
@ -1014,8 +1006,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
|
||||
cache_free_zspage(pool, zspage);
|
||||
|
||||
zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
|
||||
class->size, class->pages_per_zspage));
|
||||
zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
|
||||
atomic_long_sub(class->pages_per_zspage,
|
||||
&pool->pages_allocated);
|
||||
}
|
||||
@ -1350,7 +1341,7 @@ static void zs_unregister_cpu_notifier(void)
|
||||
cpu_notifier_register_done();
|
||||
}
|
||||
|
||||
static void init_zs_size_classes(void)
|
||||
static void __init init_zs_size_classes(void)
|
||||
{
|
||||
int nr;
|
||||
|
||||
@ -1361,16 +1352,14 @@ static void init_zs_size_classes(void)
zs_size_classes = nr;
}

static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
static bool can_merge(struct size_class *prev, int pages_per_zspage,
int objs_per_zspage)
{
if (prev->pages_per_zspage != pages_per_zspage)
return false;
if (prev->pages_per_zspage == pages_per_zspage &&
prev->objs_per_zspage == objs_per_zspage)
return true;

if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)
!= get_maxobj_per_zspage(size, pages_per_zspage))
return false;

return true;
return false;
}

static bool zspage_full(struct size_class *class, struct zspage *zspage)
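Size classes are now merged by comparing the cached pages_per_zspage and objs_per_zspage directly instead of recomputing objs_per_zspage (pages_per_zspage * PAGE_SIZE / size) on every comparison. A quick sketch showing how neighbouring class sizes can land in the same (pages, objects) bucket and therefore merge; 4 KiB pages and the class sizes below are assumptions, not the kernel's actual table:

/* Sketch only: compute objs_per_zspage for a few class sizes and check which
 * neighbouring classes are mergeable because both derived values match. */
#include <stdio.h>

#define PAGE_SIZE 4096

struct size_class { int size, pages_per_zspage, objs_per_zspage; };

static int can_merge(const struct size_class *prev, int pages_per_zspage,
		     int objs_per_zspage)
{
	return prev->pages_per_zspage == pages_per_zspage &&
	       prev->objs_per_zspage == objs_per_zspage;
}

int main(void)
{
	struct size_class classes[] = {
		{ 3264, 4, 0 }, { 3280, 4, 0 }, { 3296, 4, 0 }, { 3328, 4, 0 },
	};
	int n = sizeof(classes) / sizeof(classes[0]);

	for (int i = 0; i < n; i++)
		classes[i].objs_per_zspage =
			classes[i].pages_per_zspage * PAGE_SIZE / classes[i].size;

	for (int i = 1; i < n; i++)
		printf("size %d: %d objs/zspage, merge with previous: %s\n",
		       classes[i].size, classes[i].objs_per_zspage,
		       can_merge(&classes[i - 1], classes[i].pages_per_zspage,
				 classes[i].objs_per_zspage) ? "yes" : "no");
	return 0;
}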
@ -1541,6 +1530,7 @@ static unsigned long obj_malloc(struct size_class *class,
|
||||
* zs_malloc - Allocate block of given size from pool.
|
||||
* @pool: pool to allocate from
|
||||
* @size: size of block to allocate
|
||||
* @gfp: gfp flags when allocating object
|
||||
*
|
||||
* On success, handle to the allocated object is returned,
|
||||
* otherwise 0.
|
||||
@ -1592,8 +1582,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
|
||||
record_obj(handle, obj);
|
||||
atomic_long_add(class->pages_per_zspage,
|
||||
&pool->pages_allocated);
|
||||
zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
|
||||
class->size, class->pages_per_zspage));
|
||||
zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
|
||||
|
||||
/* We completely set up zspage so mark them as movable */
|
||||
SetZsPageMovable(pool, zspage);
|
||||
@ -1741,10 +1730,11 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
|
||||
* return handle.
|
||||
*/
|
||||
static unsigned long find_alloced_obj(struct size_class *class,
|
||||
struct page *page, int index)
|
||||
struct page *page, int *obj_idx)
|
||||
{
|
||||
unsigned long head;
|
||||
int offset = 0;
|
||||
int index = *obj_idx;
|
||||
unsigned long handle = 0;
|
||||
void *addr = kmap_atomic(page);
|
||||
|
||||
@ -1765,6 +1755,9 @@ static unsigned long find_alloced_obj(struct size_class *class,
|
||||
}
|
||||
|
||||
kunmap_atomic(addr);
|
||||
|
||||
*obj_idx = index;
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
@ -1776,7 +1769,7 @@ struct zs_compact_control {
|
||||
struct page *d_page;
|
||||
/* Starting object index within @s_page which used for live object
|
||||
* in the subpage. */
|
||||
int index;
|
||||
int obj_idx;
|
||||
};
|
||||
|
||||
static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
@ -1786,16 +1779,16 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
unsigned long handle;
|
||||
struct page *s_page = cc->s_page;
|
||||
struct page *d_page = cc->d_page;
|
||||
unsigned long index = cc->index;
|
||||
int obj_idx = cc->obj_idx;
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
handle = find_alloced_obj(class, s_page, index);
|
||||
handle = find_alloced_obj(class, s_page, &obj_idx);
|
||||
if (!handle) {
|
||||
s_page = get_next_page(s_page);
|
||||
if (!s_page)
|
||||
break;
|
||||
index = 0;
|
||||
obj_idx = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1809,7 +1802,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
used_obj = handle_to_obj(handle);
|
||||
free_obj = obj_malloc(class, get_zspage(d_page), handle);
|
||||
zs_object_copy(class, free_obj, used_obj);
|
||||
index++;
|
||||
obj_idx++;
|
||||
/*
|
||||
* record_obj updates handle's value to free_obj and it will
|
||||
* invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
|
||||
@ -1824,7 +1817,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
|
||||
/* Remember last position in this iteration */
|
||||
cc->s_page = s_page;
|
||||
cc->index = index;
|
||||
cc->obj_idx = obj_idx;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2181,8 +2174,7 @@ static int zs_register_migration(struct zs_pool *pool)
|
||||
static void zs_unregister_migration(struct zs_pool *pool)
|
||||
{
|
||||
flush_work(&pool->free_work);
|
||||
if (pool->inode)
|
||||
iput(pool->inode);
|
||||
iput(pool->inode);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2261,8 +2253,7 @@ static unsigned long zs_can_compact(struct size_class *class)
|
||||
return 0;
|
||||
|
||||
obj_wasted = obj_allocated - obj_used;
|
||||
obj_wasted /= get_maxobj_per_zspage(class->size,
|
||||
class->pages_per_zspage);
|
||||
obj_wasted /= class->objs_per_zspage;
|
||||
|
||||
return obj_wasted * class->pages_per_zspage;
|
||||
}
|
||||
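zs_can_compact() estimates the reclaimable pages from the object surplus: the number of wasted (allocated but unused) object slots divided by the now-cached objs_per_zspage gives whole zspages that could be emptied, and multiplying by pages_per_zspage converts that to pages. A worked example with illustrative numbers:

/* Sketch only: how many pages compaction could free for one size class. */
#include <stdio.h>

int main(void)
{
	long obj_allocated    = 1000;	/* slots currently backed by zspages */
	long obj_used         = 810;	/* slots actually holding objects    */
	long objs_per_zspage  = 64;
	long pages_per_zspage = 4;

	long obj_wasted = obj_allocated - obj_used;	/* 190 empty slots          */
	obj_wasted /= objs_per_zspage;			/* 2 whole zspages' worth   */

	printf("compaction could free about %ld pages\n",
	       obj_wasted * pages_per_zspage);		/* 8 pages */
	return 0;
}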
@ -2279,7 +2270,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
|
||||
if (!zs_can_compact(class))
|
||||
break;
|
||||
|
||||
cc.index = 0;
|
||||
cc.obj_idx = 0;
|
||||
cc.s_page = get_first_page(src_zspage);
|
||||
|
||||
while ((dst_zspage = isolate_zspage(class, false))) {
|
||||
@ -2398,7 +2389,7 @@ static int zs_register_shrinker(struct zs_pool *pool)
|
||||
|
||||
/**
|
||||
* zs_create_pool - Creates an allocation pool to work from.
|
||||
* @flags: allocation flags used to allocate pool metadata
|
||||
* @name: pool name to be created
|
||||
*
|
||||
* This function must be called before anything when using
|
||||
* the zsmalloc allocator.
|
||||
@ -2438,6 +2429,7 @@ struct zs_pool *zs_create_pool(const char *name)
|
||||
for (i = zs_size_classes - 1; i >= 0; i--) {
|
||||
int size;
|
||||
int pages_per_zspage;
|
||||
int objs_per_zspage;
|
||||
struct size_class *class;
|
||||
int fullness = 0;
|
||||
|
||||
@ -2445,6 +2437,7 @@ struct zs_pool *zs_create_pool(const char *name)
|
||||
if (size > ZS_MAX_ALLOC_SIZE)
|
||||
size = ZS_MAX_ALLOC_SIZE;
|
||||
pages_per_zspage = get_pages_per_zspage(size);
|
||||
objs_per_zspage = pages_per_zspage * PAGE_SIZE / size;
|
||||
|
||||
/*
|
||||
* size_class is used for normal zsmalloc operation such
|
||||
@ -2456,7 +2449,7 @@ struct zs_pool *zs_create_pool(const char *name)
|
||||
* previous size_class if possible.
|
||||
*/
|
||||
if (prev_class) {
|
||||
if (can_merge(prev_class, size, pages_per_zspage)) {
|
||||
if (can_merge(prev_class, pages_per_zspage, objs_per_zspage)) {
|
||||
pool->size_class[i] = prev_class;
|
||||
continue;
|
||||
}
|
||||
@ -2469,8 +2462,7 @@ struct zs_pool *zs_create_pool(const char *name)
|
||||
class->size = size;
|
||||
class->index = i;
|
||||
class->pages_per_zspage = pages_per_zspage;
|
||||
class->objs_per_zspage = class->pages_per_zspage *
|
||||
PAGE_SIZE / class->size;
|
||||
class->objs_per_zspage = objs_per_zspage;
|
||||
spin_lock_init(&class->lock);
|
||||
pool->size_class[i] = class;
|
||||
for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
|
||||
|
@ -608,6 +608,7 @@ static const struct {
const char *compact;
} gfp_compact_table[] = {
{ "GFP_TRANSHUGE", "THP" },
{ "GFP_TRANSHUGE_LIGHT", "THL" },
{ "GFP_HIGHUSER_MOVABLE", "HUM" },
{ "GFP_HIGHUSER", "HU" },
{ "GFP_USER", "U" },