forked from luck/tmp_suning_uos_patched
f3f0e1d215
This patch extends khugepaged to support collapse of tmpfs/shmem pages. We share a fair amount of infrastructure with anon-THP collapse. A few design points: - First we look for a VMA which is suitable for mapping a huge page; - If the VMA maps a shmem file, the rest of the scan/collapse operations operate on the page cache, not on page tables as in the anon VMA case. - khugepaged_scan_shmem() finds a range which is suitable for a huge page. The scan is lockless and shouldn't disturb the system too much. - Once a candidate for collapse is found, collapse_shmem() attempts to create a huge page: + scan over the radix tree, making the range point to the new huge page; + the new huge page is not-uptodate, locked and frozen (refcount is 0), so nobody can touch it until we say so. + we swap in pages during the scan. khugepaged_scan_shmem() filters out ranges with more than khugepaged_max_ptes_swap swapped out pages. It's HPAGE_PMD_NR/8 by default. + old pages are isolated, unmapped and put on a local list so they can be restored if the collapse fails. - If the collapse succeeds, we retract pte page tables from VMAs where huge page mapping is possible. The huge page will be mapped as a PMD on the next minor fault into the range. Link: http://lkml.kernel.org/r/1466021202-61880-35-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
165 lines
4.2 KiB
C
165 lines
4.2 KiB
C
/* Every event declared in this header is grouped under "huge_memory". */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM huge_memory

/*
 * Ordinary include guard, with one exception: the body is processed again
 * whenever TRACE_HEADER_MULTI_READ is defined.
 */
#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HUGE_MEMORY_H

#include <linux/tracepoint.h>

/*
 * SCAN_STATUS - X-macro list pairing each scan status enumerator with the
 * label used for it in trace output.
 *
 * The list is expanded through EM() for every entry except the last, which
 * goes through EMe(); the user (re)defines both macros before each expansion
 * to choose what is generated from the pairs.
 *
 * The final entry deliberately carries no line continuation, so whatever
 * follows the list can never be spliced into the macro body.
 */
#define SCAN_STATUS							\
	EM( SCAN_FAIL,			"failed")			\
	EM( SCAN_SUCCEED,		"succeeded")			\
	EM( SCAN_PMD_NULL,		"pmd_null")			\
	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
	EM( SCAN_PAGE_RO,		"no_writable_page")		\
	EM( SCAN_NO_REFERENCED_PAGE,	"no_referenced_page")		\
	EM( SCAN_PAGE_NULL,		"page_null")			\
	EM( SCAN_SCAN_ABORT,		"scan_aborted")			\
	EM( SCAN_PAGE_COUNT,		"not_suitable_page_count")	\
	EM( SCAN_PAGE_LRU,		"page_not_in_lru")		\
	EM( SCAN_PAGE_LOCK,		"page_locked")			\
	EM( SCAN_PAGE_ANON,		"page_not_anon")		\
	EM( SCAN_PAGE_COMPOUND,		"page_compound")		\
	EM( SCAN_ANY_PROCESS,		"no_process_for_page")		\
	EM( SCAN_VMA_NULL,		"vma_null")			\
	EM( SCAN_VMA_CHECK,		"vma_check_failed")		\
	EM( SCAN_ADDRESS_RANGE,		"not_suitable_address_range")	\
	EM( SCAN_SWAP_CACHE_PAGE,	"page_swap_cache")		\
	EM( SCAN_DEL_PAGE_LRU,		"could_not_delete_page_from_lru")\
	EM( SCAN_ALLOC_HUGE_PAGE_FAIL,	"alloc_huge_page_failed")	\
	EM( SCAN_CGROUP_CHARGE_FAIL,	"cgroup_charge_failed")		\
	EM( SCAN_EXCEED_SWAP_PTE,	"exceed_swap_pte")		\
	EMe(SCAN_TRUNCATED,		"truncated")

#undef EM
|
|
#undef EMe
|
|
#define EM(a, b) TRACE_DEFINE_ENUM(a);
|
|
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
|
|
|
|
SCAN_STATUS
|
|
|
|
#undef EM
|
|
#undef EMe
|
|
#define EM(a, b) {a, b},
|
|
#define EMe(a, b) {a, b}
|
|
|
|
TRACE_EVENT(mm_khugepaged_scan_pmd,
|
|
|
|
TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
|
|
bool referenced, int none_or_zero, int status, int unmapped),
|
|
|
|
TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(unsigned long, pfn)
|
|
__field(bool, writable)
|
|
__field(bool, referenced)
|
|
__field(int, none_or_zero)
|
|
__field(int, status)
|
|
__field(int, unmapped)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->pfn = page ? page_to_pfn(page) : -1;
|
|
__entry->writable = writable;
|
|
__entry->referenced = referenced;
|
|
__entry->none_or_zero = none_or_zero;
|
|
__entry->status = status;
|
|
__entry->unmapped = unmapped;
|
|
),
|
|
|
|
TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s, unmapped=%d",
|
|
__entry->mm,
|
|
__entry->pfn,
|
|
__entry->writable,
|
|
__entry->referenced,
|
|
__entry->none_or_zero,
|
|
__print_symbolic(__entry->status, SCAN_STATUS),
|
|
__entry->unmapped)
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page,
|
|
|
|
TP_PROTO(struct mm_struct *mm, int isolated, int status),
|
|
|
|
TP_ARGS(mm, isolated, status),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(int, isolated)
|
|
__field(int, status)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->isolated = isolated;
|
|
__entry->status = status;
|
|
),
|
|
|
|
TP_printk("mm=%p, isolated=%d, status=%s",
|
|
__entry->mm,
|
|
__entry->isolated,
|
|
__print_symbolic(__entry->status, SCAN_STATUS))
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page_isolate,
|
|
|
|
TP_PROTO(struct page *page, int none_or_zero,
|
|
bool referenced, bool writable, int status),
|
|
|
|
TP_ARGS(page, none_or_zero, referenced, writable, status),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(unsigned long, pfn)
|
|
__field(int, none_or_zero)
|
|
__field(bool, referenced)
|
|
__field(bool, writable)
|
|
__field(int, status)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->pfn = page ? page_to_pfn(page) : -1;
|
|
__entry->none_or_zero = none_or_zero;
|
|
__entry->referenced = referenced;
|
|
__entry->writable = writable;
|
|
__entry->status = status;
|
|
),
|
|
|
|
TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s",
|
|
__entry->pfn,
|
|
__entry->none_or_zero,
|
|
__entry->referenced,
|
|
__entry->writable,
|
|
__print_symbolic(__entry->status, SCAN_STATUS))
|
|
);
|
|
|
|
TRACE_EVENT(mm_collapse_huge_page_swapin,
|
|
|
|
TP_PROTO(struct mm_struct *mm, int swapped_in, int ret),
|
|
|
|
TP_ARGS(mm, swapped_in, ret),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(int, swapped_in)
|
|
__field(int, ret)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->swapped_in = swapped_in;
|
|
__entry->ret = ret;
|
|
),
|
|
|
|
TP_printk("mm=%p, swapped_in=%d, ret=%d",
|
|
__entry->mm,
|
|
__entry->swapped_in,
|
|
__entry->ret)
|
|
);
|
|
|
|
#endif /* __HUGE_MEMORY_H */

/* Placed after the #endif, i.e. outside the include guard. */
#include <trace/define_trace.h>