Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - a lot more of MM, quite a bit more yet to come: (memcg, pagemap,
   vmalloc, pagealloc, migration, thp, ksm, madvise, virtio,
   userfaultfd, memory-hotplug, shmem, rmap, zswap, zsmalloc, cleanups)

 - various other subsystems (procfs, misc, MAINTAINERS, bitops, lib,
   checkpatch, epoll, binfmt, kallsyms, reiserfs, kmod, gcov, kconfig,
   ubsan, fault-injection, ipc)

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (158 commits)
  ipc/shm.c: make compat_ksys_shmctl() static
  ipc/mqueue.c: fix a brace coding style issue
  lib/Kconfig.debug: fix a typo "capabilitiy" -> "capability"
  ubsan: include bug type in report header
  kasan: unset panic_on_warn before calling panic()
  ubsan: check panic_on_warn
  drivers/misc/lkdtm/bugs.c: add arithmetic overflow and array bounds checks
  ubsan: split "bounds" checker from other options
  ubsan: add trap instrumentation option
  init/Kconfig: clean up ANON_INODES and old IO schedulers options
  kernel/gcov/fs.c: replace zero-length array with flexible-array member
  gcov: gcc_3_4: replace zero-length array with flexible-array member
  gcov: gcc_4_7: replace zero-length array with flexible-array member
  kernel/kmod.c: fix a typo "assuems" -> "assumes"
  reiserfs: clean up several indentation issues
  kallsyms: unexport kallsyms_lookup_name() and kallsyms_on_each_symbol()
  samples/hw_breakpoint: drop use of kallsyms_lookup_name()
  samples/hw_breakpoint: drop HW_BREAKPOINT_R when reporting writes
  fs/binfmt_elf.c: don't free interpreter's ELF pheaders on common path
  fs/binfmt_elf.c: allocate less for static executable
  ...
commit 63bef48fd6
@@ -2573,13 +2573,22 @@
 			For details see: Documentation/admin-guide/hw-vuln/mds.rst

 	mem=nn[KMG]	[KNL,BOOT] Force usage of a specific amount of memory
-			Amount of memory to be used when the kernel is not able
-			to see the whole system memory or for test.
+			Amount of memory to be used in cases as follows:
+
+			1 for test;
+			2 when the kernel is not able to see the whole system memory;
+			3 memory that lies after 'mem=' boundary is excluded from
+			 the hypervisor, then assigned to KVM guests.
+
 			[X86] Work as limiting max address. Use together
 			with memmap= to avoid physical address space collisions.
 			Without memmap= PCI devices could be placed at addresses
 			belonging to unused RAM.
+
+			Note that this only takes effects during boot time since
+			in above case 3, memory may need be hot added after boot
+			if system memory of hypervisor is not sufficient.

 	mem=nopentium	[BUGS=X86-32] Disable usage of 4MB pages for kernel
 			memory.
@@ -310,6 +310,11 @@ thp_fault_fallback
 	is incremented if a page fault fails to allocate
 	a huge page and instead falls back to using small pages.

+thp_fault_fallback_charge
+	is incremented if a page fault fails to charge a huge page and
+	instead falls back to using small pages even though the
+	allocation was successful.
+
 thp_collapse_alloc_failed
 	is incremented if khugepaged found a range
 	of pages that should be collapsed into one huge page but failed

@@ -319,6 +324,15 @@ thp_file_alloc
 	is incremented every time a file huge page is successfully
 	allocated.

+thp_file_fallback
+	is incremented if a file huge page is attempted to be allocated
+	but fails and instead falls back to using small pages.
+
+thp_file_fallback_charge
+	is incremented if a file huge page cannot be charged and instead
+	falls back to using small pages even though the allocation was
+	successful.
+
 thp_file_mapped
 	is incremented every time a file huge page is mapped into
 	user address space.
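If it helps to see these counters in context, here is a small userspace sketch (not part of the patch) that dumps the fallback counters named above from /proc/vmstat:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/vmstat", "r");

		if (!f) {
			perror("/proc/vmstat");
			return 1;
		}
		/* Matches thp_fault_fallback{,_charge} and thp_file_fallback{,_charge}. */
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "thp_fault_fallback", 18) ||
			    !strncmp(line, "thp_file_fallback", 17))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}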
@@ -108,6 +108,57 @@ UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
 half copied page since it'll keep userfaulting until the copy has
 finished.

+Notes:
+
+- If you requested UFFDIO_REGISTER_MODE_MISSING when registering then
+  you must provide some kind of page in your thread after reading from
+  the uffd.  You must provide either UFFDIO_COPY or UFFDIO_ZEROPAGE.
+  The normal behavior of the OS automatically providing a zero page on
+  an anonymous mmapping is not in place.
+
+- None of the page-delivering ioctls default to the range that you
+  registered with.  You must fill in all fields for the appropriate
+  ioctl struct including the range.
+
+- You get the address of the access that triggered the missing page
+  event out of a struct uffd_msg that you read in the thread from the
+  uffd.  You can supply as many pages as you want with UFFDIO_COPY or
+  UFFDIO_ZEROPAGE.  Keep in mind that unless you used DONTWAKE then
+  the first of any of those IOCTLs wakes up the faulting thread.
+
+- Be sure to test for all errors including (pollfd[0].revents &
+  POLLERR).  This can happen, e.g. when ranges supplied were
+  incorrect.
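As a rough illustration of the missing-page flow these notes describe (a sketch, not from the patch; error handling is trimmed and the prepared page contents are up to the caller):

	#include <linux/userfaultfd.h>
	#include <poll.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	/* Assumes 'uffd' was created via the userfaultfd syscall and the
	 * region was registered with UFFDIO_REGISTER_MODE_MISSING. */
	static void handle_one_fault(int uffd, void *page, size_t page_size)
	{
		struct pollfd pollfd = { .fd = uffd, .events = POLLIN };
		struct uffd_msg msg;
		struct uffdio_copy copy;

		poll(&pollfd, 1, -1);
		if (pollfd.revents & POLLERR)
			return;		/* e.g. a bad range was supplied */
		if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
			return;
		if (msg.event != UFFD_EVENT_PAGEFAULT)
			return;

		/* Resolve the fault: copy one prepared page to the faulting
		 * address, rounded down to a page boundary.  The fields do not
		 * default to the registered range; all must be filled in. */
		copy.dst = msg.arg.pagefault.address & ~(page_size - 1);
		copy.src = (unsigned long)page;
		copy.len = page_size;
		copy.mode = 0;	/* no DONTWAKE: wake the faulting thread */
		ioctl(uffd, UFFDIO_COPY, &copy);
	}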
+Write Protect Notifications
+---------------------------
+
+This is equivalent to (but faster than) using mprotect and a SIGSEGV
+signal handler.
+
+Firstly you need to register a range with UFFDIO_REGISTER_MODE_WP.
+Instead of using mprotect(2) you use ioctl(uffd, UFFDIO_WRITEPROTECT,
+struct *uffdio_writeprotect) while mode = UFFDIO_WRITEPROTECT_MODE_WP
+in the struct passed in.  The range does not default to and does not
+have to be identical to the range you registered with.  You can write
+protect as many ranges as you like (inside the registered range).
+Then, in the thread reading from uffd the struct will have
+msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP set.  Now you send
+ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect) again
+while pagefault.mode does not have UFFDIO_WRITEPROTECT_MODE_WP set.
+This wakes up the thread which will continue to run with writes.  This
+allows you to do the bookkeeping about the write in the uffd reading
+thread before the ioctl.
+
+If you registered with both UFFDIO_REGISTER_MODE_MISSING and
+UFFDIO_REGISTER_MODE_WP then you need to think about the sequence in
+which you supply a page and undo write protect.  Note that there is a
+difference between writes into a WP area and into a !WP area.  The
+former will have UFFD_PAGEFAULT_FLAG_WP set, the latter
+UFFD_PAGEFAULT_FLAG_WRITE.  The latter did not fail on protection but
+you still need to supply a page when UFFDIO_REGISTER_MODE_MISSING was
+used.
+
 QEMU/KVM
 ========
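A compact sketch of the write-protect round trip described above, assuming the range was already registered with UFFDIO_REGISTER_MODE_WP:

	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>

	static int wp_toggle(int uffd, unsigned long addr, unsigned long len,
			     int protect)
	{
		struct uffdio_writeprotect wp;

		wp.range.start = addr;
		wp.range.len = len;
		/* Setting _MODE_WP arms write protection; clearing it resolves
		 * a UFFD_PAGEFAULT_FLAG_WP event and wakes the faulting thread. */
		wp.mode = protect ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
		return ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
	}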
Documentation/vm/free_page_reporting.rst (new file, 40 lines)
@@ -0,0 +1,40 @@
+.. _free_page_reporting:
+
+=====================
+Free Page Reporting
+=====================
+
+Free page reporting is an API by which a device can register to receive
+lists of pages that are currently unused by the system.  This is useful in
+the case of virtualization where a guest is then able to use this data to
+notify the hypervisor that it is no longer using certain pages in memory.
+
+For the driver, typically a balloon driver, to use this functionality
+it will allocate and initialize a page_reporting_dev_info structure.  The
+field within the structure it will populate is the "report" function
+pointer used to process the scatterlist.  It must also guarantee that it can
+handle at least PAGE_REPORTING_CAPACITY worth of scatterlist entries per
+call to the function.  A call to page_reporting_register will register the
+page reporting interface with the reporting framework assuming no other
+page reporting devices are already registered.
+
+Once registered the page reporting API will begin reporting batches of
+pages to the driver.  The API will start reporting pages 2 seconds after
+the interface is registered and will continue to do so 2 seconds after any
+page of a sufficiently high order is freed.
+
+Pages reported will be stored in the scatterlist passed to the reporting
+function with the final entry having the end bit set in entry nent - 1.
+While pages are being processed by the report function they will not be
+accessible to the allocator.  Once the report function has been completed
+the pages will be returned to the free area from which they were obtained.
+
+Prior to removing a driver that is making use of free page reporting it
+is necessary to call page_reporting_unregister to have the
+page_reporting_dev_info structure that is currently in use by free page
+reporting removed.  Doing this will prevent further reports from being
+issued via the interface.  If another driver or the same driver is
+registered it is possible for it to resume where the previous driver had
+left off in terms of reporting free pages.
+
+Alexander Duyck, Dec 04, 2019
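A minimal kernel-side sketch of the registration flow this document describes; the report callback body is a placeholder for the device-specific step that would hand the pages to the hypervisor:

	#include <linux/module.h>
	#include <linux/page_reporting.h>
	#include <linux/scatterlist.h>

	/* Must handle up to PAGE_REPORTING_CAPACITY scatterlist entries per
	 * call; the pages in 'sgl' stay unavailable to the allocator until
	 * this function returns. */
	static int demo_report(struct page_reporting_dev_info *prdev,
			       struct scatterlist *sgl, unsigned int nents)
	{
		return 0;
	}

	static struct page_reporting_dev_info demo_prdev = {
		.report = demo_report,
	};

	static int __init demo_init(void)
	{
		/* Fails if another page reporting device is registered. */
		return page_reporting_register(&demo_prdev);
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
		/* Stop further reports before the structure goes away. */
		page_reporting_unregister(&demo_prdev);
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");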
@@ -35,9 +35,11 @@ Zswap evicts pages from compressed cache on an LRU basis to the backing swap
 device when the compressed pool reaches its size limit.  This requirement had
 been identified in prior community discussions.

-Zswap is disabled by default but can be enabled at boot time by setting
-the ``enabled`` attribute to 1 at boot time. ie: ``zswap.enabled=1``.  Zswap
-can also be enabled and disabled at runtime using the sysfs interface.
+Whether Zswap is enabled at the boot time depends on whether
+the ``CONFIG_ZSWAP_DEFAULT_ON`` Kconfig option is enabled or not.
+This setting can then be overridden by providing the kernel command line
+``zswap.enabled=`` option, for example ``zswap.enabled=0``.
+Zswap can also be enabled and disabled at runtime using the sysfs interface.
 An example command to enable zswap at runtime, assuming sysfs is mounted
 at ``/sys``, is::

@@ -64,9 +66,10 @@ allocation in zpool is not directly accessible by address. Rather, a handle is
 returned by the allocation routine and that handle must be mapped before being
 accessed.  The compressed memory pool grows on demand and shrinks as compressed
 pages are freed.  The pool is not preallocated.  By default, a zpool
-of type zbud is created, but it can be selected at boot time by
-setting the ``zpool`` attribute, e.g. ``zswap.zpool=zbud``. It can
-also be changed at runtime using the sysfs ``zpool`` attribute, e.g.::
+of type selected in ``CONFIG_ZSWAP_ZPOOL_DEFAULT`` Kconfig option is created,
+but it can be overridden at boot time by setting the ``zpool`` attribute,
+e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs
+``zpool`` attribute, e.g.::

	echo zbud > /sys/module/zswap/parameters/zpool

@@ -97,8 +100,9 @@ controlled policy:
 * max_pool_percent - The maximum percentage of memory that the compressed
   pool can occupy.

-The default compressor is lzo, but it can be selected at boot time by
-setting the ``compressor`` attribute, e.g. ``zswap.compressor=lzo``.
+The default compressor is selected in ``CONFIG_ZSWAP_COMPRESSOR_DEFAULT``
+Kconfig option, but it can be overridden at boot time by setting the
+``compressor`` attribute, e.g. ``zswap.compressor=lzo``.
 It can also be changed at runtime using the sysfs "compressor"
 attribute, e.g.::
MAINTAINERS (37 additions, 37 deletions)
@@ -77,21 +77,13 @@ Tips for patch submitters

 8. Happy hacking.

-Descriptions of section entries
--------------------------------
+Descriptions of section entries and preferred order
+---------------------------------------------------

	M: *Mail* patches to: FullName <address@domain>
	R: Designated *Reviewer*: FullName <address@domain>
	   These reviewers should be CCed on patches.
	L: *Mailing list* that is relevant to this area
-	W: *Web-page* with status/info
-	B: URI for where to file *bugs*. A web-page with detailed bug
-	   filing info, a direct bug tracker link, or a mailto: URI.
-	C: URI for *chat* protocol, server and channel where developers
-	   usually hang out, for example irc://server/channel.
-	Q: *Patchwork* web based patch tracking system site
-	T: *SCM* tree type and location.
-	   Type is one of: git, hg, quilt, stgit, topgit
	S: *Status*, one of the following:
	   Supported:	Someone is actually paid to look after this.
	   Maintained:	Someone actually looks after it.

@@ -102,30 +94,39 @@ Descriptions of section entries
	   Obsolete:	Old code. Something tagged obsolete generally means
			it has been replaced by a better system and you
			should be using that.
+	W: *Web-page* with status/info
+	Q: *Patchwork* web based patch tracking system site
+	B: URI for where to file *bugs*. A web-page with detailed bug
+	   filing info, a direct bug tracker link, or a mailto: URI.
+	C: URI for *chat* protocol, server and channel where developers
+	   usually hang out, for example irc://server/channel.
+	P: Subsystem Profile document for more details submitting
+	   patches to the given subsystem. This is either an in-tree file,
+	   or a URI. See Documentation/maintainer/maintainer-entry-profile.rst
+	   for details.
+	T: *SCM* tree type and location.
+	   Type is one of: git, hg, quilt, stgit, topgit
	F: *Files* and directories wildcard patterns.
	   A trailing slash includes all files and subdirectory files.
	   F:	drivers/net/	all files in and below drivers/net
	   F:	drivers/net/*	all files in drivers/net, but not below
	   F:	*/net/*		all files in "any top level directory"/net
	   One pattern per line.  Multiple F: lines acceptable.
-	N: Files and directories *Regex* patterns.
-	   N:	[^a-z]tegra	all files whose path contains the word tegra
-	   One pattern per line.  Multiple N: lines acceptable.
-	   scripts/get_maintainer.pl has different behavior for files that
-	   match F: pattern and matches of N: patterns.  By default,
-	   get_maintainer will not look at git log history when an F: pattern
-	   match occurs.  When an N: match occurs, git log history is used
-	   to also notify the people that have git commit signatures.
	X: *Excluded* files and directories that are NOT maintained, same
	   rules as F:. Files exclusions are tested before file matches.
	   Can be useful for excluding a specific subdirectory, for instance:
	   F:	net/
	   X:	net/ipv6/
	   matches all files in and below net excluding net/ipv6/
+	N: Files and directories *Regex* patterns.
+	   N:	[^a-z]tegra	all files whose path contains tegra
+	        (not including files like integrator)
+	   One pattern per line.  Multiple N: lines acceptable.
+	   scripts/get_maintainer.pl has different behavior for files that
+	   match F: pattern and matches of N: patterns.  By default,
+	   get_maintainer will not look at git log history when an F: pattern
+	   match occurs.  When an N: match occurs, git log history is used
+	   to also notify the people that have git commit signatures.
	K: *Content regex* (perl extended) pattern match in a patch or file.
	   For instance:
	   K: of_get_profile
@@ -8,8 +8,6 @@

 #include <asm/smp.h>

-struct bootmem_data_t; /* stupid forward decl. */
-
 /*
  * Following are macros that are specific to this numa platform.
  */
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -141,7 +141,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+		if (unlikely(!vma_is_accessible(vma)))
			goto bad_area;
	}
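For reference, the vma_is_accessible() helper these fault-path hunks switch to is introduced elsewhere in this series in include/linux/mm.h; quoted from memory (so treat as illustrative), it is equivalent to the open-coded flag test it replaces:

	static inline bool vma_is_accessible(struct vm_area_struct *vma)
	{
		/* Accessible means readable, writable, or executable. */
		return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
	}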
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -54,6 +54,8 @@ SECTIONS {
		CPUIDLE_TEXT
		LOCK_TEXT
		KPROBES_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
		*(.gnu.linkonce.t*)
	}
@@ -125,7 +125,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
	case 1:		/* read, present */
		goto acc_err;
	case 0:		/* read, not present */
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+		if (unlikely(!vma_is_accessible(vma)))
			goto acc_err;
	}
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"

@@ -32,6 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
-	printf "\n"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -142,7 +142,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
			goto bad_area;
		}
	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+		if (unlikely(!vma_is_accessible(vma)))
			goto bad_area;
	}
 }
@@ -47,6 +47,7 @@ SECTIONS
		LOCK_TEXT
		KPROBES_TEXT
		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
		*(.fixup)
	}
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"

@@ -32,6 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
-	printf "\n"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -422,7 +422,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
			break;
		}
	} else if (vma && hva >= vma->vm_start &&
-		   (vma->vm_flags & VM_HUGETLB)) {
+		   is_vm_hugetlb_page(vma)) {
		unsigned long psize = vma_kernel_pagesize(vma);

		tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
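Likewise, is_vm_hugetlb_page() simply wraps the VM_HUGETLB test; quoted from memory from include/linux/hugetlb_inline.h (the !CONFIG_HUGETLB_PAGE stub returns false), so treat as illustrative:

	static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
	{
		/* True when the VMA is backed by hugetlb pages. */
		return !!(vma->vm_flags & VM_HUGETLB);
	}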
@@ -314,7 +314,7 @@ static bool access_error(bool is_write, bool is_exec,
		return false;
	}

-	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+	if (unlikely(!vma_is_accessible(vma)))
		return true;
	/*
	 * We should ideally do the vma pkey access check here. But in the
@@ -231,16 +231,10 @@ static int memtrace_online(void)
			continue;
		}

-		/*
-		 * If kernel isn't compiled with the auto online option
-		 * we need to online the memory ourselves.
-		 */
-		if (!memhp_auto_online) {
-			lock_device_hotplug();
-			walk_memory_blocks(ent->start, ent->size, NULL,
-					   online_mem_block);
-			unlock_device_hotplug();
-		}
+		lock_device_hotplug();
+		walk_memory_blocks(ent->start, ent->size, NULL,
+				   online_mem_block);
+		unlock_device_hotplug();

		/*
		 * Memory was added successfully so clean up references to it
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -355,7 +355,7 @@ static inline int access_error(int error_code, struct vm_area_struct *vma)
		return 1;

	/* read, not present: */
-	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+	if (unlikely(!vma_is_accessible(vma)))
		return 1;

	return 0;
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -4,10 +4,6 @@

 #define BUILD_VDSO32

-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#undef CONFIG_OPTIMIZE_INLINING
-#endif
-
 #ifdef CONFIG_SPARC64

 /*
@@ -149,6 +149,7 @@ config X86
	select HAVE_ARCH_TRACEHOOK
	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD	if X86_64
+	select HAVE_ARCH_USERFAULTFD_WP			if USERFAULTFD
	select HAVE_ARCH_VMAP_STACK			if X86_64
	select HAVE_ARCH_WITHIN_STACK_FRAMES
	select HAVE_ASM_MODVERSIONS
@@ -285,7 +285,6 @@ CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
-CONFIG_OPTIMIZE_INLINING=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y

@@ -282,7 +282,6 @@ CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
-CONFIG_OPTIMIZE_INLINING=y
 CONFIG_UNWINDER_ORC=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
@@ -1,10 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define BUILD_VDSO32

-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#undef CONFIG_OPTIMIZE_INLINING
-#endif
-
 #ifdef CONFIG_X86_64

 /*
@@ -25,6 +25,7 @@
 #include <asm/x86_init.h>
 #include <asm/fpu/xstate.h>
 #include <asm/fpu/api.h>
+#include <asm-generic/pgtable_uffd.h>

 extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);

@@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
	return native_make_pte(v & ~clear);
 }

+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pte_uffd_wp(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_UFFD_WP;
+}
+
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_UFFD_WP);
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
	return pte_clear_flags(pte, _PAGE_DIRTY);

@@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
	return native_make_pmd(v & ~clear);
 }

+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pmd_uffd_wp(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_UFFD_WP;
+}
+
+static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_UFFD_WP);
+}
+
+static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
	return pmd_clear_flags(pmd, _PAGE_ACCESSED);

@@ -1374,6 +1409,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
 #endif
 #endif

+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
+}
+
+static inline int pte_swp_uffd_wp(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
+}
+
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
+}
+
+static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+{
+	return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
+}
+
+static inline int pmd_swp_uffd_wp(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
+}
+
+static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
 #define PKRU_AD_BIT 0x1
 #define PKRU_WD_BIT 0x2
 #define PKRU_BITS_PER_PKEY 2
@@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
 *
 * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
 * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
-* | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|X|SD|0| <- swp entry
+* | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| X|F|SD|0| <- swp entry
 *
 * G (8) is aliased and used as a PROT_NONE indicator for
 * !present ptes.  We need to start storing swap entries above

@@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
 * erratum where they can be incorrectly set by hardware on
 * non-present PTEs.
 *
+* SD Bits 1-4 are not used in non-present format and available for
+* special use described below:
+*
 * SD (1) in swp entry is used to store soft dirty bit, which helps us
 * remember soft dirty over page migration
 *
+* F (2) in swp entry is used to record when a pagetable is
+* writeprotected by userfaultfd WP support.
+*
 * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
 * but also L and G.
 *
@@ -32,6 +32,7 @@

 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
+#define _PAGE_BIT_UFFD_WP	_PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4

@@ -100,6 +101,14 @@
 #define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif

+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define _PAGE_UFFD_WP		(_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
+#define _PAGE_SWP_UFFD_WP	_PAGE_USER
+#else
+#define _PAGE_UFFD_WP		(_AT(pteval_t, 0))
+#define _PAGE_SWP_UFFD_WP	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #define _PAGE_DEVMAP	(_AT(u64, 1) << _PAGE_BIT_DEVMAP)

@@ -118,7 +127,8 @@
 */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
-			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC)
+			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC |	\
+			 _PAGE_UFFD_WP)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

 /*
@@ -1222,7 +1222,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
		return 1;

	/* read, not present: */
-	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+	if (unlikely(!vma_is_accessible(vma)))
		return 1;

	return 0;
@@ -32,5 +32,5 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
	printf "#define __NR_syscalls\t%s\n" "${nxt}"
	printf "#endif\n"
	printf "\n"
-	printf "#endif /* %s */" "${fileguard}"
+	printf "#endif /* %s */\n" "${fileguard}"
 ) > "$out"
@@ -27,6 +27,24 @@

 #define MEMORY_CLASS_NAME	"memory"

+static const char *const online_type_to_str[] = {
+	[MMOP_OFFLINE] = "offline",
+	[MMOP_ONLINE] = "online",
+	[MMOP_ONLINE_KERNEL] = "online_kernel",
+	[MMOP_ONLINE_MOVABLE] = "online_movable",
+};
+
+int memhp_online_type_from_str(const char *str)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
+		if (sysfs_streq(str, online_type_to_str[i]))
+			return i;
+	}
+	return -EINVAL;
+}
+
 #define to_memory_block(dev) container_of(dev, struct memory_block, dev)

 static int sections_per_block;
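Hypothetical usage, mirroring what state_store() and auto_online_blocks_store() below do with the sysfs input buffer:

	/* e.g. after: echo online_movable > /sys/devices/system/memory/memoryN/state */
	const int online_type = memhp_online_type_from_str("online_movable");

	/* online_type == MMOP_ONLINE_MOVABLE here; any unrecognized string
	 * yields -EINVAL, which the sysfs stores pass straight back. */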
@@ -144,45 +162,6 @@ int memory_notify(unsigned long val, void *v)
	return blocking_notifier_call_chain(&memory_chain, val, v);
 }

-/*
- * The probe routines leave the pages uninitialized, just as the bootmem code
- * does. Make sure we do not access them, but instead use only information from
- * within sections.
- */
-static bool pages_correctly_probed(unsigned long start_pfn)
-{
-	unsigned long section_nr = pfn_to_section_nr(start_pfn);
-	unsigned long section_nr_end = section_nr + sections_per_block;
-	unsigned long pfn = start_pfn;
-
-	/*
-	 * memmap between sections is not contiguous except with
-	 * SPARSEMEM_VMEMMAP. We lookup the page once per section
-	 * and assume memmap is contiguous within each section
-	 */
-	for (; section_nr < section_nr_end; section_nr++) {
-		if (WARN_ON_ONCE(!pfn_valid(pfn)))
-			return false;
-
-		if (!present_section_nr(section_nr)) {
-			pr_warn("section %ld pfn[%lx, %lx) not present\n",
-				section_nr, pfn, pfn + PAGES_PER_SECTION);
-			return false;
-		} else if (!valid_section_nr(section_nr)) {
-			pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n",
-				section_nr, pfn, pfn + PAGES_PER_SECTION);
-			return false;
-		} else if (online_section_nr(section_nr)) {
-			pr_warn("section %ld pfn[%lx, %lx) is already online\n",
-				section_nr, pfn, pfn + PAGES_PER_SECTION);
-			return false;
-		}
-		pfn += PAGES_PER_SECTION;
-	}
-
-	return true;
-}
-
 /*
  * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
  * OK to have direct references to sparsemem variables in here.

@@ -199,9 +178,6 @@ memory_block_action(unsigned long start_section_nr, unsigned long action,

	switch (action) {
	case MEM_ONLINE:
-		if (!pages_correctly_probed(start_pfn))
-			return -EBUSY;
-
		ret = online_pages(start_pfn, nr_pages, online_type, nid);
		break;
	case MEM_OFFLINE:
@@ -245,17 +221,14 @@ static int memory_subsys_online(struct device *dev)
		return 0;

	/*
-	 * If we are called from state_store(), online_type will be
-	 * set >= 0 Otherwise we were called from the device online
-	 * attribute and need to set the online_type.
+	 * When called via device_online() without configuring the online_type,
+	 * we want to default to MMOP_ONLINE.
	 */
-	if (mem->online_type < 0)
-		mem->online_type = MMOP_ONLINE_KEEP;
+	if (mem->online_type == MMOP_OFFLINE)
+		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
-
-	/* clear online_type */
-	mem->online_type = -1;
+	mem->online_type = MMOP_OFFLINE;

	return ret;
 }
@@ -267,40 +240,27 @@ static int memory_subsys_offline(struct device *dev)
	if (mem->state == MEM_OFFLINE)
		return 0;

-	/* Can't offline block with non-present sections */
-	if (mem->section_count != sections_per_block)
-		return -EINVAL;
-
	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
 }

 static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
 {
+	const int online_type = memhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
-	int ret, online_type;
+	int ret;
+
+	if (online_type < 0)
+		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

-	if (sysfs_streq(buf, "online_kernel"))
-		online_type = MMOP_ONLINE_KERNEL;
-	else if (sysfs_streq(buf, "online_movable"))
-		online_type = MMOP_ONLINE_MOVABLE;
-	else if (sysfs_streq(buf, "online"))
-		online_type = MMOP_ONLINE_KEEP;
-	else if (sysfs_streq(buf, "offline"))
-		online_type = MMOP_OFFLINE;
-	else {
-		ret = -EINVAL;
-		goto err;
-	}
-
	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
-	case MMOP_ONLINE_KEEP:
+	case MMOP_ONLINE:
+		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);

@@ -312,7 +272,6 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
		ret = -EINVAL; /* should never happen */
	}

-err:
	unlock_device_hotplug();

	if (ret < 0)
@@ -380,7 +339,8 @@ static ssize_t valid_zones_show(struct device *dev,
	}

	nid = mem->nid;
-	default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
+	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
+					  nr_pages);
	strcat(buf, default_zone->name);

	print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
@@ -418,23 +378,20 @@ static DEVICE_ATTR_RO(block_size_bytes);
 static ssize_t auto_online_blocks_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
 {
-	if (memhp_auto_online)
-		return sprintf(buf, "online\n");
-	else
-		return sprintf(buf, "offline\n");
+	return sprintf(buf, "%s\n",
+		       online_type_to_str[memhp_default_online_type]);
 }

 static ssize_t auto_online_blocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
 {
-	if (sysfs_streq(buf, "online"))
-		memhp_auto_online = true;
-	else if (sysfs_streq(buf, "offline"))
-		memhp_auto_online = false;
-	else
+	const int online_type = memhp_online_type_from_str(buf);
+
+	if (online_type < 0)
		return -EINVAL;

+	memhp_default_online_type = online_type;
	return count;
 }
@@ -627,7 +584,7 @@ static int init_memory_block(struct memory_block **memory,

 static int add_memory_block(unsigned long base_section_nr)
 {
-	int ret, section_count = 0;
+	int section_count = 0;
	struct memory_block *mem;
	unsigned long nr;

@@ -638,12 +595,8 @@ static int add_memory_block(unsigned long base_section_nr)

	if (section_count == 0)
		return 0;
-	ret = init_memory_block(&mem, base_memory_block_id(base_section_nr),
-				MEM_ONLINE);
-	if (ret)
-		return ret;
-	mem->section_count = section_count;
-	return 0;
+	return init_memory_block(&mem, base_memory_block_id(base_section_nr),
+				 MEM_ONLINE);
 }

 static void unregister_memory(struct memory_block *memory)
@@ -679,7 +632,6 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
		ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
		if (ret)
			break;
-		mem->section_count = sections_per_block;
	}
	if (ret) {
		end_block_id = block_id;

@@ -688,7 +640,6 @@ int create_memory_block_devices(unsigned long start, unsigned long size)
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
-			mem->section_count = 0;
			unregister_memory(mem);
		}
	}

@@ -717,7 +668,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
-		mem->section_count = 0;
		unregister_memory_block_under_nodes(mem);
		unregister_memory(mem);
	}
@@ -533,7 +533,6 @@ struct hv_dynmem_device {
	 * State to synchronize hot-add.
	 */
	struct completion ol_waitevent;
-	bool ha_waiting;
	/*
	 * This thread handles hot-add
	 * requests from the host as well as notifying

@@ -634,10 +633,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
	switch (val) {
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
-		if (dm_device.ha_waiting) {
-			dm_device.ha_waiting = false;
-			complete(&dm_device.ol_waitevent);
-		}
+		complete(&dm_device.ol_waitevent);
		break;

	case MEM_OFFLINE:

@@ -726,8 +722,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
		has->covered_end_pfn += processed_pfn;
		spin_unlock_irqrestore(&dm_device.ha_lock, flags);

-		init_completion(&dm_device.ol_waitevent);
-		dm_device.ha_waiting = !memhp_auto_online;
+		reinit_completion(&dm_device.ol_waitevent);

		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
		ret = add_memory(nid, PFN_PHYS((start_pfn)),

@@ -753,15 +748,14 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
		}

		/*
-		 * Wait for the memory block to be onlined when memory onlining
-		 * is done outside of kernel (memhp_auto_online). Since the hot
-		 * add has succeeded, it is ok to proceed even if the pages in
-		 * the hot added region have not been "onlined" within the
-		 * allowed time.
+		 * Wait for memory to get onlined. If the kernel onlined the
+		 * memory when adding it, this will return directly. Otherwise,
+		 * it will wait for user space to online the memory. This helps
+		 * to avoid adding memory faster than it is getting onlined. As
+		 * adding succeeded, it is ok to proceed even if the memory was
+		 * not onlined in time.
		 */
-		if (dm_device.ha_waiting)
-			wait_for_completion_timeout(&dm_device.ol_waitevent,
-						    5*HZ);
+		wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
		post_status(&dm_device);
	}
 }

@@ -1706,6 +1700,7 @@ static int balloon_probe(struct hv_device *dev,

 #ifdef CONFIG_MEMORY_HOTPLUG
	set_online_page_callback(&hv_online_page);
+	init_completion(&dm_device.ol_waitevent);
	register_memory_notifier(&hv_memory_nb);
 #endif
@@ -11,6 +11,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>

 #ifdef CONFIG_X86_32
 #include <asm/desc.h>

@@ -175,6 +176,80 @@ void lkdtm_HUNG_TASK(void)
	schedule();
 }

+volatile unsigned int huge = INT_MAX - 2;
+volatile unsigned int ignored;
+
+void lkdtm_OVERFLOW_SIGNED(void)
+{
+	int value;
+
+	value = huge;
+	pr_info("Normal signed addition ...\n");
+	value += 1;
+	ignored = value;
+
+	pr_info("Overflowing signed addition ...\n");
+	value += 4;
+	ignored = value;
+}
+
+void lkdtm_OVERFLOW_UNSIGNED(void)
+{
+	unsigned int value;
+
+	value = huge;
+	pr_info("Normal unsigned addition ...\n");
+	value += 1;
+	ignored = value;
+
+	pr_info("Overflowing unsigned addition ...\n");
+	value += 4;
+	ignored = value;
+}
+
+/* Intentionally using old-style flex array definition of 1 byte. */
+struct array_bounds_flex_array {
+	int one;
+	int two;
+	char data[1];
+};
+
+struct array_bounds {
+	int one;
+	int two;
+	char data[8];
+	int three;
+};
+
+void lkdtm_ARRAY_BOUNDS(void)
+{
+	struct array_bounds_flex_array *not_checked;
+	struct array_bounds *checked;
+	volatile int i;
+
+	not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL);
+	checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL);
+
+	pr_info("Array access within bounds ...\n");
+	/* For both, touch all bytes in the actual member size. */
+	for (i = 0; i < sizeof(checked->data); i++)
+		checked->data[i] = 'A';
+	/*
+	 * For the uninstrumented flex array member, also touch 1 byte
+	 * beyond to verify it is correctly uninstrumented.
+	 */
+	for (i = 0; i < sizeof(not_checked->data) + 1; i++)
+		not_checked->data[i] = 'A';
+
+	pr_info("Array access beyond bounds ...\n");
+	for (i = 0; i < sizeof(checked->data) + 1; i++)
+		checked->data[i] = 'B';
+
+	kfree(not_checked);
+	kfree(checked);
+}
+
 void lkdtm_CORRUPT_LIST_ADD(void)
 {
	/*

@@ -130,6 +130,9 @@ static const struct crashtype crashtypes[] = {
	CRASHTYPE(HARDLOCKUP),
	CRASHTYPE(SPINLOCKUP),
	CRASHTYPE(HUNG_TASK),
+	CRASHTYPE(OVERFLOW_SIGNED),
+	CRASHTYPE(OVERFLOW_UNSIGNED),
+	CRASHTYPE(ARRAY_BOUNDS),
	CRASHTYPE(EXEC_DATA),
	CRASHTYPE(EXEC_STACK),
	CRASHTYPE(EXEC_KMALLOC),

@@ -22,6 +22,9 @@ void lkdtm_SOFTLOCKUP(void);
 void lkdtm_HARDLOCKUP(void);
 void lkdtm_SPINLOCKUP(void);
 void lkdtm_HUNG_TASK(void);
+void lkdtm_OVERFLOW_SIGNED(void);
+void lkdtm_OVERFLOW_UNSIGNED(void);
+void lkdtm_ARRAY_BOUNDS(void);
 void lkdtm_CORRUPT_LIST_ADD(void);
 void lkdtm_CORRUPT_LIST_DEL(void);
 void lkdtm_CORRUPT_USER_DS(void);
@@ -58,6 +58,7 @@ config VIRTIO_BALLOON
	tristate "Virtio balloon driver"
	depends on VIRTIO
	select MEMORY_BALLOON
+	select PAGE_REPORTING
	---help---
	 This driver supports increasing and decreasing the amount
	 of memory within a KVM guest.
@@ -14,11 +14,13 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/balloon_compaction.h>
+#include <linux/oom.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
 #include <linux/pseudo_fs.h>
+#include <linux/page_reporting.h>

 /*
  * Balloon device works in 4K page units.  So each page is pointed to by

@@ -27,7 +29,9 @@
 */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
-#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+/* Maximum number of (4k) pages to deflate on OOM notifications. */
+#define VIRTIO_BALLOON_OOM_NR_PAGES 256
+#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80

 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
					     __GFP_NOMEMALLOC)

@@ -47,6 +51,7 @@ enum virtio_balloon_vq {
	VIRTIO_BALLOON_VQ_DEFLATE,
	VIRTIO_BALLOON_VQ_STATS,
	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_REPORTING,
	VIRTIO_BALLOON_VQ_MAX
 };

@@ -112,8 +117,15 @@ struct virtio_balloon {
	/* Memory statistics */
	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];

-	/* To register a shrinker to shrink memory upon memory pressure */
+	/* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */
	struct shrinker shrinker;

+	/* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */
+	struct notifier_block oom_nb;
+
+	/* Free page reporting device */
+	struct virtqueue *reporting_vq;
+	struct page_reporting_dev_info pr_dev_info;
 };

 static struct virtio_device_id id_table[] = {
@@ -153,6 +165,33 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)

 }

+int virtballoon_free_page_report(struct page_reporting_dev_info *pr_dev_info,
+				 struct scatterlist *sg, unsigned int nents)
+{
+	struct virtio_balloon *vb =
+		container_of(pr_dev_info, struct virtio_balloon, pr_dev_info);
+	struct virtqueue *vq = vb->reporting_vq;
+	unsigned int unused, err;
+
+	/* We should always be able to add these buffers to an empty queue. */
+	err = virtqueue_add_inbuf(vq, sg, nents, vb, GFP_NOWAIT | __GFP_NOWARN);
+
+	/*
+	 * In the extremely unlikely case that something has occurred and we
+	 * are able to trigger an error we will simply display a warning
+	 * and exit without actually processing the pages.
+	 */
+	if (WARN_ON_ONCE(err))
+		return err;
+
+	virtqueue_kick(vq);
+
+	/* When host has read buffer, this completes via balloon_ack */
+	wait_event(vb->acked, virtqueue_get_buf(vq, &unused));
+
+	return 0;
+}
+
 static void set_page_pfns(struct virtio_balloon *vb,
			  __virtio32 pfns[], struct page *page)
 {
@@ -481,6 +520,7 @@ static int init_vqs(struct virtio_balloon *vb)
	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
	callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+	names[VIRTIO_BALLOON_VQ_REPORTING] = NULL;

	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		names[VIRTIO_BALLOON_VQ_STATS] = "stats";

@@ -492,6 +532,11 @@ static int init_vqs(struct virtio_balloon *vb)
		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
	}

+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
+		names[VIRTIO_BALLOON_VQ_REPORTING] = "reporting_vq";
+		callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
+	}
+
	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
					 vqs, callbacks, names, NULL, NULL);
	if (err)

@@ -524,6 +569,9 @@ static int init_vqs(struct virtio_balloon *vb)
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];

+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
+		vb->reporting_vq = vqs[VIRTIO_BALLOON_VQ_REPORTING];
+
	return 0;
 }
@@ -788,50 +836,13 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb,
	return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
 }

-static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
-					unsigned long pages_to_free)
-{
-	return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
-		VIRTIO_BALLOON_PAGES_PER_PAGE;
-}
-
-static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
-					  unsigned long pages_to_free)
-{
-	unsigned long pages_freed = 0;
-
-	/*
-	 * One invocation of leak_balloon can deflate at most
-	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
-	 * multiple times to deflate pages till reaching pages_to_free.
-	 */
-	while (vb->num_pages && pages_freed < pages_to_free)
-		pages_freed += leak_balloon_pages(vb,
-						  pages_to_free - pages_freed);
-
-	update_balloon_size(vb);
-
-	return pages_freed;
-}
-
 static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
						  struct shrink_control *sc)
 {
-	unsigned long pages_to_free, pages_freed = 0;
	struct virtio_balloon *vb = container_of(shrinker,
					struct virtio_balloon, shrinker);

-	pages_to_free = sc->nr_to_scan;
-
-	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
-		pages_freed = shrink_free_pages(vb, pages_to_free);
-
-	if (pages_freed >= pages_to_free)
-		return pages_freed;
-
-	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
-
-	return pages_freed;
+	return shrink_free_pages(vb, sc->nr_to_scan);
 }

 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,

@@ -839,12 +850,22 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
 {
	struct virtio_balloon *vb = container_of(shrinker,
					struct virtio_balloon, shrinker);
-	unsigned long count;
-
-	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
-	count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;

-	return count;
+	return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
+}
+
+static int virtio_balloon_oom_notify(struct notifier_block *nb,
+				     unsigned long dummy, void *parm)
+{
+	struct virtio_balloon *vb = container_of(nb,
+						 struct virtio_balloon, oom_nb);
+	unsigned long *freed = parm;
+
+	*freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) /
+		  VIRTIO_BALLOON_PAGES_PER_PAGE;
+	update_balloon_size(vb);
+
+	return NOTIFY_OK;
 }

 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -864,7 +885,6 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
	struct virtio_balloon *vb;
-	__u32 poison_val;
	int err;

	if (!vdev->config->get) {
@@ -930,27 +950,65 @@ static int virtballoon_probe(struct virtio_device *vdev)
					  VIRTIO_BALLOON_CMD_ID_STOP);
		spin_lock_init(&vb->free_page_list_lock);
		INIT_LIST_HEAD(&vb->free_page_list);
-		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
-			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
-			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
-				      poison_val, &poison_val);
-		}
-	}
-	/*
-	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
-	 * shrinker needs to be registered to relieve memory pressure.
-	 */
-	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+
+		/*
+		 * We're allowed to reuse any free pages, even if they are
+		 * still to be processed by the host.
+		 */
		err = virtio_balloon_register_shrinker(vb);
		if (err)
			goto out_del_balloon_wq;
	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+		vb->oom_nb.notifier_call = virtio_balloon_oom_notify;
+		vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY;
+		err = register_oom_notifier(&vb->oom_nb);
+		if (err < 0)
+			goto out_unregister_shrinker;
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+		/* Start with poison val of 0 representing general init */
+		__u32 poison_val = 0;
+
+		/*
+		 * Let the hypervisor know that we are expecting a
+		 * specific value to be written back in balloon pages.
+		 */
+		if (!want_init_on_free())
+			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+
+		virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+			      poison_val, &poison_val);
+	}
+
+	vb->pr_dev_info.report = virtballoon_free_page_report;
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING)) {
+		unsigned int capacity;
+
+		capacity = virtqueue_get_vring_size(vb->reporting_vq);
+		if (capacity < PAGE_REPORTING_CAPACITY) {
+			err = -ENOSPC;
+			goto out_unregister_oom;
+		}
+
+		err = page_reporting_register(&vb->pr_dev_info);
+		if (err)
+			goto out_unregister_oom;
+	}
+
	virtio_device_ready(vdev);

	if (towards_target(vb))
		virtballoon_changed(vdev);
	return 0;

+out_unregister_oom:
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
+		unregister_oom_notifier(&vb->oom_nb);
+out_unregister_shrinker:
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		virtio_balloon_unregister_shrinker(vb);
 out_del_balloon_wq:
	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
		destroy_workqueue(vb->balloon_wq);
@@ -989,7 +1047,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
 {
	struct virtio_balloon *vb = vdev->priv;

+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_REPORTING))
+		page_reporting_unregister(&vb->pr_dev_info);
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
-		virtio_balloon_unregister_shrinker(vb);
+		unregister_oom_notifier(&vb->oom_nb);
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		virtio_balloon_unregister_shrinker(vb);
	spin_lock_irq(&vb->stop_update_lock);
	vb->stop_update = true;

@@ -1045,7 +1107,10 @@ static int virtballoon_restore(struct virtio_device *vdev)

 static int virtballoon_validate(struct virtio_device *vdev)
 {
-	if (!page_poisoning_enabled())
+	/* Tell the host whether we care about poisoned pages. */
+	if (!want_init_on_free() &&
+	    (IS_ENABLED(CONFIG_PAGE_POISONING_NO_SANITY) ||
+	     !page_poisoning_enabled()))
		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);

	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);

@@ -1058,6 +1123,7 @@ static unsigned int features[] = {
	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
	VIRTIO_BALLOON_F_PAGE_POISON,
+	VIRTIO_BALLOON_F_REPORTING,
 };

 static struct virtio_driver virtio_balloon_driver = {
@@ -27,6 +27,7 @@
 #include <linux/highuid.h>
 #include <linux/compiler.h>
 #include <linux/highmem.h>
+#include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/security.h>

@@ -698,19 +699,11 @@ static int load_elf_binary(struct linux_binprm *bprm)
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
-	struct {
-		struct elfhdr interp_elf_ex;
-	} *loc;
+	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

-	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
-	if (!loc) {
-		retval = -ENOMEM;
-		goto out_ret;
-	}
-
	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)

@@ -770,9 +763,15 @@ static int load_elf_binary(struct linux_binprm *bprm)
	 */
	would_dump(bprm, interpreter);

+	interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
+	if (!interp_elf_ex) {
+		retval = -ENOMEM;
+		goto out_free_ph;
+	}
+
	/* Get the exec headers */
-	retval = elf_read(interpreter, &loc->interp_elf_ex,
-			  sizeof(loc->interp_elf_ex), 0);
+	retval = elf_read(interpreter, interp_elf_ex,
+			  sizeof(*interp_elf_ex), 0);
	if (retval < 0)
		goto out_free_dentry;

@@ -806,25 +805,25 @@ static int load_elf_binary(struct linux_binprm *bprm)
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
-		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
+		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
-		if (!elf_check_arch(&loc->interp_elf_ex) ||
-		    elf_check_fdpic(&loc->interp_elf_ex))
+		if (!elf_check_arch(interp_elf_ex) ||
+		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
-		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
+		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
-		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
+		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
-				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
+				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)

@@ -839,7 +838,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
-				!!interpreter, &loc->interp_elf_ex,
+				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

@@ -1055,7 +1054,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
	}

	if (interpreter) {
-		elf_entry = load_elf_interp(&loc->interp_elf_ex,
+		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {

@@ -1064,7 +1063,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
			 * adjustment
			 */
			interp_load_addr = elf_entry;
-			elf_entry += loc->interp_elf_ex.e_entry;
+			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?

@@ -1075,6 +1074,9 @@ static int load_elf_binary(struct linux_binprm *bprm)

		allow_write_access(interpreter);
		fput(interpreter);
+
+		kfree(interp_elf_ex);
+		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {

@@ -1083,7 +1085,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
		}
	}

-	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

@@ -1153,12 +1154,11 @@ static int load_elf_binary(struct linux_binprm *bprm)
	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
 out:
-	kfree(loc);
-out_ret:
	return retval;

	/* error cleanup */
 out_free_dentry:
+	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
@ -1317,7 +1317,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
|
|||
}
|
||||
|
||||
/* Hugetlb memory check */
|
||||
if (vma->vm_flags & VM_HUGETLB) {
|
||||
if (is_vm_hugetlb_page(vma)) {
|
||||
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
|
||||
goto whole;
|
||||
if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
|
||||
|
|
|
@@ -218,13 +218,18 @@ struct eventpoll {
 	struct file *file;
 
 	/* used to optimize loop detection check */
-	int visited;
 	struct list_head visited_list_link;
+	int visited;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	/* used to track busy poll napi_id */
 	unsigned int napi_id;
 #endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/* tracks wakeup nests for lockdep validation */
+	u8 nests;
+#endif
 };
 
 /* Wait structure used by the poll hooks */
@@ -545,30 +550,47 @@ static int ep_call_nested(struct nested_calls *ncalls,
  */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-static DEFINE_PER_CPU(int, wakeup_nest);
-
-static void ep_poll_safewake(wait_queue_head_t *wq)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
 {
+	struct eventpoll *ep_src;
 	unsigned long flags;
-	int subclass;
+	u8 nests = 0;
 
-	local_irq_save(flags);
-	preempt_disable();
-	subclass = __this_cpu_read(wakeup_nest);
-	spin_lock_nested(&wq->lock, subclass + 1);
-	__this_cpu_inc(wakeup_nest);
-	wake_up_locked_poll(wq, POLLIN);
-	__this_cpu_dec(wakeup_nest);
-	spin_unlock(&wq->lock);
-	local_irq_restore(flags);
-	preempt_enable();
+	/*
+	 * To set the subclass or nesting level for spin_lock_irqsave_nested()
+	 * it might be natural to create a per-cpu nest count. However, since
+	 * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can
+	 * schedule() in the -rt kernel, the per-cpu variable are no longer
+	 * protected. Thus, we are introducing a per eventpoll nest field.
+	 * If we are not being call from ep_poll_callback(), epi is NULL and
+	 * we are at the first level of nesting, 0. Otherwise, we are being
+	 * called from ep_poll_callback() and if a previous wakeup source is
+	 * not an epoll file itself, we are at depth 1 since the wakeup source
+	 * is depth 0. If the wakeup source is a previous epoll file in the
+	 * wakeup chain then we use its nests value and record ours as
+	 * nests + 1. The previous epoll file nests value is stable since its
+	 * already holding its own poll_wait.lock.
+	 */
+	if (epi) {
+		if ((is_file_epoll(epi->ffd.file))) {
+			ep_src = epi->ffd.file->private_data;
+			nests = ep_src->nests;
+		} else {
+			nests = 1;
+		}
+	}
+	spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
+	ep->nests = nests + 1;
+	wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
+	ep->nests = 0;
+	spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
 }
 
 #else
 
-static void ep_poll_safewake(wait_queue_head_t *wq)
+static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
 {
-	wake_up_poll(wq, EPOLLIN);
+	wake_up_poll(&ep->poll_wait, EPOLLIN);
 }
 
 #endif
@@ -789,7 +811,7 @@ static void ep_free(struct eventpoll *ep)
 
 	/* We need to release all tasks waiting for these file */
 	if (waitqueue_active(&ep->poll_wait))
-		ep_poll_safewake(&ep->poll_wait);
+		ep_poll_safewake(ep, NULL);
 
 	/*
 	 * We need to lock this because we could be hit by
@@ -1258,7 +1280,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&ep->poll_wait);
+		ep_poll_safewake(ep, epi);
 
 	if (!(epi->event.events & EPOLLEXCLUSIVE))
 		ewake = 1;
@@ -1562,7 +1584,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&ep->poll_wait);
+		ep_poll_safewake(ep, NULL);
 
 	return 0;
 
@@ -1666,7 +1688,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&ep->poll_wait);
+		ep_poll_safewake(ep, NULL);
 
 	return 0;
 }
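For context on the eventpoll hunks above: the nesting that ep_poll_safewake() must account for arises when one epoll file descriptor is registered inside another, so a wakeup on the inner instance recursively takes the outer instance's poll_wait.lock. A minimal, hypothetical userspace sketch of such a chain (standard epoll API only; not part of the patch):

    #include <sys/epoll.h>
    #include <stdio.h>

    int main(void)
    {
    	int inner = epoll_create1(0);
    	int outer = epoll_create1(0);
    	struct epoll_event ev = { .events = EPOLLIN, .data.fd = inner };

    	/* Nest inner inside outer: a wakeup on inner must also wake
    	 * outer, which is the recursive path the per-eventpoll 'nests'
    	 * field annotates for lockdep. */
    	if (epoll_ctl(outer, EPOLL_CTL_ADD, inner, &ev) == -1)
    		perror("epoll_ctl");
    	return 0;
    }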
@@ -635,28 +635,35 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
 int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
-	unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
 	struct mm_struct *mm = get_task_mm(task);
 
 	if (mm) {
+		unsigned long size;
+		unsigned long resident = 0;
+		unsigned long shared = 0;
+		unsigned long text = 0;
+		unsigned long data = 0;
+
 		size = task_statm(mm, &shared, &text, &data, &resident);
 		mmput(mm);
-	}
-	/*
-	 * For quick read, open code by putting numbers directly
-	 * expected format is
-	 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
-	 * size, resident, shared, text, data);
-	 */
-	seq_put_decimal_ull(m, "", size);
-	seq_put_decimal_ull(m, " ", resident);
-	seq_put_decimal_ull(m, " ", shared);
-	seq_put_decimal_ull(m, " ", text);
-	seq_put_decimal_ull(m, " ", 0);
-	seq_put_decimal_ull(m, " ", data);
-	seq_put_decimal_ull(m, " ", 0);
-	seq_putc(m, '\n');
 
+		/*
+		 * For quick read, open code by putting numbers directly
+		 * expected format is
+		 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
+		 * size, resident, shared, text, data);
+		 */
+		seq_put_decimal_ull(m, "", size);
+		seq_put_decimal_ull(m, " ", resident);
+		seq_put_decimal_ull(m, " ", shared);
+		seq_put_decimal_ull(m, " ", text);
+		seq_put_decimal_ull(m, " ", 0);
+		seq_put_decimal_ull(m, " ", data);
+		seq_put_decimal_ull(m, " ", 0);
+		seq_putc(m, '\n');
+	} else {
+		seq_write(m, "0 0 0 0 0 0 0\n", 14);
+	}
 	return 0;
 }
 
@@ -17,6 +17,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
 }
 
 static const struct proc_ops cpuinfo_proc_ops = {
+	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_open	= cpuinfo_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
@@ -531,6 +531,12 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
 	return p;
 }
 
+static inline void pde_set_flags(struct proc_dir_entry *pde)
+{
+	if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
+		pde->flags |= PROC_ENTRY_PERMANENT;
+}
+
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent,
 		const struct proc_ops *proc_ops, void *data)
@@ -541,6 +547,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 	if (!p)
 		return NULL;
 	p->proc_ops = proc_ops;
+	pde_set_flags(p);
 	return proc_register(parent, p);
 }
 EXPORT_SYMBOL(proc_create_data);
@@ -572,6 +579,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
 }
 
 static const struct proc_ops proc_seq_ops = {
+	/* not permanent -- can call into arbitrary seq_operations */
 	.proc_open	= proc_seq_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
@@ -602,6 +610,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
 }
 
 static const struct proc_ops proc_single_ops = {
+	/* not permanent -- can call into arbitrary ->single_show */
 	.proc_open	= proc_single_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
@@ -662,9 +671,13 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 
 	de = pde_subdir_find(parent, fn, len);
 	if (de) {
-		rb_erase(&de->subdir_node, &parent->subdir);
-		if (S_ISDIR(de->mode)) {
-			parent->nlink--;
+		if (unlikely(pde_is_permanent(de))) {
+			WARN(1, "removing permanent /proc entry '%s'", de->name);
+			de = NULL;
+		} else {
+			rb_erase(&de->subdir_node, &parent->subdir);
+			if (S_ISDIR(de->mode))
+				parent->nlink--;
 		}
 	}
 	write_unlock(&proc_subdir_lock);
@@ -700,12 +713,24 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 		write_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
+	if (unlikely(pde_is_permanent(root))) {
+		write_unlock(&proc_subdir_lock);
+		WARN(1, "removing permanent /proc entry '%s/%s'",
+			root->parent->name, root->name);
+		return -EINVAL;
+	}
 	rb_erase(&root->subdir_node, &parent->subdir);
 
 	de = root;
 	while (1) {
 		next = pde_subdir_first(de);
 		if (next) {
+			if (unlikely(pde_is_permanent(root))) {
+				write_unlock(&proc_subdir_lock);
+				WARN(1, "removing permanent /proc entry '%s/%s'",
+					next->parent->name, next->name);
+				return -EINVAL;
+			}
 			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
fs/proc/inode.c

@@ -202,6 +202,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
 
 /* pde is locked on entry, unlocked on exit */
 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
+	__releases(&pde->pde_unload_lock)
 {
 	/*
 	 * close() (proc_reg_release()) can't delete an entry and proceed:
@@ -258,114 +259,192 @@ void proc_entry_rundown(struct proc_dir_entry *de)
 	spin_unlock(&de->pde_unload_lock);
 }
 
+static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t offset, int whence)
+{
+	typeof_member(struct proc_ops, proc_lseek) lseek;
+
+	lseek = pde->proc_ops->proc_lseek;
+	if (!lseek)
+		lseek = default_llseek;
+	return lseek(file, offset, whence);
+}
+
 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	loff_t rv = -EINVAL;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_lseek) lseek;
 
-		lseek = pde->proc_ops->proc_lseek;
-		if (!lseek)
-			lseek = default_llseek;
-		rv = lseek(file, offset, whence);
+	if (pde_is_permanent(pde)) {
+		return pde_lseek(pde, file, offset, whence);
+	} else if (use_pde(pde)) {
+		rv = pde_lseek(pde, file, offset, whence);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
+static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	typeof_member(struct proc_ops, proc_read) read;
+
+	read = pde->proc_ops->proc_read;
+	if (read)
+		return read(file, buf, count, ppos);
+	return -EIO;
+}
+
 static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_read) read;
 
-		read = pde->proc_ops->proc_read;
-		if (read)
-			rv = read(file, buf, count, ppos);
+	if (pde_is_permanent(pde)) {
+		return pde_read(pde, file, buf, count, ppos);
+	} else if (use_pde(pde)) {
+		rv = pde_read(pde, file, buf, count, ppos);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
+static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	typeof_member(struct proc_ops, proc_write) write;
+
+	write = pde->proc_ops->proc_write;
+	if (write)
+		return write(file, buf, count, ppos);
+	return -EIO;
+}
+
 static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_write) write;
 
-		write = pde->proc_ops->proc_write;
-		if (write)
-			rv = write(file, buf, count, ppos);
+	if (pde_is_permanent(pde)) {
+		return pde_write(pde, file, buf, count, ppos);
+	} else if (use_pde(pde)) {
+		rv = pde_write(pde, file, buf, count, ppos);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
+static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts)
+{
+	typeof_member(struct proc_ops, proc_poll) poll;
+
+	poll = pde->proc_ops->proc_poll;
+	if (poll)
+		return poll(file, pts);
+	return DEFAULT_POLLMASK;
+}
+
 static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	__poll_t rv = DEFAULT_POLLMASK;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_poll) poll;
 
-		poll = pde->proc_ops->proc_poll;
-		if (poll)
-			rv = poll(file, pts);
+	if (pde_is_permanent(pde)) {
+		return pde_poll(pde, file, pts);
+	} else if (use_pde(pde)) {
+		rv = pde_poll(pde, file, pts);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
+static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	typeof_member(struct proc_ops, proc_ioctl) ioctl;
+
+	ioctl = pde->proc_ops->proc_ioctl;
+	if (ioctl)
+		return ioctl(file, cmd, arg);
+	return -ENOTTY;
+}
+
 static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_ioctl) ioctl;
 
-		ioctl = pde->proc_ops->proc_ioctl;
-		if (ioctl)
-			rv = ioctl(file, cmd, arg);
+	if (pde_is_permanent(pde)) {
+		return pde_ioctl(pde, file, cmd, arg);
+	} else if (use_pde(pde)) {
+		rv = pde_ioctl(pde, file, cmd, arg);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
 #ifdef CONFIG_COMPAT
+static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
+
+	compat_ioctl = pde->proc_ops->proc_compat_ioctl;
+	if (compat_ioctl)
+		return compat_ioctl(file, cmd, arg);
+	return -ENOTTY;
+}
+
 static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
 
-		compat_ioctl = pde->proc_ops->proc_compat_ioctl;
-		if (compat_ioctl)
-			rv = compat_ioctl(file, cmd, arg);
+	if (pde_is_permanent(pde)) {
+		return pde_compat_ioctl(pde, file, cmd, arg);
+	} else if (use_pde(pde)) {
+		rv = pde_compat_ioctl(pde, file, cmd, arg);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 #endif
 
+static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma)
+{
+	typeof_member(struct proc_ops, proc_mmap) mmap;
+
+	mmap = pde->proc_ops->proc_mmap;
+	if (mmap)
+		return mmap(file, vma);
+	return -EIO;
+}
+
 static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	int rv = -EIO;
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_mmap) mmap;
 
-		mmap = pde->proc_ops->proc_mmap;
-		if (mmap)
-			rv = mmap(file, vma);
+	if (pde_is_permanent(pde)) {
+		return pde_mmap(pde, file, vma);
+	} else if (use_pde(pde)) {
+		rv = pde_mmap(pde, file, vma);
 		unuse_pde(pde);
 	}
 	return rv;
 }
 
+static unsigned long
+pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr,
+			   unsigned long len, unsigned long pgoff,
+			   unsigned long flags)
+{
+	typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
+
+	get_area = pde->proc_ops->proc_get_unmapped_area;
+#ifdef CONFIG_MMU
+	if (!get_area)
+		get_area = current->mm->get_unmapped_area;
+#endif
+	if (get_area)
+		return get_area(file, orig_addr, len, pgoff, flags);
+	return orig_addr;
+}
+
 static unsigned long
 proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
 			   unsigned long len, unsigned long pgoff,
@@ -374,19 +453,10 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	unsigned long rv = -EIO;
 
-	if (use_pde(pde)) {
-		typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
-
-		get_area = pde->proc_ops->proc_get_unmapped_area;
-#ifdef CONFIG_MMU
-		if (!get_area)
-			get_area = current->mm->get_unmapped_area;
-#endif
-
-		if (get_area)
-			rv = get_area(file, orig_addr, len, pgoff, flags);
-		else
-			rv = orig_addr;
+	if (pde_is_permanent(pde)) {
+		return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
+	} else if (use_pde(pde)) {
+		rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);
 	}
 	return rv;
@@ -400,6 +470,13 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	typeof_member(struct proc_ops, proc_release) release;
 	struct pde_opener *pdeo;
 
+	if (pde_is_permanent(pde)) {
+		open = pde->proc_ops->proc_open;
+		if (open)
+			rv = open(inode, file);
+		return rv;
+	}
+
 	/*
 	 * Ensure that
 	 * 1) PDE's ->release hook will be called no matter what
@@ -449,6 +526,17 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 {
 	struct proc_dir_entry *pde = PDE(inode);
 	struct pde_opener *pdeo;
+
+	if (pde_is_permanent(pde)) {
+		typeof_member(struct proc_ops, proc_release) release;
+
+		release = pde->proc_ops->proc_release;
+		if (release) {
+			return release(inode, file);
+		}
+		return 0;
+	}
+
 	spin_lock(&pde->pde_unload_lock);
 	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
 		if (pdeo->file == file) {
@@ -61,6 +61,7 @@ struct proc_dir_entry {
 	struct rb_node subdir_node;
 	char *name;
 	umode_t mode;
+	u8 flags;
 	u8 namelen;
 	char inline_name[];
 } __randomize_layout;
@@ -73,6 +74,11 @@ struct proc_dir_entry {
 	0)
 #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
 
+static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
+{
+	return pde->flags & PROC_ENTRY_PERMANENT;
+}
+
 extern struct kmem_cache *proc_dir_entry_cache;
 void pde_free(struct proc_dir_entry *pde);
 
@@ -50,6 +50,7 @@ static __poll_t kmsg_poll(struct file *file, poll_table *wait)
 
 
 static const struct proc_ops kmsg_proc_ops = {
+	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_read	= kmsg_read,
 	.proc_poll	= kmsg_poll,
 	.proc_open	= kmsg_open,
@@ -224,6 +224,7 @@ static int stat_open(struct inode *inode, struct file *file)
 }
 
 static const struct proc_ops stat_proc_ops = {
+	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_open	= stat_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
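The PROC_ENTRY_PERMANENT plumbing in the /proc hunks above is opt-in per proc_ops instance. A hedged sketch of how a user would mark an entry permanent (the file name and handlers below are hypothetical; proc_create(), single_open() and struct proc_ops are as in this tree):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *m, void *v)
    {
    	seq_puts(m, "hello\n");	/* trivial payload for the example */
    	return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
    	return single_open(file, example_show, NULL);
    }

    /* The entry is never removed at runtime, so open/read/lseek/release
     * can take the pde_is_permanent() fast path and skip the
     * use_pde()/unuse_pde() refcounting on every call. */
    static const struct proc_ops example_proc_ops = {
    	.proc_flags	= PROC_ENTRY_PERMANENT,
    	.proc_open	= example_open,
    	.proc_read	= seq_read,
    	.proc_lseek	= seq_lseek,
    	.proc_release	= single_release,
    };

    /* in an init function:
     *	proc_create("example", 0444, NULL, &example_proc_ops);
     */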
@@ -123,38 +123,14 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 }
 #endif
 
-static void vma_stop(struct proc_maps_private *priv)
-{
-	struct mm_struct *mm = priv->mm;
-
-	release_task_mempolicy(priv);
-	up_read(&mm->mmap_sem);
-	mmput(mm);
-}
-
-static struct vm_area_struct *
-m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
-	if (vma == priv->tail_vma)
-		return NULL;
-	return vma->vm_next ?: priv->tail_vma;
-}
-
-static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
-{
-	if (m->count < m->size) /* vma is copied successfully */
-		m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
-}
-
 static void *m_start(struct seq_file *m, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
-	unsigned long last_addr = m->version;
+	unsigned long last_addr = *ppos;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	unsigned int pos = *ppos;
 
-	/* See m_cache_vma(). Zero at the start or after lseek. */
+	/* See m_next(). Zero at the start or after lseek. */
 	if (last_addr == -1UL)
 		return NULL;
 
@@ -163,64 +139,59 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 		return ERR_PTR(-ESRCH);
 
 	mm = priv->mm;
-	if (!mm || !mmget_not_zero(mm))
+	if (!mm || !mmget_not_zero(mm)) {
+		put_task_struct(priv->task);
+		priv->task = NULL;
 		return NULL;
+	}
 
 	if (down_read_killable(&mm->mmap_sem)) {
 		mmput(mm);
+		put_task_struct(priv->task);
+		priv->task = NULL;
 		return ERR_PTR(-EINTR);
 	}
 
 	hold_task_mempolicy(priv);
 	priv->tail_vma = get_gate_vma(mm);
 
-	if (last_addr) {
-		vma = find_vma(mm, last_addr - 1);
-		if (vma && vma->vm_start <= last_addr)
-			vma = m_next_vma(priv, vma);
-		if (vma)
-			return vma;
-	}
-
-	m->version = 0;
-	if (pos < mm->map_count) {
-		for (vma = mm->mmap; pos; pos--) {
-			m->version = vma->vm_start;
-			vma = vma->vm_next;
-		}
+	vma = find_vma(mm, last_addr);
+	if (vma)
 		return vma;
-	}
-
-	/* we do not bother to update m->version in this case */
-	if (pos == mm->map_count && priv->tail_vma)
-		return priv->tail_vma;
 
-	vma_stop(priv);
-	return NULL;
+	return priv->tail_vma;
 }
 
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *next;
+	struct vm_area_struct *next, *vma = v;
+
+	if (vma == priv->tail_vma)
+		next = NULL;
+	else if (vma->vm_next)
+		next = vma->vm_next;
+	else
+		next = priv->tail_vma;
+
+	*ppos = next ? next->vm_start : -1UL;
 
-	(*pos)++;
-	next = m_next_vma(priv, v);
-	if (!next)
-		vma_stop(priv);
 	return next;
 }
 
 static void m_stop(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
+	struct mm_struct *mm = priv->mm;
 
-	if (!IS_ERR_OR_NULL(v))
-		vma_stop(priv);
-	if (priv->task) {
-		put_task_struct(priv->task);
-		priv->task = NULL;
-	}
+	if (!priv->task)
+		return;
+
+	release_task_mempolicy(priv);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	put_task_struct(priv->task);
+	priv->task = NULL;
 }
 
 static int proc_maps_open(struct inode *inode, struct file *file,
@@ -363,7 +334,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 static int show_map(struct seq_file *m, void *v)
 {
 	show_map_vma(m, v);
-	m_cache_vma(m, v);
 	return 0;
 }
 
@@ -847,8 +817,6 @@ static int show_smap(struct seq_file *m, void *v)
 		seq_printf(m, "ProtectionKey:  %8u\n", vma_pkey(vma));
 	show_smap_vma_flags(m, vma);
 
-	m_cache_vma(m, vma);
-
 	return 0;
 }
 
@@ -1887,7 +1855,6 @@ static int show_numa_map(struct seq_file *m, void *v)
 	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
 out:
 	seq_putc(m, '\n');
-	m_cache_vma(m, vma);
 	return 0;
 }
 
@@ -842,7 +842,7 @@ static void balance_leaf_paste_right_whole(struct tree_balance *tb,
 	struct item_head *pasted;
 	struct buffer_info bi;
 
-							buffer_info_init_right(tb, &bi);
+	buffer_info_init_right(tb, &bi);
 	leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
 
 	/* append item in R[0] */
@@ -184,11 +184,12 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	}
 
 	/* we need to make sure nobody is changing the file size beneath us */
-{
-	int depth = reiserfs_write_unlock_nested(inode->i_sb);
-	inode_lock(inode);
-	reiserfs_write_lock_nested(inode->i_sb, depth);
-}
+	{
+		int depth = reiserfs_write_unlock_nested(inode->i_sb);
+
+		inode_lock(inode);
+		reiserfs_write_lock_nested(inode->i_sb, depth);
+	}
 
 	reiserfs_write_lock(inode->i_sb);
 
@@ -838,10 +838,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	 */
 	INC_DIR_INODE_NLINK(dir)
 
-	retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
-				    old_format_only(dir->i_sb) ?
-				    EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
-				    dentry, inode, &security);
+	retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */,
+				    old_format_only(dir->i_sb) ?
+				    EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
+				    dentry, inode, &security);
 	if (retval) {
 		DEC_DIR_INODE_NLINK(dir)
 		goto out_failed;
@@ -967,7 +967,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
 	reiserfs_update_sd(&th, inode);
 
 	DEC_DIR_INODE_NLINK(dir)
-	    dir->i_size -= (DEH_SIZE + de.de_entrylen);
+	dir->i_size -= (DEH_SIZE + de.de_entrylen);
 	reiserfs_update_sd(&th, dir);
 
 	/* prevent empty directory from getting lost */
@@ -67,13 +67,6 @@ int seq_open(struct file *file, const struct seq_operations *op)
 	// to the lifetime of the file.
 	p->file = file;
 
-	/*
-	 * Wrappers around seq_open(e.g. swaps_open) need to be
-	 * aware of this. If they set f_version themselves, they
-	 * should call seq_open first and then set f_version.
-	 */
-	file->f_version = 0;
-
 	/*
 	 * seq_files support lseek() and pread().  They do not implement
 	 * write() at all, but we clear FMODE_PWRITE here for historical
@@ -94,7 +87,6 @@ static int traverse(struct seq_file *m, loff_t offset)
 	int error = 0;
 	void *p;
 
-	m->version = 0;
 	m->index = 0;
 	m->count = m->from = 0;
 	if (!offset)
@@ -160,26 +152,12 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 
 	mutex_lock(&m->lock);
 
-	/*
-	 * seq_file->op->..m_start/m_stop/m_next may do special actions
-	 * or optimisations based on the file->f_version, so we want to
-	 * pass the file->f_version to those methods.
-	 *
-	 * seq_file->version is just copy of f_version, and seq_file
-	 * methods can treat it simply as file version.
-	 * It is copied in first and copied out after all operations.
-	 * It is convenient to have it as part of structure to avoid the
-	 * need of passing another argument to all the seq_file methods.
-	 */
-	m->version = file->f_version;
-
 	/*
 	 * if request is to read from zero offset, reset iterator to first
 	 * record as it might have been already advanced by previous requests
 	 */
 	if (*ppos == 0) {
 		m->index = 0;
-		m->version = 0;
 		m->count = 0;
 	}
 
@@ -190,7 +168,6 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 	if (err) {
 		/* With prejudice... */
 		m->read_pos = 0;
-		m->version = 0;
 		m->index = 0;
 		m->count = 0;
 		goto Done;
@@ -243,7 +220,6 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 		m->buf = seq_buf_alloc(m->size <<= 1);
 		if (!m->buf)
 			goto Enomem;
-		m->version = 0;
 		p = m->op->start(m, &m->index);
 	}
 	m->op->stop(m, p);
@@ -287,7 +263,6 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 		*ppos += copied;
 		m->read_pos += copied;
 	}
-	file->f_version = m->version;
 	mutex_unlock(&m->lock);
 	return copied;
 Enomem:
@@ -313,7 +288,6 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 	loff_t retval = -EINVAL;
 
 	mutex_lock(&m->lock);
-	m->version = file->f_version;
 	switch (whence) {
 	case SEEK_CUR:
 		offset += file->f_pos;
@@ -329,7 +303,6 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 			/* with extreme prejudice... */
 			file->f_pos = 0;
 			m->read_pos = 0;
-			m->version = 0;
 			m->index = 0;
 			m->count = 0;
 		} else {
@@ -340,7 +313,6 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 			file->f_pos = offset;
 		}
 	}
-	file->f_version = m->version;
 	mutex_unlock(&m->lock);
 	return retval;
 }
fs/userfaultfd.c

@@ -314,8 +314,11 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	if (!pmd_present(_pmd))
 		goto out;
 
-	if (pmd_trans_huge(_pmd))
+	if (pmd_trans_huge(_pmd)) {
+		if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
+			ret = true;
 		goto out;
+	}
 
 	/*
 	 * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
@@ -328,6 +331,8 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	 */
 	if (pte_none(*pte))
 		ret = true;
+	if (!pte_write(*pte) && (reason & VM_UFFD_WP))
+		ret = true;
 	pte_unmap(pte);
 
 out:
@@ -1287,10 +1292,13 @@ static __always_inline int validate_range(struct mm_struct *mm,
 	return 0;
 }
 
-static inline bool vma_can_userfault(struct vm_area_struct *vma)
+static inline bool vma_can_userfault(struct vm_area_struct *vma,
+				     unsigned long vm_flags)
 {
-	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
-	    vma_is_shmem(vma);
+	/* FIXME: add WP support to hugetlbfs and shmem */
+	return vma_is_anonymous(vma) ||
+	       ((is_vm_hugetlb_page(vma) || vma_is_shmem(vma)) &&
+		!(vm_flags & VM_UFFD_WP));
 }
 
 static int userfaultfd_register(struct userfaultfd_ctx *ctx,
@@ -1322,15 +1330,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	vm_flags = 0;
 	if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
 		vm_flags |= VM_UFFD_MISSING;
-	if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
+	if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)
 		vm_flags |= VM_UFFD_WP;
-		/*
-		 * FIXME: remove the below error constraint by
-		 * implementing the wprotect tracking mode.
-		 */
-		ret = -EINVAL;
-		goto out;
-	}
 
 	ret = validate_range(mm, &uffdio_register.range.start,
 			     uffdio_register.range.len);
@@ -1380,7 +1381,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 
 		/* check not compatible vmas */
 		ret = -EINVAL;
-		if (!vma_can_userfault(cur))
+		if (!vma_can_userfault(cur, vm_flags))
 			goto out_unlock;
 
 		/*
@@ -1408,6 +1409,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 			if (end & (vma_hpagesize - 1))
 				goto out_unlock;
 		}
+		if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE))
+			goto out_unlock;
 
 		/*
 		 * Check that this vma isn't already owned by a
@@ -1437,7 +1440,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	do {
 		cond_resched();
 
-		BUG_ON(!vma_can_userfault(vma));
+		BUG_ON(!vma_can_userfault(vma, vm_flags));
 		BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
 		       vma->vm_userfaultfd_ctx.ctx != ctx);
 		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
@@ -1492,14 +1495,24 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	up_write(&mm->mmap_sem);
 	mmput(mm);
 	if (!ret) {
+		__u64 ioctls_out;
+
+		ioctls_out = basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC :
+		    UFFD_API_RANGE_IOCTLS;
+
+		/*
+		 * Declare the WP ioctl only if the WP mode is
+		 * specified and all checks passed with the range
+		 */
+		if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP))
+			ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT);
+
 		/*
 		 * Now that we scanned all vmas we can already tell
 		 * userland which ioctls methods are guaranteed to
 		 * succeed on this range.
 		 */
-		if (put_user(basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC :
-			     UFFD_API_RANGE_IOCTLS,
-			     &user_uffdio_register->ioctls))
+		if (put_user(ioctls_out, &user_uffdio_register->ioctls))
 			ret = -EFAULT;
 	}
 out:
@@ -1575,7 +1588,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		 * provides for more strict behavior to notice
 		 * unregistration errors.
 		 */
-		if (!vma_can_userfault(cur))
+		if (!vma_can_userfault(cur, cur->vm_flags))
 			goto out_unlock;
 
 		found = true;
@@ -1589,7 +1602,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 	do {
 		cond_resched();
 
-		BUG_ON(!vma_can_userfault(vma));
+		BUG_ON(!vma_can_userfault(vma, vma->vm_flags));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
@@ -1724,11 +1737,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	ret = -EINVAL;
 	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
 		goto out;
-	if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
+	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
 		goto out;
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-				   uffdio_copy.len, &ctx->mmap_changing);
+				   uffdio_copy.len, &ctx->mmap_changing,
+				   uffdio_copy.mode);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1801,6 +1815,53 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	return ret;
 }
 
+static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
+				    unsigned long arg)
+{
+	int ret;
+	struct uffdio_writeprotect uffdio_wp;
+	struct uffdio_writeprotect __user *user_uffdio_wp;
+	struct userfaultfd_wake_range range;
+	bool mode_wp, mode_dontwake;
+
+	if (READ_ONCE(ctx->mmap_changing))
+		return -EAGAIN;
+
+	user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;
+
+	if (copy_from_user(&uffdio_wp, user_uffdio_wp,
+			   sizeof(struct uffdio_writeprotect)))
+		return -EFAULT;
+
+	ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+			     uffdio_wp.range.len);
+	if (ret)
+		return ret;
+
+	if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
+			       UFFDIO_WRITEPROTECT_MODE_WP))
+		return -EINVAL;
+
+	mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
+	mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
+
+	if (mode_wp && mode_dontwake)
+		return -EINVAL;
+
+	ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
+				  uffdio_wp.range.len, mode_wp,
+				  &ctx->mmap_changing);
+	if (ret)
+		return ret;
+
+	if (!mode_wp && !mode_dontwake) {
+		range.start = uffdio_wp.range.start;
+		range.len = uffdio_wp.range.len;
+		wake_userfault(ctx, &range);
+	}
+	return ret;
+}
+
 static inline unsigned int uffd_ctx_features(__u64 user_features)
 {
 	/*
@@ -1882,6 +1943,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 	case UFFDIO_ZEROPAGE:
 		ret = userfaultfd_zeropage(ctx, arg);
 		break;
+	case UFFDIO_WRITEPROTECT:
+		ret = userfaultfd_writeprotect(ctx, arg);
+		break;
 	}
 	return ret;
 }
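The new UFFDIO_WRITEPROTECT path above is driven from userspace in two steps: mark a range write-protected, then resolve faults by clearing the protection again. A hedged sketch against the uapi this series introduces (error handling elided; the helper name and the idea of wrapping it are illustrative, not from the patch):

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>

    static void wp_range(int uffd, void *addr, unsigned long len, int protect)
    {
    	struct uffdio_writeprotect wp;

    	wp.range.start = (unsigned long)addr;
    	wp.range.len = len;
    	/* UFFDIO_WRITEPROTECT_MODE_WP arms write protection; clearing it
    	 * (optionally together with ..._MODE_DONTWAKE) resolves the
    	 * write fault and, by default, wakes the faulting thread. */
    	wp.mode = protect ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
    	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);
    }

The uffd itself comes from the userfaultfd() syscall after a UFFDIO_API handshake and a UFFDIO_REGISTER with UFFDIO_REGISTER_MODE_WP on the range; per the register-path checks in the hunks above, write protection is currently limited to anonymous VMAs with VM_MAYWRITE.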
@@ -10,6 +10,7 @@
 #include <linux/mm_types.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <asm-generic/pgtable_uffd.h>
 
 #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
 	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
include/asm-generic/pgtable_uffd.h (new file)

@@ -0,0 +1,66 @@
+#ifndef _ASM_GENERIC_PGTABLE_UFFD_H
+#define _ASM_GENERIC_PGTABLE_UFFD_H
+
+#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static __always_inline int pte_uffd_wp(pte_t pte)
+{
+	return 0;
+}
+
+static __always_inline int pmd_uffd_wp(pmd_t pmd)
+{
+	return 0;
+}
+
+static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
+{
+	return pmd;
+}
+
+static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
+{
+	return pmd;
+}
+
+static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static __always_inline int pte_swp_uffd_wp(pte_t pte)
+{
+	return 0;
+}
+
+static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
+{
+	return pmd;
+}
+
+static inline int pmd_swp_uffd_wp(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
+{
+	return pmd;
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */
@@ -13,6 +13,7 @@
 
 #include <linux/mmu_notifier.h>
 #include <linux/swap.h>
+#include <linux/hugetlb_inline.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
@@ -398,7 +399,7 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
 	 * We rely on tlb_end_vma() to issue a flush, such that when we reset
 	 * these values the batch is empty.
 	 */
-	tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB);
+	tlb->vma_huge = is_vm_hugetlb_page(vma);
 	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
 }
 
@@ -162,7 +162,7 @@ static inline __u8 ror8(__u8 word, unsigned int shift)
  *
  * This is safe to use for 16- and 8-bit types as well.
  */
-static inline __s32 sign_extend32(__u32 value, int index)
+static __always_inline __s32 sign_extend32(__u32 value, int index)
 {
 	__u8 shift = 31 - index;
 	return (__s32)(value << shift) >> shift;
@@ -173,7 +173,7 @@ static inline __s32 sign_extend32(__u32 value, int index)
 * @value: value to sign extend
 * @index: 0 based bit index (0<=index<64) to sign bit
 */
-static inline __s64 sign_extend64(__u64 value, int index)
+static __always_inline __s64 sign_extend64(__u64 value, int index)
 {
 	__u8 shift = 63 - index;
 	return (__s64)(value << shift) >> shift;
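sign_extend32()/sign_extend64() take the 0-based index of the sign bit, which makes them handy for widening odd-sized hardware fields; forcing them __always_inline keeps them safe in contexts where an out-of-line call would be unacceptable. A small usage sketch (the decoder name and the 24-bit field are hypothetical):

    #include <linux/bitops.h>

    /* A 24-bit two's-complement sample packed into the low bits of a
     * 32-bit register: bit 23 is the sign bit. */
    static inline s32 decode_sample24(u32 raw)
    {
    	return sign_extend32(raw, 23);	/* 0x00800000 -> -8388608 */
    }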
@@ -18,12 +18,30 @@
 * position @h. For example
 * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
 */
-#define GENMASK(h, l) \
+#if !defined(__ASSEMBLY__) && \
+	(!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000)
+#include <linux/build_bug.h>
+#define GENMASK_INPUT_CHECK(h, l) \
+	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
+		__builtin_constant_p((l) > (h)), (l) > (h), 0)))
+#else
+/*
+ * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
+ * disable the input check if that is the case.
+ */
+#define GENMASK_INPUT_CHECK(h, l) 0
+#endif
+
+#define __GENMASK(h, l) \
 	(((~UL(0)) - (UL(1) << (l)) + 1) & \
 	 (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
+#define GENMASK(h, l) \
+	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
 
-#define GENMASK_ULL(h, l) \
+#define __GENMASK_ULL(h, l) \
 	(((~ULL(0)) - (ULL(1) << (l)) + 1) & \
 	 (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
+#define GENMASK_ULL(h, l) \
+	(GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
 
 #endif	/* __LINUX_BITS_H */
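With the split into __GENMASK() plus GENMASK_INPUT_CHECK(), swapped arguments become a build error when both are compile-time constants, while non-constant arguments still compile (the check folds to 0). A short sketch of the intended behavior (the mask name is hypothetical):

    #include <linux/bits.h>

    #define MYDEV_FIELD_MASK	GENMASK(15, 8)	/* 0x0000ff00; h >= l, fine */

    /* GENMASK(8, 15) would now trip BUILD_BUG_ON_ZERO() at compile time,
     * because __builtin_constant_p((l) > (h)) is true and (l) > (h) holds. */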
@@ -347,7 +347,7 @@ static inline void *offset_to_ptr(const int *off)
 * compiler has support to do so.
 */
 #define compiletime_assert(condition, msg) \
-	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
+	_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
 
 #define compiletime_assert_atomic_type(t)				\
 	compiletime_assert(__native_word(t),				\
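The switch from __LINE__ to __COUNTER__ matters when a single source line expands to more than one compiletime_assert(): with __LINE__, both expansions named the same __compiletime_assert_N helper, so one check could shadow the other. A condensed illustration under that assumption (the macro is hypothetical):

    /* Both asserts land on one line after this macro expands. */
    #define CHECK_BOTH(a, b) do { \
    	compiletime_assert(a, "first");  compiletime_assert(b, "second"); \
    } while (0)

    /* With __LINE__ the two expansions collide on the same symbol name;
     * with __COUNTER__ each gets a distinct suffix and both are checked. */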
@@ -129,22 +129,13 @@ struct ftrace_likely_data {
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
 /*
- * Force always-inline if the user requests it so via the .config.
  * Prefer gnu_inline, so that extern inline functions do not emit an
  * externally visible function. This makes extern inline behave as per gnu89
  * semantics rather than c99. This prevents multiple symbol definition errors
  * of extern inline functions at link time.
  * A lot of inline functions can cause havoc with function tracing.
- * Do not use __always_inline here, since currently it expands to inline again
- * (which would break users of __always_inline).
 */
-#if !defined(CONFIG_OPTIMIZE_INLINING)
-#define inline inline __attribute__((__always_inline__)) __gnu_inline \
-	__inline_maybe_unused notrace
-#else
-#define inline inline __gnu_inline \
-	__inline_maybe_unused notrace
-#endif
+#define inline inline __gnu_inline __inline_maybe_unused notrace
 
 /*
 * gcc provides both __inline__ and __inline as alternate spellings of
@@ -124,6 +124,8 @@ struct vm_area_struct;
 *
 * Reclaim modifiers
 * ~~~~~~~~~~~~~~~~~
+ * Please note that all the following flags are only applicable to sleepable
+ * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
 *
 * %__GFP_IO can start physical IO.
 *
@@ -46,7 +46,7 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
-			int prot_numa);
+			unsigned long cp_flags);
 vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
 				   pgprot_t pgprot, bool write);
 
@@ -26,7 +26,6 @@
 struct memory_block {
 	unsigned long start_section_nr;
 	unsigned long state;		/* serialized by the dev->lock */
-	int section_count;		/* serialized by mem_sysfs_mutex */
 	int online_type;		/* for passing data to online routine */
 	int phys_device;		/* to which fru does this belong? */
 	struct device dev;
@@ -47,9 +47,13 @@ enum {
 
 /* Types for control the zone type of onlined and offlined memory */
 enum {
-	MMOP_OFFLINE = -1,
-	MMOP_ONLINE_KEEP,
+	/* Offline the memory. */
+	MMOP_OFFLINE = 0,
+	/* Online the memory. Zone depends, see default_zone_for_pfn(). */
+	MMOP_ONLINE,
+	/* Online the memory to ZONE_NORMAL. */
 	MMOP_ONLINE_KERNEL,
+	/* Online the memory to ZONE_MOVABLE. */
 	MMOP_ONLINE_MOVABLE,
 };
 
@@ -113,7 +117,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
 		struct mhp_restrictions *restrictions);
 extern u64 max_mem_size;
 
-extern bool memhp_auto_online;
+extern int memhp_online_type_from_str(const char *str);
+
+/* Default online_type (MMOP_*) when new memory blocks are added. */
+extern int memhp_default_online_type;
 /* If movable_node boot option specified */
 extern bool movable_node_enabled;
 static inline bool movable_node_is_enabled(void)
@@ -98,8 +98,6 @@ struct dev_pagemap_ops {
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @internal_ref: internal reference if @ref is not provided by the caller
 * @done: completion for @internal_ref
- * @dev: host device of the mapping for debug
- * @data: private data pointer for page_free()
 * @type: memory type: see MEMORY_* in memory_hotplug.h
 * @flags: PGMAP_* flags to specify defailed behavior
 * @ops: method table
@@ -629,6 +629,12 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma)
 
 	return false;
 }
+
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+}
+
 #ifdef CONFIG_SHMEM
 /*
 * The vma_is_shmem is not inline because it is used only by slow
@@ -1765,9 +1771,26 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len,
 		bool need_rmap_locks);
+
+/*
+ * Flags used by change_protection().  For now we make it a bitmap so
+ * that we can pass in multiple flags just like parameters.  However
+ * for now all the callers are only use one of the flags at the same
+ * time.
+ */
+/* Whether we should allow dirty bit accounting */
+#define  MM_CP_DIRTY_ACCT		(1UL << 0)
+/* Whether this protection change is for NUMA hints */
+#define  MM_CP_PROT_NUMA		(1UL << 1)
+/* Whether this change is for write protecting */
+#define  MM_CP_UFFD_WP			(1UL << 2) /* do wp */
+#define  MM_CP_UFFD_WP_RESOLVE		(1UL << 3) /* Resolve wp */
+#define  MM_CP_UFFD_WP_ALL		(MM_CP_UFFD_WP | \
+					 MM_CP_UFFD_WP_RESOLVE)
+
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
-			      int dirty_accountable, int prot_numa);
+			      unsigned long cp_flags);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -6,19 +6,20 @@
 #include <linux/swap.h>
 
 /**
- * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * page_is_file_lru - should the page be on a file LRU or anon LRU?
 * @page: the page to test
 *
- * Returns 1 if @page is page cache page backed by a regular filesystem,
- * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
- * Used by functions that manipulate the LRU lists, to sort a page
- * onto the right LRU list.
+ * Returns 1 if @page is a regular filesystem backed page cache page or a lazily
+ * freed anonymous page (e.g. via MADV_FREE).  Returns 0 if @page is a normal
+ * anonymous page, a tmpfs page or otherwise ram or swap backed page.  Used by
+ * functions that manipulate the LRU lists, to sort a page onto the right LRU
+ * list.
 *
 * We would like to get this info without a page flag, but the state
 * needs to survive until the page is last deleted from the LRU, which
 * could be as far down as __page_cache_release.
 */
-static inline int page_is_file_cache(struct page *page)
+static inline int page_is_file_lru(struct page *page)
 {
 	return !PageSwapBacked(page);
 }
@@ -75,7 +76,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 */
 static inline enum lru_list page_lru_base_type(struct page *page)
 {
-	if (page_is_file_cache(page))
+	if (page_is_file_lru(page))
 		return LRU_INACTIVE_FILE;
 	return LRU_INACTIVE_ANON;
 }
@@ -289,8 +289,8 @@ struct vm_userfaultfd_ctx {};
 #endif /* CONFIG_USERFAULTFD */
 
 /*
- * This struct defines a memory VMM memory area. There is one of these
- * per VM-area/task.  A VM area is any part of the process virtual memory
+ * This struct describes a virtual memory area. There is one of these
+ * per VM-area/task. A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
@@ -100,41 +100,6 @@ struct free_area {
 	unsigned long		nr_free;
 };
 
-/* Used for pages not on another list */
-static inline void add_to_free_area(struct page *page, struct free_area *area,
-			     int migratetype)
-{
-	list_add(&page->lru, &area->free_list[migratetype]);
-	area->nr_free++;
-}
-
-/* Used for pages not on another list */
-static inline void add_to_free_area_tail(struct page *page, struct free_area *area,
-				  int migratetype)
-{
-	list_add_tail(&page->lru, &area->free_list[migratetype]);
-	area->nr_free++;
-}
-
-#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR
-/* Used to preserve page allocation order entropy */
-void add_to_free_area_random(struct page *page, struct free_area *area,
-		int migratetype);
-#else
-static inline void add_to_free_area_random(struct page *page,
-		struct free_area *area, int migratetype)
-{
-	add_to_free_area(page, area, migratetype);
-}
-#endif
-
-/* Used for pages which are on another list */
-static inline void move_to_free_area(struct page *page, struct free_area *area,
-			     int migratetype)
-{
-	list_move(&page->lru, &area->free_list[migratetype]);
-}
-
 static inline struct page *get_page_from_free_area(struct free_area *area,
 					    int migratetype)
 {
@@ -142,15 +107,6 @@ static inline struct page *get_page_from_free_area(struct free_area *area,
 					struct page, lru);
 }
 
-static inline void del_page_from_free_area(struct page *page,
-		struct free_area *area)
-{
-	list_del(&page->lru);
-	__ClearPageBuddy(page);
-	set_page_private(page, 0);
-	area->nr_free--;
-}
-
 static inline bool free_area_empty(struct free_area *area, int migratetype)
 {
 	return list_empty(&area->free_list[migratetype]);
@@ -708,7 +664,6 @@ struct deferred_split {
 * Memory statistics and page replacement data structures are maintained on a
 * per-zone basis.
 */
-struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
 	struct zonelist node_zonelists[MAX_ZONELISTS];
@@ -1187,7 +1142,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
 #define SUBSECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SUBSECTION_MASK)
 
 struct mem_section_usage {
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+#endif
 	/* See declaration of similar field in struct zone */
 	unsigned long pageblock_flags[0];
 };
@ -63,6 +63,11 @@
|
|||
* page_waitqueue(page) is a wait queue of all tasks waiting for the page
|
||||
* to become unlocked.
|
||||
*
|
||||
* PG_swapbacked is set when a page uses swap as a backing storage. This are
|
||||
* usually PageAnon or shmem pages but please note that even anonymous pages
|
||||
* might lose their PG_swapbacked flag when they simply can be dropped (e.g. as
|
||||
* a result of MADV_FREE).
|
||||
*
|
||||
* PG_uptodate tells whether the page's contents is valid. When a read
|
||||
* completes, the page becomes uptodate, unless a disk I/O error happened.
|
||||
*
|
||||
|
@ -163,6 +168,9 @@ enum pageflags {
|
|||
|
||||
/* non-lru isolated movable page */
|
||||
PG_isolated = PG_reclaim,
|
||||
|
||||
/* Only valid for buddy pages. Used to track pages that are reported */
|
||||
PG_reported = PG_uptodate,
|
||||
};
|
||||
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
@ -431,6 +439,14 @@ TESTCLEARFLAG(Young, young, PF_ANY)
|
|||
PAGEFLAG(Idle, idle, PF_ANY)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PageReported() is used to track reported free pages within the Buddy
|
||||
* allocator. We can use the non-atomic version of the test and set
|
||||
* operations as both should be shielded with the zone lock to prevent
|
||||
* any possible races on the setting or clearing of the bit.
|
||||
*/
|
||||
__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
|
||||
|
||||
/*
|
||||
* On an anonymous page mapped into a user virtual memory area,
|
||||
* page->mapping points to its anon_vma, not to a struct address_space;
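For readers unfamiliar with the flag-macro family: __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) generates non-atomic accessors for the bit. Conceptually they look like the sketch below (simplified; the real expansion also applies the PF_NO_COMPOUND policy check, which is omitted here):

	static __always_inline int PageReported(struct page *page)
	{
		return test_bit(PG_reported, &page->flags);
	}

	static __always_inline void __SetPageReported(struct page *page)
	{
		__set_bit(PG_reported, &page->flags);
	}

	static __always_inline void __ClearPageReported(struct page *page)
	{
		__clear_bit(PG_reported, &page->flags);
	}

The double-underscore variants use non-atomic bitops, which is safe here only because, as the comment above says, every user holds the zone lock.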
include/linux/page_reporting.h (new file, 26 lines)
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PAGE_REPORTING_H
+#define _LINUX_PAGE_REPORTING_H
+
+#include <linux/mmzone.h>
+#include <linux/scatterlist.h>
+
+/* This value should always be a power of 2, see page_reporting_cycle() */
+#define PAGE_REPORTING_CAPACITY		32
+
+struct page_reporting_dev_info {
+	/* function that alters pages to make them "reported" */
+	int (*report)(struct page_reporting_dev_info *prdev,
+		      struct scatterlist *sg, unsigned int nents);
+
+	/* work struct for processing reports */
+	struct delayed_work work;
+
+	/* Current state of page reporting */
+	atomic_t state;
+};
+
+/* Tear-down and bring-up for page reporting devices */
+void page_reporting_unregister(struct page_reporting_dev_info *prdev);
+int page_reporting_register(struct page_reporting_dev_info *prdev);
+#endif /*_LINUX_PAGE_REPORTING_H */
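The contract is deliberately small: a driver fills in .report and registers. A hypothetical driver-side sketch (my_report and my_prdev are illustrative names, not from this diff; virtio-balloon is the in-tree user added by this series):

	/* Hypothetical example: hand each batch of free pages to a hypervisor. */
	static int my_report(struct page_reporting_dev_info *prdev,
			     struct scatterlist *sg, unsigned int nents)
	{
		/* ...tell the host the nents ranges in sg are free... */
		return 0;	/* success lets the core mark the pages reported */
	}

	static struct page_reporting_dev_info my_prdev = {
		.report = my_report,
	};

	/* in probe(): */
	err = page_reporting_register(&my_prdev);
	/* in remove(): */
	page_reporting_unregister(&my_prdev);

The core batches up to PAGE_REPORTING_CAPACITY page ranges per scatterlist before invoking the callback.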
@@ -341,9 +341,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
 	if (PageHuge(head))
 		return head;

-	VM_BUG_ON_PAGE(PageTail(head), head);
-
-	return head + (index & (compound_nr(head) - 1));
+	return head + (index & (hpage_nr_pages(head) - 1));
 }

 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
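For reference, hpage_nr_pages() sizes the mask by the THP order rather than the raw compound order; in this era of the tree it reads roughly as follows (from include/linux/huge_mm.h, lightly simplified):

	static inline int hpage_nr_pages(struct page *page)
	{
		if (unlikely(PageTransHuge(page)))
			return HPAGE_PMD_NR;
		return 1;
	}

So for a regular page the mask is 0 and find_subpage() returns head unchanged.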
@@ -78,9 +78,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc)
  */
 static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
 {
-	s64 ret = fbc->count;
+	/* Prevent reloads of fbc->count */
+	s64 ret = READ_ONCE(fbc->count);

-	barrier();	/* Prevent reloads of fbc->count */
 	if (ret >= 0)
 		return ret;
 	return 0;
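The point of the change: the plain load plus barrier() only stopped the compiler from reloading fbc->count after the sign check; the load itself remained a plain (data-racy) access. READ_ONCE() makes the single, untorn load explicit at the point where it matters. The same pattern outside the kernel, as a sketch that approximates READ_ONCE with a volatile cast (GNU C typeof assumed):

	#include <stdint.h>

	#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

	/* Read a value another thread may update concurrently: exactly one
	 * load, with no compiler reload between the check and the return. */
	int64_t read_positive(int64_t *count)
	{
		int64_t ret = READ_ONCE(*count);

		return ret >= 0 ? ret : 0;
	}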
@@ -5,6 +5,7 @@
 #ifndef _LINUX_PROC_FS_H
 #define _LINUX_PROC_FS_H

+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/fs.h>

@@ -12,7 +13,21 @@ struct proc_dir_entry;
 struct seq_file;
 struct seq_operations;

+enum {
+	/*
+	 * All /proc entries using this ->proc_ops instance are never removed.
+	 *
+	 * If in doubt, ignore this flag.
+	 */
+#ifdef MODULE
+	PROC_ENTRY_PERMANENT = 0U,
+#else
+	PROC_ENTRY_PERMANENT = 1U << 0,
+#endif
+};
+
 struct proc_ops {
+	unsigned int proc_flags;
 	int (*proc_open)(struct inode *, struct file *);
 	ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *);
 	ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *);
@@ -25,7 +40,7 @@ struct proc_ops {
 #endif
 	int (*proc_mmap)(struct file *, struct vm_area_struct *);
 	unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
-};
+} __randomize_layout;

 #ifdef CONFIG_PROC_FS
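Setting the flag is a one-liner in the ops table; the sysvipc hunk later in this diff does exactly that. A minimal sketch for a never-removed /proc file (my_show, my_open, and the ops name are hypothetical, and the usual seq_file single_open helpers are assumed):

	static int my_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	static int my_open(struct inode *inode, struct file *file)
	{
		return single_open(file, my_show, NULL);
	}

	static const struct proc_ops my_proc_ops = {
		.proc_flags	= PROC_ENTRY_PERMANENT,	/* entry is never removed */
		.proc_open	= my_open,
		.proc_read	= seq_read,
		.proc_lseek	= seq_lseek,
		.proc_release	= single_release,
	};

Declaring the entry permanent lets procfs skip the removal/use bookkeeping on every open/read/close, which is the speedup this series is after; the flag is forced to 0 in modules because module entries can always go away.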
@@ -21,7 +21,6 @@ struct seq_file {
 	size_t pad_until;
 	loff_t index;
 	loff_t read_pos;
-	u64 version;
 	struct mutex lock;
 	const struct seq_operations *op;
 	int poll_event;
@@ -78,6 +78,7 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(unsigned int type, bool frontswap,
 		       unsigned long *fs_pages_to_unuse);

+extern bool shmem_huge_enabled(struct vm_area_struct *vma);
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 						pgoff_t start, pgoff_t end);
@@ -114,15 +115,6 @@ static inline bool shmem_file(struct file *file)
 extern bool shmem_charge(struct inode *inode, long pages);
 extern void shmem_uncharge(struct inode *inode, long pages);

-#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
-extern bool shmem_huge_enabled(struct vm_area_struct *vma);
-#else
-static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
-{
-	return false;
-}
-#endif
-
 #ifdef CONFIG_SHMEM
 extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 				  struct vm_area_struct *dst_vma,

@@ -19,4 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries);

+unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
+
 #endif

@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)

 	if (pte_swp_soft_dirty(pte))
 		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
@@ -348,7 +350,8 @@ static inline void num_poisoned_pages_inc(void)
 }
 #endif

-#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \
+    defined(CONFIG_DEVICE_PRIVATE)
 static inline int non_swap_entry(swp_entry_t entry)
 {
 	return swp_type(entry) >= MAX_SWAPFILES;

@@ -14,6 +14,8 @@
 #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */

 #include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <asm-generic/pgtable_uffd.h>

 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -34,11 +36,14 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
-			    bool *mmap_changing);
+			    bool *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
 			      unsigned long dst_start,
 			      unsigned long len,
 			      bool *mmap_changing);
+extern int mwriteprotect_range(struct mm_struct *dst_mm,
+			       unsigned long start, unsigned long len,
+			       bool enable_wp, bool *mmap_changing);

 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
@@ -52,6 +57,23 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_UFFD_MISSING;
 }

+static inline bool userfaultfd_wp(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & VM_UFFD_WP;
+}
+
+static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
+				      pte_t pte)
+{
+	return userfaultfd_wp(vma) && pte_uffd_wp(pte);
+}
+
+static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
+					   pmd_t pmd)
+{
+	return userfaultfd_wp(vma) && pmd_uffd_wp(pmd);
+}
+
 static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 {
 	return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
@@ -96,6 +118,24 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma)
 	return false;
 }

+static inline bool userfaultfd_wp(struct vm_area_struct *vma)
+{
+	return false;
+}
+
+static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
+				      pte_t pte)
+{
+	return false;
+}
+
+static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
+					   pmd_t pmd)
+{
+	return false;
+}
+
+
 static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 {
 	return false;
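How the new helpers are meant to be consulted on the fault path, in rough outline (a sketch of the idea, not the exact mm/memory.c hunk, which is not shown in this part of the diff):

	/* In a write-fault handler: a wp-marked PTE in a VM_UFFD_WP vma is
	 * forwarded to the userfaultfd reader instead of being COW-broken. */
	if (userfaultfd_pte_wp(vma, *vmf->pte))
		return handle_userfault(vmf, VM_UFFD_WP);

The stubs below the #else keep such call sites compiling to nothing when userfaultfd is configured out.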
@@ -73,9 +73,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		THP_FAULT_ALLOC,
 		THP_FAULT_FALLBACK,
+		THP_FAULT_FALLBACK_CHARGE,
 		THP_COLLAPSE_ALLOC,
 		THP_COLLAPSE_ALLOC_FAILED,
 		THP_FILE_ALLOC,
+		THP_FILE_FALLBACK,
+		THP_FILE_FALLBACK_CHARGE,
 		THP_FILE_MAPPED,
 		THP_SPLIT_PAGE,
 		THP_SPLIT_PAGE_FAILED,
@@ -115,6 +118,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,

 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
+#define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
+#define THP_FILE_FALLBACK_CHARGE ({ BUILD_BUG(); 0; })
 #define THP_FILE_MAPPED ({ BUILD_BUG(); 0; })
 #endif
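The ({ BUILD_BUG(); 0; }) bodies are GNU C statement expressions: the macro still has a well-typed value of 0 so expressions mentioning it parse, while BUILD_BUG() breaks the build if the expression actually survives into generated code without CONFIG_TRANSPARENT_HUGEPAGE. In miniature (BUILD_BUG is the kernel's; the surrounding call is illustrative):

	/* OK: the dead branch is eliminated before BUILD_BUG() can fire */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		count_vm_event(THP_FILE_ALLOC);

Any unconditional use of THP_FILE_ALLOC in a !THP build fails at compile time instead of silently counting into a nonexistent slot.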
@@ -13,6 +13,7 @@
 	EM( SCAN_PMD_NULL,		"pmd_null")			\
 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
+	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
 	EM( SCAN_PAGE_RO,		"no_writable_page")		\
 	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
 	EM( SCAN_PAGE_NULL,		"page_null")			\

@@ -154,6 +154,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
 	{VM_ACCOUNT,	"account"	},	\
 	{VM_NORESERVE,	"noreserve"	},	\
 	{VM_HUGETLB,	"hugetlb"	},	\
+	{VM_SYNC,	"sync"		},	\
 	__VM_ARCH_SPECIFIC_1,			\
 	{VM_WIPEONFORK,	"wipeonfork"	},	\
 	{VM_DONTDUMP,	"dontdump"	},	\

@@ -323,7 +323,7 @@ TRACE_EVENT(mm_vmscan_writepage,
 	TP_fast_assign(
 		__entry->pfn = page_to_pfn(page);
 		__entry->reclaim_flags = trace_reclaim_flags(
-						page_is_file_cache(page));
+						page_is_file_lru(page));
 	),

 	TP_printk("page=%p pfn=%lu flags=%s",

@@ -19,7 +19,8 @@
  * means the userland is reading).
  */
 #define UFFD_API ((__u64)0xAA)
-#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |		\
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP |	\
+			   UFFD_FEATURE_EVENT_FORK |		\
 			   UFFD_FEATURE_EVENT_REMAP |		\
 			   UFFD_FEATURE_EVENT_REMOVE |		\
 			   UFFD_FEATURE_EVENT_UNMAP |		\
@@ -34,7 +35,8 @@
 #define UFFD_API_RANGE_IOCTLS				\
 	((__u64)1 << _UFFDIO_WAKE |			\
 	 (__u64)1 << _UFFDIO_COPY |			\
-	 (__u64)1 << _UFFDIO_ZEROPAGE)
+	 (__u64)1 << _UFFDIO_ZEROPAGE |			\
+	 (__u64)1 << _UFFDIO_WRITEPROTECT)
 #define UFFD_API_RANGE_IOCTLS_BASIC			\
 	((__u64)1 << _UFFDIO_WAKE |			\
 	 (__u64)1 << _UFFDIO_COPY)
@@ -52,6 +54,7 @@
 #define _UFFDIO_WAKE			(0x02)
 #define _UFFDIO_COPY			(0x03)
 #define _UFFDIO_ZEROPAGE		(0x04)
+#define _UFFDIO_WRITEPROTECT		(0x06)
 #define _UFFDIO_API			(0x3F)

 /* userfaultfd ioctl ids */
@@ -68,6 +71,8 @@
 				      struct uffdio_copy)
 #define UFFDIO_ZEROPAGE		_IOWR(UFFDIO, _UFFDIO_ZEROPAGE,	\
 				      struct uffdio_zeropage)
+#define UFFDIO_WRITEPROTECT	_IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+				      struct uffdio_writeprotect)

 /* read() structure */
 struct uffd_msg {
@@ -203,13 +208,14 @@ struct uffdio_copy {
 	__u64 dst;
 	__u64 src;
 	__u64 len;
-	/*
-	 * There will be a wrprotection flag later that allows to map
-	 * pages wrprotected on the fly. And such a flag will be
-	 * available if the wrprotection ioctl are implemented for the
-	 * range according to the uffdio_register.ioctls.
-	 */
 #define UFFDIO_COPY_MODE_DONTWAKE	((__u64)1<<0)
+	/*
+	 * UFFDIO_COPY_MODE_WP will map the page write protected on
+	 * the fly. UFFDIO_COPY_MODE_WP is available only if the
+	 * write protected ioctl is implemented for the range
+	 * according to the uffdio_register.ioctls.
+	 */
+#define UFFDIO_COPY_MODE_WP		((__u64)1<<1)
 	__u64 mode;

 	/*
@@ -231,4 +237,24 @@ struct uffdio_zeropage {
 	__s64 zeropage;
 };

+struct uffdio_writeprotect {
+	struct uffdio_range range;
+/*
+ * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
+ * unset the flag to undo protection of a range which was previously
+ * write protected.
+ *
+ * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
+ * any wait thread after the operation succeeds.
+ *
+ * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
+ * therefore DONTWAKE flag is meaningless with WP=1. Removing write
+ * protection (WP=0) in response to a page fault wakes the faulting
+ * task unless DONTWAKE is set.
+ */
+#define UFFDIO_WRITEPROTECT_MODE_WP		((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE	((__u64)1<<1)
+	__u64 mode;
+};
+
 #endif /* _LINUX_USERFAULTFD_H */
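Driven from user space, write-protect mode is a three-step dance: register the range with UFFDIO_REGISTER_MODE_WP, arm protection, then disarm it from the fault-handling thread. A condensed sketch (error handling and the uffd_msg read loop omitted; addr, len, and the open flags are illustrative; assumes <linux/userfaultfd.h>, <sys/ioctl.h>, <sys/syscall.h>, and <fcntl.h>):

	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	struct uffdio_api api = { .api = UFFD_API };
	ioctl(uffd, UFFDIO_API, &api);

	struct uffdio_register reg = {
		.range = { .start = (__u64)addr, .len = len },
		.mode  = UFFDIO_REGISTER_MODE_WP,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	/* arm: writes to [addr, addr+len) now generate uffd messages */
	struct uffdio_writeprotect wp = {
		.range = { .start = (__u64)addr, .len = len },
		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
	};
	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);

	/* disarm (e.g. after logging the fault); mode = 0 clears WP and
	 * wakes the faulting task, per the NOTE in the header above */
	wp.mode = 0;
	ioctl(uffd, UFFDIO_WRITEPROTECT, &wp);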
@@ -36,6 +36,7 @@
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
 #define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
+#define VIRTIO_BALLOON_F_REPORTING	5 /* Page reporting virtqueue */

 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12

@@ -872,7 +872,7 @@ config BLK_CGROUP
 	  This option only enables generic Block IO controller infrastructure.
 	  One needs to also enable actual IO controlling logic/policy. For
 	  enabling proportional weight division of disk bandwidth in CFQ, set
-	  CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
+	  CONFIG_BFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
 	  CONFIG_BLK_DEV_THROTTLING=y.

 	  See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.
@@ -1538,7 +1538,6 @@ config AIO

 config IO_URING
 	bool "Enable IO uring support" if EXPERT
-	select ANON_INODES
 	select IO_WQ
 	default y
 	help
@@ -1556,6 +1555,11 @@ config ADVISE_SYSCALLS
 	  applications use these syscalls, you can disable this option to save
 	  space.

+config HAVE_ARCH_USERFAULTFD_WP
+	bool
+	help
+	  Arch has userfaultfd write protection support
+
 config MEMBARRIER
 	bool "Enable membarrier() system call" if EXPERT
 	default y

@@ -239,11 +239,10 @@ static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
 		info->msg_tree_rightmost = rb_prev(node);

 	rb_erase(node, &info->msg_tree);
-	if (info->node_cache) {
+	if (info->node_cache)
 		kfree(leaf);
-	} else {
+	else
 		info->node_cache = leaf;
-	}
 }

 static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)

@@ -1332,7 +1332,7 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
 	}
 }

-long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
+static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
 {
 	struct ipc_namespace *ns;
 	struct shmid64_ds sem64;

@@ -885,6 +885,7 @@ static int sysvipc_proc_release(struct inode *inode, struct file *file)
 }

 static const struct proc_ops sysvipc_proc_ops = {
+	.proc_flags = PROC_ENTRY_PERMANENT,
 	.proc_open = sysvipc_proc_open,
 	.proc_read = seq_read,
 	.proc_lseek = seq_lseek,

@@ -6,7 +6,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_KERNEL_XZ=y
 # CONFIG_KERNEL_LZO is not set
 # CONFIG_KERNEL_LZ4 is not set
-CONFIG_OPTIMIZE_INLINING=y
 # CONFIG_SLAB is not set
 # CONFIG_SLUB is not set
 CONFIG_SLOB=y

@@ -28,6 +28,7 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
+#include <linux/hugetlb.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
@@ -7973,7 +7974,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		flags |= MAP_EXECUTABLE;
 	if (vma->vm_flags & VM_LOCKED)
 		flags |= MAP_LOCKED;
-	if (vma->vm_flags & VM_HUGETLB)
+	if (is_vm_hugetlb_page(vma))
 		flags |= MAP_HUGETLB;

 	if (file) {

@@ -34,7 +34,8 @@ u32 __initdata __visible main_extable_sort_needed = 1;
 /* Sort the kernel's built-in exception table */
 void __init sort_main_extable(void)
 {
-	if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) {
+	if (main_extable_sort_needed &&
+	    &__stop___ex_table > &__start___ex_table) {
 		pr_notice("Sorting __ex_table...\n");
 		sort_extable(__start___ex_table, __stop___ex_table);
 	}
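The section markers are declared as arrays, and comparing the decayed pointers of two distinct arrays lets the compiler "prove" a constant result (clang warns about exactly this); taking the addresses of the symbols first compares them as objects instead. The pattern in isolation, with hypothetical section symbols:

	/* Hypothetical linker-provided section bounds, declared as arrays: */
	extern const int __start_mytable[];
	extern const int __stop_mytable[];

	static int mytable_is_nonempty(void)
	{
		/*
		 * __stop_mytable > __start_mytable invites a
		 * tautological-compare warning; comparing the symbols'
		 * addresses keeps the intent and silences it.
		 */
		return &__stop_mytable > &__start_mytable;
	}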
@@ -361,6 +361,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 	if (new) {
 		*new = *orig;
 		INIT_LIST_HEAD(&new->anon_vma_chain);
+		new->vm_next = new->vm_prev = NULL;
 	}
 	return new;
 }
@@ -553,14 +554,15 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		if (retval)
 			goto fail_nomem_anon_vma_fork;
 		if (tmp->vm_flags & VM_WIPEONFORK) {
-			/* VM_WIPEONFORK gets a clean slate in the child. */
+			/*
+			 * VM_WIPEONFORK gets a clean slate in the child.
+			 * Don't prepare anon_vma until fault since we don't
+			 * copy page for current vma.
+			 */
 			tmp->anon_vma = NULL;
-			if (anon_vma_prepare(tmp))
-				goto fail_nomem_anon_vma_fork;
 		} else if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
-		tmp->vm_next = tmp->vm_prev = NULL;
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file_inode(file);

@@ -58,7 +58,7 @@ struct gcov_node {
 	struct dentry *dentry;
 	struct dentry **links;
 	int num_loaded;
-	char name[0];
+	char name[];
 };

 static const char objtree[] = OBJTREE;
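Zero-length arrays are a GNU extension; C99 flexible array members are the standard spelling and let the compiler catch misuse, such as taking sizeof of the member or placing it anywhere but the end of the struct. Allocation sites are unchanged and typically pair with struct_size() from <linux/overflow.h>, which computes the total size with overflow checking. A sketch of how such a node would be allocated (illustrative, not a hunk from this diff):

	/* allocate a gcov_node with room for its NUL-terminated name */
	node = kzalloc(struct_size(node, name, strlen(name) + 1), GFP_KERNEL);
	if (node)
		memcpy(node->name, name, strlen(name) + 1);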
@@ -38,7 +38,7 @@ static struct gcov_info *gcov_info_head;
 struct gcov_fn_info {
 	unsigned int ident;
 	unsigned int checksum;
-	unsigned int n_ctrs[0];
+	unsigned int n_ctrs[];
 };

 /**
@@ -78,7 +78,7 @@ struct gcov_info {
 	unsigned int n_functions;
 	const struct gcov_fn_info *functions;
 	unsigned int ctr_mask;
-	struct gcov_ctr_info counts[0];
+	struct gcov_ctr_info counts[];
 };

 /**
@@ -352,7 +352,7 @@ struct gcov_iterator {
 	unsigned int count;

 	int num_types;
-	struct type_info type_info[0];
+	struct type_info type_info[];
 };

 static struct gcov_fn_info *get_func(struct gcov_iterator *iter)

@@ -68,7 +68,7 @@ struct gcov_fn_info {
 	unsigned int ident;
 	unsigned int lineno_checksum;
 	unsigned int cfg_checksum;
-	struct gcov_ctr_info ctrs[0];
+	struct gcov_ctr_info ctrs[];
 };

 /**

@@ -175,7 +175,6 @@ unsigned long kallsyms_lookup_name(const char *name)
 	}
 	return module_kallsyms_lookup_name(name);
 }
-EXPORT_SYMBOL_GPL(kallsyms_lookup_name);

 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 				      unsigned long),
@@ -194,7 +193,6 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 	}
 	return module_kallsyms_on_each_symbol(fn, data);
 }
-EXPORT_SYMBOL_GPL(kallsyms_on_each_symbol);

 static unsigned long get_symbol_pos(unsigned long addr,
 				    unsigned long *symbolsize,
Some files were not shown because too many files have changed in this diff.