Merge branch 'akpm' (patches from Andrew)
Merge fifth set of updates from Andrew Morton: - A few things which were awaiting merges from linux-next: - rtc - ocfs2 - misc others - Willy's "dax" feature: direct fs access to memory (mainly NV-DIMMs) which isn't backed by pageframes. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (37 commits) rtc: add driver for DS1685 family of real time clocks MAINTAINERS: add entry for Maxim PMICs on Samsung boards lib/Kconfig: use bool instead of boolean powerpc: drop _PAGE_FILE and pte_file()-related helpers ocfs2: set append dio as a ro compat feature ocfs2: wait for orphan recovery first once append O_DIRECT write crash ocfs2: complete the rest request through buffer io ocfs2: do not fallback to buffer I/O write if appending ocfs2: allocate blocks in ocfs2_direct_IO_get_blocks ocfs2: implement ocfs2_direct_IO_write ocfs2: add orphan recovery types in ocfs2_recover_orphans ocfs2: add functions to add and remove inode in orphan dir ocfs2: prepare some interfaces used in append direct io MAINTAINERS: fix spelling mistake & remove trailing WS dax: does not work correctly with virtual aliasing caches brd: rename XIP to DAX ext4: add DAX functionality dax: add dax_zero_page_range ext2: get rid of most mentions of XIP in ext2 ext2: remove ext2_aops_xip ...
This commit is contained in:
commit
c397f8fa43
@ -34,6 +34,9 @@ configfs/
|
||||
- directory containing configfs documentation and example code.
|
||||
cramfs.txt
|
||||
- info on the cram filesystem for small storage (ROMs etc).
|
||||
dax.txt
|
||||
- info on avoiding the page cache for files stored on CPU-addressable
|
||||
storage devices.
|
||||
debugfs.txt
|
||||
- info on the debugfs filesystem.
|
||||
devpts.txt
|
||||
@ -154,5 +157,3 @@ xfs-self-describing-metadata.txt
|
||||
- info on XFS Self Describing Metadata.
|
||||
xfs.txt
|
||||
- info and mount options for the XFS filesystem.
|
||||
xip.txt
|
||||
- info on execute-in-place for file mappings.
|
||||
|
@ -199,8 +199,6 @@ prototypes:
|
||||
int (*releasepage) (struct page *, int);
|
||||
void (*freepage)(struct page *);
|
||||
int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
|
||||
int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
|
||||
unsigned long *);
|
||||
int (*migratepage)(struct address_space *, struct page *, struct page *);
|
||||
int (*launder_page)(struct page *);
|
||||
int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
|
||||
@ -225,7 +223,6 @@ invalidatepage: yes
|
||||
releasepage: yes
|
||||
freepage: yes
|
||||
direct_IO:
|
||||
get_xip_mem: maybe
|
||||
migratepage: yes (both)
|
||||
launder_page: yes
|
||||
is_partially_uptodate: yes
|
||||
|
94
Documentation/filesystems/dax.txt
Normal file
94
Documentation/filesystems/dax.txt
Normal file
@ -0,0 +1,94 @@
|
||||
Direct Access for files
|
||||
-----------------------
|
||||
|
||||
Motivation
|
||||
----------
|
||||
|
||||
The page cache is usually used to buffer reads and writes to files.
|
||||
It is also used to provide the pages which are mapped into userspace
|
||||
by a call to mmap.
|
||||
|
||||
For block devices that are memory-like, the page cache pages would be
|
||||
unnecessary copies of the original storage. The DAX code removes the
|
||||
extra copy by performing reads and writes directly to the storage device.
|
||||
For file mappings, the storage device is mapped directly into userspace.
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
If you have a block device which supports DAX, you can make a filesystem
|
||||
on it as usual. When mounting it, use the "-o dax" option on the command line
|
||||
or add 'dax' to the options in /etc/fstab.
|
||||
|
||||
|
||||
Implementation Tips for Block Driver Writers
|
||||
--------------------------------------------
|
||||
|
||||
To support DAX in your block driver, implement the 'direct_access'
|
||||
block device operation. It is used to translate the sector number
|
||||
(expressed in units of 512-byte sectors) to a page frame number (pfn)
|
||||
that identifies the physical page for the memory. It also returns a
|
||||
kernel virtual address that can be used to access the memory.
|
||||
|
||||
The direct_access method takes a 'size' parameter that indicates the
|
||||
number of bytes being requested. The function should return the number
|
||||
of bytes that can be contiguously accessed at that offset. It may also
|
||||
return a negative errno if an error occurs.
|
||||
|
||||
In order to support this method, the storage must be byte-accessible by
|
||||
the CPU at all times. If your device uses paging techniques to expose
|
||||
a large amount of memory through a smaller window, then you cannot
|
||||
implement direct_access. Equally, if your device can occasionally
|
||||
stall the CPU for an extended period, you should also not attempt to
|
||||
implement direct_access.
|
||||
|
||||
These block devices may be used for inspiration:
|
||||
- axonram: Axon DDR2 device driver
|
||||
- brd: RAM backed block device driver
|
||||
- dcssblk: s390 dcss block device driver
|
||||
|
||||
|
||||
Implementation Tips for Filesystem Writers
|
||||
------------------------------------------
|
||||
|
||||
Filesystem support consists of
|
||||
- adding support to mark inodes as being DAX by setting the S_DAX flag in
|
||||
i_flags
|
||||
- implementing the direct_IO address space operation, and calling
|
||||
dax_do_io() instead of blockdev_direct_IO() if S_DAX is set
|
||||
- implementing an mmap file operation for DAX files which sets the
|
||||
VM_MIXEDMAP flag on the VMA, and setting the vm_ops to include handlers
|
||||
for fault and page_mkwrite (which should probably call dax_fault() and
|
||||
dax_mkwrite(), passing the appropriate get_block() callback)
|
||||
- calling dax_truncate_page() instead of block_truncate_page() for DAX files
|
||||
- calling dax_zero_page_range() instead of zero_user() for DAX files
|
||||
- ensuring that there is sufficient locking between reads, writes,
|
||||
truncates and page faults
|
||||
|
||||
The get_block() callback passed to the DAX functions may return
|
||||
uninitialised extents. If it does, it must ensure that simultaneous
|
||||
calls to get_block() (for example by a page-fault racing with a read()
|
||||
or a write()) work correctly.
|
||||
|
||||
These filesystems may be used for inspiration:
|
||||
- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
|
||||
- ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt
|
||||
|
||||
|
||||
Shortcomings
|
||||
------------
|
||||
|
||||
Even if the kernel or its modules are stored on a filesystem that supports
|
||||
DAX on a block device that supports DAX, they will still be copied into RAM.
|
||||
|
||||
The DAX code does not work correctly on architectures which have virtually
|
||||
mapped caches such as ARM, MIPS and SPARC.
|
||||
|
||||
Calling get_user_pages() on a range of user memory that has been mmaped
|
||||
from a DAX file will fail as there is no 'struct page' to describe
|
||||
those pages. This problem is being worked on. That means that O_DIRECT
|
||||
reads/writes to those memory ranges from a non-DAX file will fail (note
|
||||
that O_DIRECT reads/writes _of a DAX file_ do work; it is the memory
|
||||
that is being accessed that is key here). Other things that will not
|
||||
work include RDMA, sendfile() and splice().
|
@ -20,6 +20,9 @@ minixdf Makes `df' act like Minix.
|
||||
check=none, nocheck (*) Don't do extra checking of bitmaps on mount
|
||||
(check=normal and check=strict options removed)
|
||||
|
||||
dax Use direct access (no page cache). See
|
||||
Documentation/filesystems/dax.txt.
|
||||
|
||||
debug Extra debugging information is sent to the
|
||||
kernel syslog. Useful for developers.
|
||||
|
||||
@ -56,8 +59,6 @@ noacl Don't support POSIX ACLs.
|
||||
|
||||
nobh Do not attach buffer_heads to file pagecache.
|
||||
|
||||
xip Use execute in place (no caching) if possible
|
||||
|
||||
grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
|
||||
|
||||
|
||||
|
@ -386,6 +386,10 @@ max_dir_size_kb=n This limits the size of directories so that any
|
||||
i_version Enable 64-bit inode version support. This option is
|
||||
off by default.
|
||||
|
||||
dax Use direct access (no page cache). See
|
||||
Documentation/filesystems/dax.txt. Note that
|
||||
this option is incompatible with data=journal.
|
||||
|
||||
Data Mode
|
||||
=========
|
||||
There are 3 different data modes:
|
||||
|
@ -591,8 +591,6 @@ struct address_space_operations {
|
||||
int (*releasepage) (struct page *, int);
|
||||
void (*freepage)(struct page *);
|
||||
ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
|
||||
struct page* (*get_xip_page)(struct address_space *, sector_t,
|
||||
int);
|
||||
/* migrate the contents of a page to the specified target */
|
||||
int (*migratepage) (struct page *, struct page *);
|
||||
int (*launder_page) (struct page *);
|
||||
@ -748,11 +746,6 @@ struct address_space_operations {
|
||||
and transfer data directly between the storage and the
|
||||
application's address space.
|
||||
|
||||
get_xip_page: called by the VM to translate a block number to a page.
|
||||
The page is valid until the corresponding filesystem is unmounted.
|
||||
Filesystems that want to use execute-in-place (XIP) need to implement
|
||||
it. An example implementation can be found in fs/ext2/xip.c.
|
||||
|
||||
migrate_page: This is used to compact the physical memory usage.
|
||||
If the VM wants to relocate a page (maybe off a memory card
|
||||
that is signalling imminent failure) it will pass a new page
|
||||
|
@ -1,71 +0,0 @@
|
||||
Execute-in-place for file mappings
|
||||
----------------------------------
|
||||
|
||||
Motivation
|
||||
----------
|
||||
File mappings are performed by mapping page cache pages to userspace. In
|
||||
addition, read&write type file operations also transfer data from/to the page
|
||||
cache.
|
||||
|
||||
For memory backed storage devices that use the block device interface, the page
|
||||
cache pages are in fact copies of the original storage. Various approaches
|
||||
exist to work around the need for an extra copy. The ramdisk driver for example
|
||||
does read the data into the page cache, keeps a reference, and discards the
|
||||
original data behind later on.
|
||||
|
||||
Execute-in-place solves this issue the other way around: instead of keeping
|
||||
data in the page cache, the need to have a page cache copy is eliminated
|
||||
completely. With execute-in-place, read&write type operations are performed
|
||||
directly from/to the memory backed storage device. For file mappings, the
|
||||
storage device itself is mapped directly into userspace.
|
||||
|
||||
This implementation was initially written for shared memory segments between
|
||||
different virtual machines on s390 hardware to allow multiple machines to
|
||||
share the same binaries and libraries.
|
||||
|
||||
Implementation
|
||||
--------------
|
||||
Execute-in-place is implemented in three steps: block device operation,
|
||||
address space operation, and file operations.
|
||||
|
||||
A block device operation named direct_access is used to translate the
|
||||
block device sector number to a page frame number (pfn) that identifies
|
||||
the physical page for the memory. It also returns a kernel virtual
|
||||
address that can be used to access the memory.
|
||||
|
||||
The direct_access method takes a 'size' parameter that indicates the
|
||||
number of bytes being requested. The function should return the number
|
||||
of bytes that can be contiguously accessed at that offset. It may also
|
||||
return a negative errno if an error occurs.
|
||||
|
||||
The block device operation is optional, these block devices support it as of
|
||||
today:
|
||||
- dcssblk: s390 dcss block device driver
|
||||
|
||||
An address space operation named get_xip_mem is used to retrieve references
|
||||
to a page frame number and a kernel address. To obtain these values a reference
|
||||
to an address_space is provided. This function assigns values to the kmem and
|
||||
pfn parameters. The third argument indicates whether the function should allocate
|
||||
blocks if needed.
|
||||
|
||||
This address space operation is mutually exclusive with readpage&writepage that
|
||||
do page cache read/write operations.
|
||||
The following filesystems support it as of today:
|
||||
- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
|
||||
|
||||
A set of file operations that do utilize get_xip_page can be found in
|
||||
mm/filemap_xip.c . The following file operation implementations are provided:
|
||||
- aio_read/aio_write
|
||||
- readv/writev
|
||||
- sendfile
|
||||
|
||||
The generic file operations do_sync_read/do_sync_write can be used to implement
|
||||
classic synchronous IO calls.
|
||||
|
||||
Shortcomings
|
||||
------------
|
||||
This implementation is limited to storage devices that are cpu addressable at
|
||||
all times (no highmem or such). It works well on rom/ram, but enhancements are
|
||||
needed to make it work with flash in read+write mode.
|
||||
Putting the Linux kernel and/or its modules on a xip filesystem does not mean
|
||||
they are not copied.
|
34
MAINTAINERS
34
MAINTAINERS
@ -34,7 +34,7 @@ trivial patch so apply some common sense.
|
||||
generalized kernel feature ready for next time.
|
||||
|
||||
PLEASE check your patch with the automated style checker
|
||||
(scripts/checkpatch.pl) to catch trival style violations.
|
||||
(scripts/checkpatch.pl) to catch trivial style violations.
|
||||
See Documentation/CodingStyle for guidance here.
|
||||
|
||||
PLEASE CC: the maintainers and mailing lists that are generated
|
||||
@ -2965,6 +2965,12 @@ S: Supported
|
||||
F: drivers/input/touchscreen/cyttsp*
|
||||
F: include/linux/input/cyttsp.h
|
||||
|
||||
DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK
|
||||
M: Joshua Kinard <kumba@gentoo.org>
|
||||
S: Maintained
|
||||
F: drivers/rtc/rtc-ds1685.c
|
||||
F: include/linux/rtc/ds1685.h
|
||||
|
||||
DAMA SLAVE for AX.25
|
||||
M: Joerg Reuter <jreuter@yaina.de>
|
||||
W: http://yaina.de/jreuter/
|
||||
@ -3153,6 +3159,12 @@ L: linux-i2c@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/i2c/busses/i2c-diolan-u2c.c
|
||||
|
||||
DIRECT ACCESS (DAX)
|
||||
M: Matthew Wilcox <willy@linux.intel.com>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
S: Supported
|
||||
F: fs/dax.c
|
||||
|
||||
DIRECTORY NOTIFICATION (DNOTIFY)
|
||||
M: Eric Paris <eparis@parisplace.org>
|
||||
S: Maintained
|
||||
@ -6212,6 +6224,26 @@ S: Supported
|
||||
F: drivers/power/max14577_charger.c
|
||||
F: drivers/power/max77693_charger.c
|
||||
|
||||
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
|
||||
M: Chanwoo Choi <cw00.choi@samsung.com>
|
||||
M: Krzysztof Kozlowski <k.kozlowski@samsung.com>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/*/max14577.c
|
||||
F: drivers/*/max77686.c
|
||||
F: drivers/*/max77693.c
|
||||
F: drivers/extcon/extcon-max14577.c
|
||||
F: drivers/extcon/extcon-max77693.c
|
||||
F: drivers/rtc/rtc-max77686.c
|
||||
F: drivers/clk/clk-max77686.c
|
||||
F: Documentation/devicetree/bindings/mfd/max14577.txt
|
||||
F: Documentation/devicetree/bindings/mfd/max77686.txt
|
||||
F: Documentation/devicetree/bindings/mfd/max77693.txt
|
||||
F: Documentation/devicetree/bindings/clock/maxim,max77686.txt
|
||||
F: include/linux/mfd/max14577*.h
|
||||
F: include/linux/mfd/max77686*.h
|
||||
F: include/linux/mfd/max77693*.h
|
||||
|
||||
MAXIRADIO FM RADIO RECEIVER DRIVER
|
||||
M: Hans Verkuil <hverkuil@xs4all.nl>
|
||||
L: linux-media@vger.kernel.org
|
||||
|
@ -58,7 +58,7 @@ &i2c0 {
|
||||
status = "okay";
|
||||
|
||||
isl9305: isl9305@68 {
|
||||
compatible = "isl,isl9305";
|
||||
compatible = "isil,isl9305";
|
||||
reg = <0x68>;
|
||||
|
||||
regulators {
|
||||
|
@ -333,8 +333,8 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
|
||||
/*
|
||||
* Encode and decode a swap entry.
|
||||
* Note that the bits we use in a PTE for representing a swap entry
|
||||
* must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
|
||||
*_PAGE_HASHPTE bit (if used). -- paulus
|
||||
* must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
|
||||
* -- paulus
|
||||
*/
|
||||
#define __swp_type(entry) ((entry).val & 0x1f)
|
||||
#define __swp_offset(entry) ((entry).val >> 5)
|
||||
@ -342,11 +342,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
|
||||
|
||||
/* Encode and decode a nonlinear file mapping entry */
|
||||
#define PTE_FILE_MAX_BITS 29
|
||||
#define pte_to_pgoff(pte) (pte_val(pte) >> 3)
|
||||
#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
|
||||
|
||||
#ifndef CONFIG_PPC_4K_PAGES
|
||||
void pgtable_cache_init(void);
|
||||
#else
|
||||
|
@ -352,9 +352,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
|
||||
#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
|
||||
#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
|
||||
#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
|
||||
#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
|
||||
|
||||
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
|
||||
void pgtable_cache_init(void);
|
||||
@ -389,7 +386,7 @@ void pgtable_cache_init(void);
|
||||
* The last three bits are intentionally left to zero. This memory location
|
||||
* is also used for normal page PTE pointers. So if we have any pointers
|
||||
* left around while we collapse a hugepage, we need to make sure
|
||||
* _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them
|
||||
* _PAGE_PRESENT bit of that is zero when we look at them
|
||||
*/
|
||||
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
|
||||
{
|
||||
|
@ -34,7 +34,6 @@ static inline int pte_write(pte_t pte)
|
||||
{ return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; }
|
||||
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
|
||||
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
|
||||
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
|
||||
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
|
||||
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
|
||||
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
|
||||
|
@ -38,7 +38,6 @@
|
||||
*/
|
||||
|
||||
#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */
|
||||
#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */
|
||||
#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */
|
||||
#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
|
||||
#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
|
||||
|
@ -44,9 +44,6 @@
|
||||
* - PRESENT *must* be in the bottom three bits because swap cache
|
||||
* entries use the top 29 bits for TLB2.
|
||||
*
|
||||
* - FILE *must* be in the bottom three bits because swap cache
|
||||
* entries use the top 29 bits for TLB2.
|
||||
*
|
||||
* - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
|
||||
* because it doesn't support SMP. However, some later 460 variants
|
||||
* have -some- form of SMP support and so I keep the bit there for
|
||||
@ -68,7 +65,6 @@
|
||||
*
|
||||
* There are three protection bits available for SWAP entry:
|
||||
* _PAGE_PRESENT
|
||||
* _PAGE_FILE
|
||||
* _PAGE_HASHPTE (if HW has)
|
||||
*
|
||||
* So those three bits have to be inside of 0-2nd LSB of PTE.
|
||||
@ -77,7 +73,6 @@
|
||||
|
||||
#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */
|
||||
#define _PAGE_RW 0x00000002 /* S: Write permission */
|
||||
#define _PAGE_FILE 0x00000004 /* S: nonlinear file mapping */
|
||||
#define _PAGE_EXEC 0x00000004 /* H: Execute permission */
|
||||
#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */
|
||||
#define _PAGE_DIRTY 0x00000010 /* S: Page dirty */
|
||||
|
@ -29,7 +29,6 @@
|
||||
|
||||
/* Definitions for 8xx embedded chips. */
|
||||
#define _PAGE_PRESENT 0x0001 /* Page is valid */
|
||||
#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */
|
||||
#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
|
||||
#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
|
||||
#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
/* Architected bits */
|
||||
#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
|
||||
#define _PAGE_FILE 0x000002 /* (!present only) software: pte holds file offset */
|
||||
#define _PAGE_SW1 0x000002
|
||||
#define _PAGE_BAP_SR 0x000004
|
||||
#define _PAGE_BAP_UR 0x000008
|
||||
|
@ -13,14 +13,11 @@
|
||||
- PRESENT *must* be in the bottom three bits because swap cache
|
||||
entries use the top 29 bits.
|
||||
|
||||
- FILE *must* be in the bottom three bits because swap cache
|
||||
entries use the top 29 bits.
|
||||
*/
|
||||
|
||||
/* Definitions for FSL Book-E Cores */
|
||||
#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */
|
||||
#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */
|
||||
#define _PAGE_FILE 0x00002 /* S: when !present: nonlinear file mapping */
|
||||
#define _PAGE_RW 0x00004 /* S: Write permission (SW) */
|
||||
#define _PAGE_DIRTY 0x00008 /* S: Page dirty */
|
||||
#define _PAGE_EXEC 0x00010 /* H: SX permission */
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
|
||||
#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
|
||||
#define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */
|
||||
#define _PAGE_USER 0x004 /* usermode access allowed */
|
||||
#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
|
||||
#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
|
||||
|
@ -16,7 +16,6 @@
|
||||
*/
|
||||
#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
|
||||
#define _PAGE_USER 0x0002 /* matches one of the PP bits */
|
||||
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
|
||||
#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
|
||||
#define _PAGE_GUARDED 0x0008
|
||||
/* We can derive Memory coherence from _PAGE_NO_CACHE */
|
||||
|
@ -782,7 +782,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
|
||||
{
|
||||
pmd_t pmd;
|
||||
/*
|
||||
* For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
|
||||
* For a valid pte, we would have _PAGE_PRESENT always
|
||||
* set. We use this to check THP page at pmd level.
|
||||
* leaf pte for huge page, bottom two bits != 00
|
||||
*/
|
||||
|
@ -393,14 +393,15 @@ config BLK_DEV_RAM_SIZE
|
||||
The default value is 4096 kilobytes. Only change this if you know
|
||||
what you are doing.
|
||||
|
||||
config BLK_DEV_XIP
|
||||
bool "Support XIP filesystems on RAM block device"
|
||||
depends on BLK_DEV_RAM
|
||||
config BLK_DEV_RAM_DAX
|
||||
bool "Support Direct Access (DAX) to RAM block devices"
|
||||
depends on BLK_DEV_RAM && FS_DAX
|
||||
default n
|
||||
help
|
||||
Support XIP filesystems (such as ext2 with XIP support on) on
|
||||
top of block ram device. This will slightly enlarge the kernel, and
|
||||
will prevent RAM block device backing store memory from being
|
||||
Support filesystems using DAX to access RAM block devices. This
|
||||
avoids double-buffering data in the page cache before copying it
|
||||
to the block device. Answering Y will slightly enlarge the kernel,
|
||||
and will prevent RAM block device backing store memory from being
|
||||
allocated from highmem (only a problem for highmem systems).
|
||||
|
||||
config CDROM_PKTCDVD
|
||||
|
@ -97,13 +97,13 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
|
||||
* Must use NOIO because we don't want to recurse back into the
|
||||
* block or filesystem layers from page reclaim.
|
||||
*
|
||||
* Cannot support XIP and highmem, because our ->direct_access
|
||||
* routine for XIP must return memory that is always addressable.
|
||||
* If XIP was reworked to use pfns and kmap throughout, this
|
||||
* Cannot support DAX and highmem, because our ->direct_access
|
||||
* routine for DAX must return memory that is always addressable.
|
||||
* If DAX was reworked to use pfns and kmap throughout, this
|
||||
* restriction might be able to be lifted.
|
||||
*/
|
||||
gfp_flags = GFP_NOIO | __GFP_ZERO;
|
||||
#ifndef CONFIG_BLK_DEV_XIP
|
||||
#ifndef CONFIG_BLK_DEV_RAM_DAX
|
||||
gfp_flags |= __GFP_HIGHMEM;
|
||||
#endif
|
||||
page = alloc_page(gfp_flags);
|
||||
@ -369,7 +369,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_XIP
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
static long brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void **kaddr, unsigned long *pfn, long size)
|
||||
{
|
||||
@ -390,6 +390,8 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
*/
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
#else
|
||||
#define brd_direct_access NULL
|
||||
#endif
|
||||
|
||||
static int brd_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
@ -430,9 +432,7 @@ static const struct block_device_operations brd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.rw_page = brd_rw_page,
|
||||
.ioctl = brd_ioctl,
|
||||
#ifdef CONFIG_BLK_DEV_XIP
|
||||
.direct_access = brd_direct_access,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -801,6 +801,96 @@ config RTC_DRV_DS1553
|
||||
This driver can also be built as a module. If so, the module
|
||||
will be called rtc-ds1553.
|
||||
|
||||
config RTC_DRV_DS1685_FAMILY
|
||||
tristate "Dallas/Maxim DS1685 Family"
|
||||
help
|
||||
If you say yes here you get support for the Dallas/Maxim DS1685
|
||||
family of real time chips. This family includes the DS1685/DS1687,
|
||||
DS1689/DS1693, DS17285/DS17287, DS17485/DS17487, and
|
||||
DS17885/DS17887 chips.
|
||||
|
||||
This driver can also be built as a module. If so, the module
|
||||
will be called rtc-ds1685.
|
||||
|
||||
choice
|
||||
prompt "Subtype"
|
||||
depends on RTC_DRV_DS1685_FAMILY
|
||||
default RTC_DRV_DS1685
|
||||
|
||||
config RTC_DRV_DS1685
|
||||
bool "DS1685/DS1687"
|
||||
help
|
||||
This enables support for the Dallas/Maxim DS1685/DS1687 real time
|
||||
clock chip.
|
||||
|
||||
This chip is commonly found in SGI O2 (IP32) and SGI Octane (IP30)
|
||||
systems, as well as EPPC-405-UC modules by electronic system design
|
||||
GmbH.
|
||||
|
||||
config RTC_DRV_DS1689
|
||||
bool "DS1689/DS1693"
|
||||
help
|
||||
This enables support for the Dallas/Maxim DS1689/DS1693 real time
|
||||
clock chip.
|
||||
|
||||
This is an older RTC chip, supplanted by the DS1685/DS1687 above,
|
||||
which supports a few minor features such as Vcc, Vbat, and Power
|
||||
Cycle counters, plus a customer-specific, 8-byte ROM/Serial number.
|
||||
|
||||
It also works for the even older DS1688/DS1691 RTC chips, which are
|
||||
virtually the same and carry the same model number. Both chips
|
||||
have 114 bytes of user NVRAM.
|
||||
|
||||
config RTC_DRV_DS17285
|
||||
bool "DS17285/DS17287"
|
||||
help
|
||||
This enables support for the Dallas/Maxim DS17285/DS17287 real time
|
||||
clock chip.
|
||||
|
||||
This chip features 2kb of extended NV-SRAM. It may possibly be
|
||||
found in some SGI O2 systems (rare).
|
||||
|
||||
config RTC_DRV_DS17485
|
||||
bool "DS17485/DS17487"
|
||||
help
|
||||
This enables support for the Dallas/Maxim DS17485/DS17487 real time
|
||||
clock chip.
|
||||
|
||||
This chip features 4kb of extended NV-SRAM.
|
||||
|
||||
config RTC_DRV_DS17885
|
||||
bool "DS17885/DS17887"
|
||||
help
|
||||
This enables support for the Dallas/Maxim DS17885/DS17887 real time
|
||||
clock chip.
|
||||
|
||||
This chip features 8kb of extended NV-SRAM.
|
||||
|
||||
endchoice
|
||||
|
||||
config RTC_DS1685_PROC_REGS
|
||||
bool "Display register values in /proc"
|
||||
depends on RTC_DRV_DS1685_FAMILY && PROC_FS
|
||||
help
|
||||
Enable this to display a readout of all of the RTC registers in
|
||||
/proc/drivers/rtc. Keep in mind that this can potentially lead
|
||||
to lost interrupts, as reading Control Register C will clear
|
||||
all pending IRQ flags.
|
||||
|
||||
Unless you are debugging this driver, choose N.
|
||||
|
||||
config RTC_DS1685_SYSFS_REGS
|
||||
bool "SysFS access to RTC register bits"
|
||||
depends on RTC_DRV_DS1685_FAMILY && SYSFS
|
||||
help
|
||||
Enable this to provide access to the RTC control register bits
|
||||
in /sys. Some of the bits are read-write, others are read-only.
|
||||
|
||||
Keep in mind that reading Control C's bits automatically clears
|
||||
all pending IRQ flags - this can cause lost interrupts.
|
||||
|
||||
If you know that you need access to these bits, choose Y; otherwise, choose N.
|
||||
|
||||
config RTC_DRV_DS1742
|
||||
tristate "Maxim/Dallas DS1742/1743"
|
||||
depends on HAS_IOMEM
|
||||
|
@ -54,6 +54,7 @@ obj-$(CONFIG_RTC_DRV_DS1390) += rtc-ds1390.o
|
||||
obj-$(CONFIG_RTC_DRV_DS1511) += rtc-ds1511.o
|
||||
obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
|
||||
obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o
|
||||
obj-$(CONFIG_RTC_DRV_DS1685_FAMILY) += rtc-ds1685.o
|
||||
obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o
|
||||
obj-$(CONFIG_RTC_DRV_DS2404) += rtc-ds2404.o
|
||||
obj-$(CONFIG_RTC_DRV_DS3232) += rtc-ds3232.o
|
||||
|
2252
drivers/rtc/rtc-ds1685.c
Normal file
2252
drivers/rtc/rtc-ds1685.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -275,7 +275,8 @@ static int isl12022_probe(struct i2c_client *client,
|
||||
|
||||
#ifdef CONFIG_OF
|
||||
static const struct of_device_id isl12022_dt_match[] = {
|
||||
{ .compatible = "isl,isl12022" },
|
||||
{ .compatible = "isl,isl12022" }, /* for backward compat., don't use */
|
||||
{ .compatible = "isil,isl12022" },
|
||||
{ },
|
||||
};
|
||||
#endif
|
||||
|
@ -644,7 +644,8 @@ static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend,
|
||||
|
||||
#ifdef CONFIG_OF
|
||||
static const struct of_device_id isl12057_dt_match[] = {
|
||||
{ .compatible = "isl,isl12057" },
|
||||
{ .compatible = "isl,isl12057" }, /* for backward compat., don't use */
|
||||
{ .compatible = "isil,isl12057" },
|
||||
{ },
|
||||
};
|
||||
#endif
|
||||
|
@ -537,8 +537,8 @@ static const struct i2c_device_id isl29028_id[] = {
|
||||
MODULE_DEVICE_TABLE(i2c, isl29028_id);
|
||||
|
||||
static const struct of_device_id isl29028_of_match[] = {
|
||||
{ .compatible = "isl,isl29028", },
|
||||
{ .compatible = "isil,isl29028", },/* deprecated, don't use */
|
||||
{ .compatible = "isl,isl29028", }, /* for backward compat., don't use */
|
||||
{ .compatible = "isil,isl29028", },
|
||||
{ },
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, isl29028_of_match);
|
||||
|
22
fs/Kconfig
22
fs/Kconfig
@ -13,13 +13,6 @@ if BLOCK
|
||||
source "fs/ext2/Kconfig"
|
||||
source "fs/ext3/Kconfig"
|
||||
source "fs/ext4/Kconfig"
|
||||
|
||||
config FS_XIP
|
||||
# execute in place
|
||||
bool
|
||||
depends on EXT2_FS_XIP
|
||||
default y
|
||||
|
||||
source "fs/jbd/Kconfig"
|
||||
source "fs/jbd2/Kconfig"
|
||||
|
||||
@ -40,6 +33,21 @@ source "fs/ocfs2/Kconfig"
|
||||
source "fs/btrfs/Kconfig"
|
||||
source "fs/nilfs2/Kconfig"
|
||||
|
||||
config FS_DAX
|
||||
bool "Direct Access (DAX) support"
|
||||
depends on MMU
|
||||
depends on !(ARM || MIPS || SPARC)
|
||||
help
|
||||
Direct Access (DAX) can be used on memory-backed block devices.
|
||||
If the block device supports DAX and the filesystem supports DAX,
|
||||
then you can avoid using the pagecache to buffer I/Os. Turning
|
||||
on this option will compile in support for DAX; you will need to
|
||||
mount the filesystem using the -o dax option.
|
||||
|
||||
If you do not have a block device that is capable of using this,
|
||||
or if unsure, say N. Saying Y will increase the size of the kernel
|
||||
by about 5kB.
|
||||
|
||||
endif # BLOCK
|
||||
|
||||
# Posix ACL utility routines
|
||||
|
@ -28,6 +28,7 @@ obj-$(CONFIG_SIGNALFD) += signalfd.o
|
||||
obj-$(CONFIG_TIMERFD) += timerfd.o
|
||||
obj-$(CONFIG_EVENTFD) += eventfd.o
|
||||
obj-$(CONFIG_AIO) += aio.o
|
||||
obj-$(CONFIG_FS_DAX) += dax.o
|
||||
obj-$(CONFIG_FILE_LOCKING) += locks.o
|
||||
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
|
||||
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
|
||||
|
534
fs/dax.c
Normal file
534
fs/dax.c
Normal file
@ -0,0 +1,534 @@
|
||||
/*
|
||||
* fs/dax.c - Direct Access filesystem code
|
||||
* Copyright (c) 2013-2014 Intel Corporation
|
||||
* Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
|
||||
* Author: Ross Zwisler <ross.zwisler@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/vmstat.h>
|
||||
|
||||
/*
 * dax_clear_blocks - zero a byte range of the inode's backing device
 * @inode: inode whose superblock's block device is written to
 * @block: first filesystem block to clear (converted to 512-byte sectors)
 * @size: number of bytes to clear
 *
 * The device memory is obtained in extents through bdev_direct_access() and
 * zeroed one page-bounded chunk at a time, yielding the CPU after every
 * chunk.  Returns 0 on success or the negative value reported by
 * bdev_direct_access().
 */
int dax_clear_blocks(struct inode *inode, sector_t block, long size)
{
	struct block_device *bdev = inode->i_sb->s_bdev;
	sector_t sector = block << (inode->i_blkbits - 9);

	might_sleep();
	do {
		void *kaddr;
		unsigned long pfn;
		long avail;

		avail = bdev_direct_access(bdev, sector, &kaddr, &pfn, size);
		if (avail < 0)
			return avail;
		/* The device must never hand back more than was asked for */
		BUG_ON(size < avail);

		while (avail > 0) {
			/* Bytes left until the next page boundary */
			unsigned chunk = PAGE_SIZE - offset_in_page(kaddr);

			if (chunk > avail)
				chunk = avail;

			/* chunk never exceeds PAGE_SIZE, so "not partial" means a whole page */
			if (chunk < PAGE_SIZE)
				memset(kaddr, 0, chunk);
			else
				clear_page(kaddr);

			kaddr += chunk;
			size -= chunk;
			avail -= chunk;

			/* Sector accounting below only works for 512-byte multiples */
			BUG_ON(chunk & 511);
			sector += chunk / 512;
			cond_resched();
		}
	} while (size);

	return 0;
}
EXPORT_SYMBOL_GPL(dax_clear_blocks);
|
||||
|
||||
static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
|
||||
{
|
||||
unsigned long pfn;
|
||||
sector_t sector = bh->b_blocknr << (blkbits - 9);
|
||||
return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
|
||||
}
|
||||
|
||||
/*
 * Zero the parts of a buffer of @size bytes at @addr that the pending I/O
 * will not overwrite: the leading @first bytes, and everything from the
 * offset where the I/O stops (end - pos + first) up to @size.
 */
static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
			loff_t end)
{
	/* Offset within the buffer of the first byte past the I/O */
	loff_t io_stop = end - pos + first;

	if (first > 0)
		memset(addr, 0, first);

	if (io_stop < size)
		memset(addr + io_stop, 0, size - io_stop);
}
|
||||
|
||||
static bool buffer_written(struct buffer_head *bh)
|
||||
{
|
||||
return buffer_mapped(bh) && !buffer_unwritten(bh);
|
||||
}
|
||||
|
||||
/*
|
||||
* When ext4 encounters a hole, it returns without modifying the buffer_head
|
||||
* which means that we can't trust b_size. To cope with this, we set b_state
|
||||
* to 0 before calling get_block and, if any bit is set, we know we can trust
|
||||
* b_size. Unfortunate, really, since ext4 knows precisely how long a hole is
|
||||
* and would save us time calling get_block repeatedly.
|
||||
*/
|
||||
static bool buffer_size_valid(struct buffer_head *bh)
|
||||
{
|
||||
return bh->b_state != 0;
|
||||
}
|
||||
|
||||
/*
 * dax_io - copy data between @iter and the device memory backing @inode
 * @rw: WRITE to copy from @iter to the device, anything else to read
 * @inode: file being accessed
 * @iter: user/kernel buffers taking part in the transfer
 * @start: file offset of the first byte
 * @end: file offset one past the last byte (clamped to i_size for reads)
 * @get_block: filesystem callback mapping file blocks to device blocks
 * @bh: caller-provided scratch buffer_head, reused across get_block calls
 *
 * Three cursors drive the loop:
 *   pos    - current file offset;
 *   max    - end of the range that @addr is currently valid for;
 *   bh_max - end of the file range described by the last get_block() call.
 *
 * On reads, ranges that are not mapped-and-written are treated as holes and
 * zero-filled into @iter instead of touching the device.
 *
 * Returns the number of bytes transferred if any progress was made.  If no
 * byte was transferred, returns the error that stopped the loop (or 0 when
 * the range was empty).
 */
static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
		      loff_t start, loff_t end, get_block_t get_block,
		      struct buffer_head *bh)
{
	ssize_t retval = 0;
	loff_t pos = start;
	loff_t max = start;
	loff_t bh_max = start;
	void *addr;
	bool hole = false;

	if (rw != WRITE)
		end = min(end, i_size_read(inode));

	while (pos < end) {
		unsigned len;

		if (pos == max) {
			/* Ran out of addressable memory: look up the next chunk */
			unsigned blkbits = inode->i_blkbits;
			sector_t block = pos >> blkbits;
			unsigned first = pos - (block << blkbits);
			long size;

			if (pos == bh_max) {
				/* The previous mapping is used up: ask the fs again */
				bh->b_size = PAGE_ALIGN(end - pos);
				bh->b_state = 0;
				retval = get_block(inode, block, bh,
						   rw == WRITE);
				if (retval)
					break;
				/* Untouched bh (a hole): assume one block */
				if (!buffer_size_valid(bh))
					bh->b_size = 1 << blkbits;
				bh_max = pos - first + bh->b_size;
			} else {
				/*
				 * Still inside the previous mapping: advance
				 * the buffer_head past what has been consumed.
				 */
				unsigned done = bh->b_size -
						(bh_max - (pos - first));
				bh->b_blocknr += done >> blkbits;
				bh->b_size -= done;
			}

			hole = (rw != WRITE) && !buffer_written(bh);
			if (hole) {
				addr = NULL;
				size = bh->b_size - first;
			} else {
				retval = dax_get_addr(bh, &addr, blkbits);
				if (retval < 0)
					break;
				/* Fresh blocks: zero what this I/O will not cover */
				if (buffer_unwritten(bh) || buffer_new(bh))
					dax_new_buf(addr, retval, first, pos,
						    end);
				addr += first;
				size = retval - first;
			}
			max = min(pos + size, end);
		}

		if (rw == WRITE)
			len = copy_from_iter(addr, max - pos, iter);
		else if (!hole)
			len = copy_to_iter(addr, max - pos, iter);
		else
			len = iov_iter_zero(max - pos, iter);

		if (!len) {
			/*
			 * No progress was possible.  retval may still hold
			 * the positive byte count from dax_get_addr(), which
			 * must not be returned as if data had been moved.
			 */
			retval = -EFAULT;
			break;
		}

		pos += len;
		addr += len;
	}

	return (pos == start) ? retval : pos - start;
}
|
||||
|
||||
/**
|
||||
* dax_do_io - Perform I/O to a DAX file
|
||||
* @rw: READ to read or WRITE to write
|
||||
* @iocb: The control block for this I/O
|
||||
* @inode: The file which the I/O is directed at
|
||||
* @iter: The addresses to do I/O from or to
|
||||
* @pos: The file offset where the I/O starts
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
* @end_io: A filesystem callback for I/O completion
|
||||
* @flags: See below
|
||||
*
|
||||
* This function uses the same locking scheme as do_blockdev_direct_IO:
|
||||
* If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
|
||||
* caller for writes. For reads, we take and release the i_mutex ourselves.
|
||||
* If DIO_LOCKING is not set, the filesystem takes care of its own locking.
|
||||
* As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
|
||||
* is in progress.
|
||||
*/
|
||||
ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
|
||||
struct iov_iter *iter, loff_t pos,
|
||||
get_block_t get_block, dio_iodone_t end_io, int flags)
|
||||
{
|
||||
struct buffer_head bh;
|
||||
ssize_t retval = -EINVAL;
|
||||
loff_t end = pos + iov_iter_count(iter);
|
||||
|
||||
memset(&bh, 0, sizeof(bh));
|
||||
|
||||
if ((flags & DIO_LOCKING) && (rw == READ)) {
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
|
||||
if (retval) {
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Protects against truncate */
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
|
||||
retval = dax_io(rw, inode, iter, pos, end, get_block, &bh);
|
||||
|
||||
if ((flags & DIO_LOCKING) && (rw == READ))
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
if ((retval > 0) && end_io)
|
||||
end_io(iocb, pos, retval, bh.b_private);
|
||||
|
||||
inode_dio_done(inode);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_do_io);
|
||||
|
||||
/*
|
||||
* The user has performed a load from a hole in the file. Allocating
|
||||
* a new page in the file would cause excessive storage usage for
|
||||
* workloads with sparse files. We allocate a page cache page instead.
|
||||
* We'll kick it out of the page cache if it's ever written to,
|
||||
* otherwise it will simply fall out of the page cache under memory
|
||||
* pressure without ever having been dirtied.
|
||||
*/
|
||||
static int dax_load_hole(struct address_space *mapping, struct page *page,
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
unsigned long size;
|
||||
struct inode *inode = mapping->host;
|
||||
if (!page)
|
||||
page = find_or_create_page(mapping, vmf->pgoff,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page)
|
||||
return VM_FAULT_OOM;
|
||||
/* Recheck i_size under page lock to avoid truncate race */
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
vmf->page = page;
|
||||
return VM_FAULT_LOCKED;
|
||||
}
|
||||
|
||||
static int copy_user_bh(struct page *to, struct buffer_head *bh,
|
||||
unsigned blkbits, unsigned long vaddr)
|
||||
{
|
||||
void *vfrom, *vto;
|
||||
if (dax_get_addr(bh, &vfrom, blkbits) < 0)
|
||||
return -EIO;
|
||||
vto = kmap_atomic(to);
|
||||
copy_user_page(vto, vfrom, vaddr, to);
|
||||
kunmap_atomic(vto);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
|
||||
unsigned long vaddr = (unsigned long)vmf->virtual_address;
|
||||
void *addr;
|
||||
unsigned long pfn;
|
||||
pgoff_t size;
|
||||
int error;
|
||||
|
||||
i_mmap_lock_read(mapping);
|
||||
|
||||
/*
|
||||
* Check truncate didn't happen while we were allocating a block.
|
||||
* If it did, this block may or may not be still allocated to the
|
||||
* file. We can't tell the filesystem to free it because we can't
|
||||
* take i_mutex here. In the worst case, the file still has blocks
|
||||
* allocated past the end of the file.
|
||||
*/
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (unlikely(vmf->pgoff >= size)) {
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = bdev_direct_access(bh->b_bdev, sector, &addr, &pfn, bh->b_size);
|
||||
if (error < 0)
|
||||
goto out;
|
||||
if (error < PAGE_SIZE) {
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (buffer_unwritten(bh) || buffer_new(bh))
|
||||
clear_page(addr);
|
||||
|
||||
error = vm_insert_mixed(vma, vaddr, pfn);
|
||||
|
||||
out:
|
||||
i_mmap_unlock_read(mapping);
|
||||
|
||||
if (bh->b_end_io)
|
||||
bh->b_end_io(bh, 1);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
get_block_t get_block)
|
||||
{
|
||||
struct file *file = vma->vm_file;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
struct page *page;
|
||||
struct buffer_head bh;
|
||||
unsigned long vaddr = (unsigned long)vmf->virtual_address;
|
||||
unsigned blkbits = inode->i_blkbits;
|
||||
sector_t block;
|
||||
pgoff_t size;
|
||||
int error;
|
||||
int major = 0;
|
||||
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
memset(&bh, 0, sizeof(bh));
|
||||
block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
|
||||
bh.b_size = PAGE_SIZE;
|
||||
|
||||
repeat:
|
||||
page = find_get_page(mapping, vmf->pgoff);
|
||||
if (page) {
|
||||
if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
|
||||
page_cache_release(page);
|
||||
return VM_FAULT_RETRY;
|
||||
}
|
||||
if (unlikely(page->mapping != mapping)) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
goto repeat;
|
||||
}
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (unlikely(vmf->pgoff >= size)) {
|
||||
/*
|
||||
* We have a struct page covering a hole in the file
|
||||
* from a read fault and we've raced with a truncate
|
||||
*/
|
||||
error = -EIO;
|
||||
goto unlock_page;
|
||||
}
|
||||
}
|
||||
|
||||
error = get_block(inode, block, &bh, 0);
|
||||
if (!error && (bh.b_size < PAGE_SIZE))
|
||||
error = -EIO; /* fs corruption? */
|
||||
if (error)
|
||||
goto unlock_page;
|
||||
|
||||
if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
|
||||
if (vmf->flags & FAULT_FLAG_WRITE) {
|
||||
error = get_block(inode, block, &bh, 1);
|
||||
count_vm_event(PGMAJFAULT);
|
||||
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
|
||||
major = VM_FAULT_MAJOR;
|
||||
if (!error && (bh.b_size < PAGE_SIZE))
|
||||
error = -EIO;
|
||||
if (error)
|
||||
goto unlock_page;
|
||||
} else {
|
||||
return dax_load_hole(mapping, page, vmf);
|
||||
}
|
||||
}
|
||||
|
||||
if (vmf->cow_page) {
|
||||
struct page *new_page = vmf->cow_page;
|
||||
if (buffer_written(&bh))
|
||||
error = copy_user_bh(new_page, &bh, blkbits, vaddr);
|
||||
else
|
||||
clear_user_highpage(new_page, vaddr);
|
||||
if (error)
|
||||
goto unlock_page;
|
||||
vmf->page = page;
|
||||
if (!page) {
|
||||
i_mmap_lock_read(mapping);
|
||||
/* Check we didn't race with truncate */
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >>
|
||||
PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size) {
|
||||
i_mmap_unlock_read(mapping);
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
return VM_FAULT_LOCKED;
|
||||
}
|
||||
|
||||
/* Check we didn't race with a read fault installing a new page */
|
||||
if (!page && major)
|
||||
page = find_lock_page(mapping, vmf->pgoff);
|
||||
|
||||
if (page) {
|
||||
unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
|
||||
PAGE_CACHE_SIZE, 0);
|
||||
delete_from_page_cache(page);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
error = dax_insert_mapping(inode, &bh, vma, vmf);
|
||||
|
||||
out:
|
||||
if (error == -ENOMEM)
|
||||
return VM_FAULT_OOM | major;
|
||||
/* -EBUSY is fine, somebody else faulted on the same PTE */
|
||||
if ((error < 0) && (error != -EBUSY))
|
||||
return VM_FAULT_SIGBUS | major;
|
||||
return VM_FAULT_NOPAGE | major;
|
||||
|
||||
unlock_page:
|
||||
if (page) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
/**
|
||||
* dax_fault - handle a page fault on a DAX file
|
||||
* @vma: The virtual memory area where the fault occurred
|
||||
* @vmf: The description of the fault
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
*
|
||||
* When a page fault occurs, filesystems may call this helper in their
|
||||
* fault handler for DAX files.
|
||||
*/
|
||||
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
get_block_t get_block)
|
||||
{
|
||||
int result;
|
||||
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE) {
|
||||
sb_start_pagefault(sb);
|
||||
file_update_time(vma->vm_file);
|
||||
}
|
||||
result = do_dax_fault(vma, vmf, get_block);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
sb_end_pagefault(sb);
|
||||
|
||||
return result;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_fault);
|
||||
|
||||
/**
|
||||
* dax_zero_page_range - zero a range within a page of a DAX file
|
||||
* @inode: The file being truncated
|
||||
* @from: The file offset that is being truncated to
|
||||
* @length: The number of bytes to zero
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
*
|
||||
* This function can be called by a filesystem when it is zeroing part of a
|
||||
* page in a DAX file. This is intended for hole-punch operations. If
|
||||
* you are truncating a file, the helper function dax_truncate_page() may be
|
||||
* more convenient.
|
||||
*
|
||||
* We work in terms of PAGE_CACHE_SIZE here for commonality with
|
||||
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
|
||||
* took care of disposing of the unnecessary blocks. Even if the filesystem
|
||||
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
|
||||
* since the file might be mmapped.
|
||||
*/
|
||||
int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
|
||||
get_block_t get_block)
|
||||
{
|
||||
struct buffer_head bh;
|
||||
pgoff_t index = from >> PAGE_CACHE_SHIFT;
|
||||
unsigned offset = from & (PAGE_CACHE_SIZE-1);
|
||||
int err;
|
||||
|
||||
/* Block boundary? Nothing to do */
|
||||
if (!length)
|
||||
return 0;
|
||||
BUG_ON((offset + length) > PAGE_CACHE_SIZE);
|
||||
|
||||
memset(&bh, 0, sizeof(bh));
|
||||
bh.b_size = PAGE_CACHE_SIZE;
|
||||
err = get_block(inode, index, &bh, 0);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (buffer_written(&bh)) {
|
||||
void *addr;
|
||||
err = dax_get_addr(&bh, &addr, inode->i_blkbits);
|
||||
if (err < 0)
|
||||
return err;
|
||||
memset(addr + offset, 0, length);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
||||
|
||||
/**
|
||||
* dax_truncate_page - handle a partial page being truncated in a DAX file
|
||||
* @inode: The file being truncated
|
||||
* @from: The file offset that is being truncated to
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
*
|
||||
* Similar to block_truncate_page(), this function can be called by a
|
||||
* filesystem when it is truncating a DAX file to handle the partial page.
|
||||
*
|
||||
* We work in terms of PAGE_CACHE_SIZE here for commonality with
|
||||
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
|
||||
* took care of disposing of the unnecessary blocks. Even if the filesystem
|
||||
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
|
||||
* since the file might be mmapped.
|
||||
*/
|
||||
int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
|
||||
{
|
||||
unsigned length = PAGE_CACHE_ALIGN(from) - from;
|
||||
return dax_zero_page_range(inode, from, length, get_block);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_truncate_page);
|
@ -985,7 +985,6 @@ const struct address_space_operations exofs_aops = {
|
||||
.direct_IO = exofs_direct_IO,
|
||||
|
||||
/* With these NULL has special meaning or default is not exported */
|
||||
.get_xip_mem = NULL,
|
||||
.migratepage = NULL,
|
||||
.launder_page = NULL,
|
||||
.is_partially_uptodate = NULL,
|
||||
|
@ -42,14 +42,3 @@ config EXT2_FS_SECURITY
|
||||
|
||||
If you are not using a security module that requires using
|
||||
extended attributes for file security labels, say N.
|
||||
|
||||
config EXT2_FS_XIP
|
||||
bool "Ext2 execute in place support"
|
||||
depends on EXT2_FS && MMU
|
||||
help
|
||||
Execute in place can be used on memory-backed block devices. If you
|
||||
enable this option, you can select to mount block devices which are
|
||||
capable of this feature without using the page cache.
|
||||
|
||||
If you do not use a block device that is capable of using this,
|
||||
or if unsure, say N.
|
||||
|
@ -10,4 +10,3 @@ ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
|
||||
ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
|
||||
ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
|
||||
ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
|
||||
ext2-$(CONFIG_EXT2_FS_XIP) += xip.o
|
||||
|
@ -380,10 +380,15 @@ struct ext2_inode {
|
||||
#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */
|
||||
#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */
|
||||
#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */
|
||||
#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */
|
||||
#define EXT2_MOUNT_XIP 0x010000 /* Obsolete, use DAX */
|
||||
#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */
|
||||
#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */
|
||||
#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */
|
||||
#ifdef CONFIG_FS_DAX
|
||||
#define EXT2_MOUNT_DAX 0x100000 /* Direct Access */
|
||||
#else
|
||||
#define EXT2_MOUNT_DAX 0
|
||||
#endif
|
||||
|
||||
|
||||
#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
|
||||
@ -788,11 +793,10 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
|
||||
int datasync);
|
||||
extern const struct inode_operations ext2_file_inode_operations;
|
||||
extern const struct file_operations ext2_file_operations;
|
||||
extern const struct file_operations ext2_xip_file_operations;
|
||||
extern const struct file_operations ext2_dax_file_operations;
|
||||
|
||||
/* inode.c */
|
||||
extern const struct address_space_operations ext2_aops;
|
||||
extern const struct address_space_operations ext2_aops_xip;
|
||||
extern const struct address_space_operations ext2_nobh_aops;
|
||||
|
||||
/* namei.c */
|
||||
|
@ -25,6 +25,36 @@
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
|
||||
#ifdef CONFIG_FS_DAX
/* Fault handler for DAX mappings: defer to dax_fault() with ext2's block lookup. */
static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return dax_fault(vma, vmf, ext2_get_block);
}

/* page_mkwrite handler for DAX mappings: defer to dax_mkwrite(). */
static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return dax_mkwrite(vma, vmf, ext2_get_block);
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
	.fault		= ext2_dax_fault,
	.page_mkwrite	= ext2_dax_mkwrite,
};

/*
 * mmap entry point shared by the ext2 file_operations tables.  DAX inodes
 * get the DAX vm_ops and a VM_MIXEDMAP vma; every other inode takes the
 * generic page cache path.
 */
static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (IS_DAX(file_inode(file))) {
		file_accessed(file);
		vma->vm_ops = &ext2_dax_vm_ops;
		vma->vm_flags |= VM_MIXEDMAP;
		return 0;
	}

	return generic_file_mmap(file, vma);
}
#else
#define ext2_file_mmap	generic_file_mmap
#endif
|
||||
|
||||
/*
|
||||
* Called when filp is released. This happens when all file descriptors
|
||||
* for a single struct file are closed. Note that different open() calls
|
||||
@ -70,7 +100,7 @@ const struct file_operations ext2_file_operations = {
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = ext2_compat_ioctl,
|
||||
#endif
|
||||
.mmap = generic_file_mmap,
|
||||
.mmap = ext2_file_mmap,
|
||||
.open = dquot_file_open,
|
||||
.release = ext2_release_file,
|
||||
.fsync = ext2_fsync,
|
||||
@ -78,16 +108,18 @@ const struct file_operations ext2_file_operations = {
|
||||
.splice_write = iter_file_splice_write,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_EXT2_FS_XIP
|
||||
const struct file_operations ext2_xip_file_operations = {
|
||||
#ifdef CONFIG_FS_DAX
|
||||
const struct file_operations ext2_dax_file_operations = {
|
||||
.llseek = generic_file_llseek,
|
||||
.read = xip_file_read,
|
||||
.write = xip_file_write,
|
||||
.read = new_sync_read,
|
||||
.write = new_sync_write,
|
||||
.read_iter = generic_file_read_iter,
|
||||
.write_iter = generic_file_write_iter,
|
||||
.unlocked_ioctl = ext2_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = ext2_compat_ioctl,
|
||||
#endif
|
||||
.mmap = xip_file_mmap,
|
||||
.mmap = ext2_file_mmap,
|
||||
.open = dquot_file_open,
|
||||
.release = ext2_release_file,
|
||||
.fsync = ext2_fsync,
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include <linux/aio.h>
|
||||
#include "ext2.h"
|
||||
#include "acl.h"
|
||||
#include "xip.h"
|
||||
#include "xattr.h"
|
||||
|
||||
static int __ext2_write_inode(struct inode *inode, int do_sync);
|
||||
@ -731,12 +730,14 @@ static int ext2_get_blocks(struct inode *inode,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ext2_use_xip(inode->i_sb)) {
|
||||
if (IS_DAX(inode)) {
|
||||
/*
|
||||
* we need to clear the block
|
||||
* block must be initialised before we put it in the tree
|
||||
* so that it's not found by another thread before it's
|
||||
* initialised
|
||||
*/
|
||||
err = ext2_clear_xip_target (inode,
|
||||
le32_to_cpu(chain[depth-1].key));
|
||||
err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
|
||||
1 << inode->i_blkbits);
|
||||
if (err) {
|
||||
mutex_unlock(&ei->truncate_mutex);
|
||||
goto cleanup;
|
||||
@ -859,7 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
|
||||
size_t count = iov_iter_count(iter);
|
||||
ssize_t ret;
|
||||
|
||||
ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
|
||||
if (IS_DAX(inode))
|
||||
ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block,
|
||||
NULL, DIO_LOCKING);
|
||||
else
|
||||
ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
|
||||
ext2_get_block);
|
||||
if (ret < 0 && (rw & WRITE))
|
||||
ext2_write_failed(mapping, offset + count);
|
||||
return ret;
|
||||
@ -885,11 +891,6 @@ const struct address_space_operations ext2_aops = {
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
};
|
||||
|
||||
const struct address_space_operations ext2_aops_xip = {
|
||||
.bmap = ext2_bmap,
|
||||
.get_xip_mem = ext2_get_xip_mem,
|
||||
};
|
||||
|
||||
const struct address_space_operations ext2_nobh_aops = {
|
||||
.readpage = ext2_readpage,
|
||||
.readpages = ext2_readpages,
|
||||
@ -1201,8 +1202,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
|
||||
|
||||
inode_dio_wait(inode);
|
||||
|
||||
if (mapping_is_xip(inode->i_mapping))
|
||||
error = xip_truncate_page(inode->i_mapping, newsize);
|
||||
if (IS_DAX(inode))
|
||||
error = dax_truncate_page(inode, newsize, ext2_get_block);
|
||||
else if (test_opt(inode->i_sb, NOBH))
|
||||
error = nobh_truncate_page(inode->i_mapping,
|
||||
newsize, ext2_get_block);
|
||||
@ -1273,7 +1274,8 @@ void ext2_set_inode_flags(struct inode *inode)
|
||||
{
|
||||
unsigned int flags = EXT2_I(inode)->i_flags;
|
||||
|
||||
inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
|
||||
inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
|
||||
S_DIRSYNC | S_DAX);
|
||||
if (flags & EXT2_SYNC_FL)
|
||||
inode->i_flags |= S_SYNC;
|
||||
if (flags & EXT2_APPEND_FL)
|
||||
@ -1284,6 +1286,8 @@ void ext2_set_inode_flags(struct inode *inode)
|
||||
inode->i_flags |= S_NOATIME;
|
||||
if (flags & EXT2_DIRSYNC_FL)
|
||||
inode->i_flags |= S_DIRSYNC;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
inode->i_flags |= S_DAX;
|
||||
}
|
||||
|
||||
/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
|
||||
@ -1384,9 +1388,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
|
||||
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
inode->i_op = &ext2_file_inode_operations;
|
||||
if (ext2_use_xip(inode->i_sb)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops_xip;
|
||||
inode->i_fop = &ext2_xip_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops;
|
||||
inode->i_fop = &ext2_dax_file_operations;
|
||||
} else if (test_opt(inode->i_sb, NOBH)) {
|
||||
inode->i_mapping->a_ops = &ext2_nobh_aops;
|
||||
inode->i_fop = &ext2_file_operations;
|
||||
|
@ -35,7 +35,6 @@
|
||||
#include "ext2.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
#include "xip.h"
|
||||
|
||||
static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
@ -105,9 +104,9 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
|
||||
return PTR_ERR(inode);
|
||||
|
||||
inode->i_op = &ext2_file_inode_operations;
|
||||
if (ext2_use_xip(inode->i_sb)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops_xip;
|
||||
inode->i_fop = &ext2_xip_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops;
|
||||
inode->i_fop = &ext2_dax_file_operations;
|
||||
} else if (test_opt(inode->i_sb, NOBH)) {
|
||||
inode->i_mapping->a_ops = &ext2_nobh_aops;
|
||||
inode->i_fop = &ext2_file_operations;
|
||||
@ -126,9 +125,9 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
return PTR_ERR(inode);
|
||||
|
||||
inode->i_op = &ext2_file_inode_operations;
|
||||
if (ext2_use_xip(inode->i_sb)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops_xip;
|
||||
inode->i_fop = &ext2_xip_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX)) {
|
||||
inode->i_mapping->a_ops = &ext2_aops;
|
||||
inode->i_fop = &ext2_dax_file_operations;
|
||||
} else if (test_opt(inode->i_sb, NOBH)) {
|
||||
inode->i_mapping->a_ops = &ext2_nobh_aops;
|
||||
inode->i_fop = &ext2_file_operations;
|
||||
|
@ -35,7 +35,6 @@
|
||||
#include "ext2.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
#include "xip.h"
|
||||
|
||||
static void ext2_sync_super(struct super_block *sb,
|
||||
struct ext2_super_block *es, int wait);
|
||||
@ -292,9 +291,11 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
|
||||
seq_puts(seq, ",grpquota");
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_EXT2_FS_XIP)
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
|
||||
seq_puts(seq, ",xip");
|
||||
if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
|
||||
seq_puts(seq, ",dax");
|
||||
#endif
|
||||
|
||||
if (!test_opt(sb, RESERVATION))
|
||||
@ -403,7 +404,7 @@ enum {
|
||||
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic,
|
||||
Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
|
||||
Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
|
||||
Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
|
||||
Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
|
||||
Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
|
||||
};
|
||||
|
||||
@ -432,6 +433,7 @@ static const match_table_t tokens = {
|
||||
{Opt_acl, "acl"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_xip, "xip"},
|
||||
{Opt_dax, "dax"},
|
||||
{Opt_grpquota, "grpquota"},
|
||||
{Opt_ignore, "noquota"},
|
||||
{Opt_quota, "quota"},
|
||||
@ -559,10 +561,14 @@ static int parse_options(char *options, struct super_block *sb)
|
||||
break;
|
||||
#endif
|
||||
case Opt_xip:
|
||||
#ifdef CONFIG_EXT2_FS_XIP
|
||||
set_opt (sbi->s_mount_opt, XIP);
|
||||
ext2_msg(sb, KERN_INFO, "use dax instead of xip");
|
||||
set_opt(sbi->s_mount_opt, XIP);
|
||||
/* Fall through */
|
||||
case Opt_dax:
|
||||
#ifdef CONFIG_FS_DAX
|
||||
set_opt(sbi->s_mount_opt, DAX);
|
||||
#else
|
||||
ext2_msg(sb, KERN_INFO, "xip option not supported");
|
||||
ext2_msg(sb, KERN_INFO, "dax option not supported");
|
||||
#endif
|
||||
break;
|
||||
|
||||
@ -877,9 +883,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
|
||||
MS_POSIXACL : 0);
|
||||
|
||||
ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
|
||||
EXT2_MOUNT_XIP if not */
|
||||
|
||||
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
|
||||
(EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
|
||||
EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
|
||||
@ -909,11 +912,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
|
||||
|
||||
if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) {
|
||||
if (!silent)
|
||||
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
ext2_msg(sb, KERN_ERR,
|
||||
"error: unsupported blocksize for xip");
|
||||
goto failed_mount;
|
||||
"error: unsupported blocksize for dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
if (!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
ext2_msg(sb, KERN_ERR,
|
||||
"error: device does not support dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the blocksize doesn't match, re-read the thing.. */
|
||||
@ -1259,7 +1268,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
|
||||
{
|
||||
struct ext2_sb_info * sbi = EXT2_SB(sb);
|
||||
struct ext2_super_block * es;
|
||||
unsigned long old_mount_opt = sbi->s_mount_opt;
|
||||
struct ext2_mount_options old_opts;
|
||||
unsigned long old_sb_flags;
|
||||
int err;
|
||||
@ -1284,22 +1292,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
|
||||
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
|
||||
((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
|
||||
|
||||
ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
|
||||
EXT2_MOUNT_XIP if not */
|
||||
|
||||
if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
|
||||
ext2_msg(sb, KERN_WARNING,
|
||||
"warning: unsupported blocksize for xip");
|
||||
err = -EINVAL;
|
||||
goto restore_opts;
|
||||
}
|
||||
|
||||
es = sbi->s_es;
|
||||
if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
|
||||
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT2_MOUNT_DAX) {
|
||||
ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
|
||||
"xip flag with busy inodes while remounting");
|
||||
sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
|
||||
sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
|
||||
"dax flag with busy inodes while remounting");
|
||||
sbi->s_mount_opt ^= EXT2_MOUNT_DAX;
|
||||
}
|
||||
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
|
||||
spin_unlock(&sbi->s_lock);
|
||||
|
@ -1,86 +0,0 @@
|
||||
/*
|
||||
* linux/fs/ext2/xip.c
|
||||
*
|
||||
* Copyright (C) 2005 IBM Corporation
|
||||
* Author: Carsten Otte (cotte@de.ibm.com)
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include "ext2.h"
|
||||
#include "xip.h"
|
||||
|
||||
/*
 * Look up the directly addressable memory behind a filesystem block.
 *
 * @block is scaled to 512-byte sectors using PAGE_SIZE / 512 sectors per
 * block, and the lookup is delegated to bdev_direct_access() on the
 * block device of the inode's superblock.  The result of
 * bdev_direct_access() is returned unchanged.
 */
static inline long __inode_direct_access(struct inode *inode, sector_t block,
				void **kaddr, unsigned long *pfn, long size)
{
	const sector_t sectors_per_block = PAGE_SIZE / 512;

	return bdev_direct_access(inode->i_sb->s_bdev,
				  block * sectors_per_block,
				  kaddr, pfn, size);
}
|
||||
|
||||
static inline int
|
||||
__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
|
||||
sector_t *result)
|
||||
{
|
||||
struct buffer_head tmp;
|
||||
int rc;
|
||||
|
||||
memset(&tmp, 0, sizeof(struct buffer_head));
|
||||
tmp.b_size = 1 << inode->i_blkbits;
|
||||
rc = ext2_get_block(inode, pgoff, &tmp, create);
|
||||
*result = tmp.b_blocknr;
|
||||
|
||||
/* did we get a sparse block (hole in the file)? */
|
||||
if (!tmp.b_blocknr && !rc) {
|
||||
BUG_ON(create);
|
||||
rc = -ENODATA;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
ext2_clear_xip_target(struct inode *inode, sector_t block)
|
||||
{
|
||||
void *kaddr;
|
||||
unsigned long pfn;
|
||||
long size;
|
||||
|
||||
size = __inode_direct_access(inode, block, &kaddr, &pfn, PAGE_SIZE);
|
||||
if (size < 0)
|
||||
return size;
|
||||
clear_page(kaddr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ext2_xip_verify_sb(struct super_block *sb)
|
||||
{
|
||||
struct ext2_sb_info *sbi = EXT2_SB(sb);
|
||||
|
||||
if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
|
||||
!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
|
||||
ext2_msg(sb, KERN_WARNING,
|
||||
"warning: ignoring xip option - "
|
||||
"not supported by bdev");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Resolve page offset @pgoff of @mapping to directly addressable memory.
 *
 * The block number is looked up first (allocating when @create is set),
 * then translated to a kernel address in *@kmem and a page frame number
 * in *@pfn.  Returns 0 on success, or the first non-zero / negative
 * status produced by either step.
 */
int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
				void **kmem, unsigned long *pfn)
{
	struct inode *inode = mapping->host;
	sector_t block;
	long rc;

	/* Step 1: which block backs this page offset? */
	rc = __ext2_get_block(inode, pgoff, create, &block);
	if (rc)
		return rc;

	/* Step 2: where does that block live in memory? */
	rc = __inode_direct_access(inode, block, kmem, pfn, PAGE_SIZE);
	if (rc < 0)
		return rc;

	return 0;
}
|
@ -1,26 +0,0 @@
|
||||
/*
|
||||
* linux/fs/ext2/xip.h
|
||||
*
|
||||
* Copyright (C) 2005 IBM Corporation
|
||||
* Author: Carsten Otte (cotte@de.ibm.com)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_EXT2_FS_XIP
extern void ext2_xip_verify_sb (struct super_block *);
extern int ext2_clear_xip_target (struct inode *, sector_t);

/* Non-zero when the superblock carries the XIP mount option. */
static inline int ext2_use_xip (struct super_block *sb)
{
	struct ext2_sb_info *sbi = EXT2_SB(sb);
	return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
}
int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
				void **, unsigned long *);
/*
 * True when the mapping's address_space operations provide get_xip_mem.
 * The argument is parenthesized so that any pointer expression can be
 * passed, not only a plain identifier.
 */
#define mapping_is_xip(map) unlikely((map)->a_ops->get_xip_mem)
#else
/* XIP compiled out: every helper collapses to a constant or a no-op. */
#define mapping_is_xip(map)			0
#define ext2_xip_verify_sb(sb)			do { } while (0)
#define ext2_use_xip(sb)			0
#define ext2_clear_xip_target(inode, chain)	0
#define ext2_get_xip_mem			NULL
#endif
|
@ -965,6 +965,11 @@ struct ext4_inode_info {
|
||||
#define EXT4_MOUNT_ERRORS_MASK 0x00070
|
||||
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
|
||||
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
|
||||
#ifdef CONFIG_FS_DAX
|
||||
#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
|
||||
#else
|
||||
#define EXT4_MOUNT_DAX 0
|
||||
#endif
|
||||
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
|
||||
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
|
||||
#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
|
||||
@ -2578,6 +2583,7 @@ extern const struct file_operations ext4_dir_operations;
|
||||
/* file.c */
|
||||
extern const struct inode_operations ext4_file_inode_operations;
|
||||
extern const struct file_operations ext4_file_operations;
|
||||
extern const struct file_operations ext4_dax_file_operations;
|
||||
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
|
||||
|
||||
/* inline.c */
|
||||
|
@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
struct mutex *aio_mutex = NULL;
|
||||
struct blk_plug plug;
|
||||
int o_direct = file->f_flags & O_DIRECT;
|
||||
int o_direct = io_is_direct(file);
|
||||
int overwrite = 0;
|
||||
size_t length = iov_iter_count(from);
|
||||
ssize_t ret;
|
||||
@ -191,6 +191,26 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
/* Page-fault handler for DAX mappings; forwards to dax_fault(). */
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* Is this the right get_block? */
	return dax_fault(vma, vmf, ext4_get_block);
}

/* page_mkwrite handler for DAX mappings; forwards to dax_mkwrite(). */
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return dax_mkwrite(vma, vmf, ext4_get_block);
}

/* VM operations used by ext4_file_mmap() for DAX inodes. */
static const struct vm_operations_struct ext4_dax_vm_ops = {
	.page_mkwrite	= ext4_dax_mkwrite,
	.fault		= ext4_dax_fault,
};
#else
/* Without CONFIG_FS_DAX the DAX ops are an alias for the regular ones. */
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif
|
||||
|
||||
static const struct vm_operations_struct ext4_file_vm_ops = {
|
||||
.fault = filemap_fault,
|
||||
.map_pages = filemap_map_pages,
|
||||
@ -200,7 +220,12 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
|
||||
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
file_accessed(file);
|
||||
vma->vm_ops = &ext4_file_vm_ops;
|
||||
if (IS_DAX(file_inode(file))) {
|
||||
vma->vm_ops = &ext4_dax_vm_ops;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
} else {
|
||||
vma->vm_ops = &ext4_file_vm_ops;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -599,6 +624,26 @@ const struct file_operations ext4_file_operations = {
|
||||
.fallocate = ext4_fallocate,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_FS_DAX
/*
 * File operations for regular files on a filesystem mounted with the
 * dax option.  No splice entry points are provided.
 */
const struct file_operations ext4_dax_file_operations = {
	/* positioning and data transfer */
	.llseek		= ext4_llseek,
	.read		= new_sync_read,
	.read_iter	= generic_file_read_iter,
	.write		= new_sync_write,
	.write_iter	= ext4_file_write_iter,

	/* ioctl */
	.unlocked_ioctl	= ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif

	/* lifetime, mapping and persistence */
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.mmap		= ext4_file_mmap,
	.fsync		= ext4_sync_file,
	/* Splice not yet supported with DAX */
	.fallocate	= ext4_fallocate,
};
#endif
|
||||
|
||||
const struct inode_operations ext4_file_inode_operations = {
|
||||
.setattr = ext4_setattr,
|
||||
.getattr = ext4_getattr,
|
||||
|
@ -689,14 +689,22 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
|
||||
inode_dio_done(inode);
|
||||
goto locked;
|
||||
}
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iter, offset,
|
||||
ext4_get_block, NULL, NULL, 0);
|
||||
if (IS_DAX(inode))
|
||||
ret = dax_do_io(rw, iocb, inode, iter, offset,
|
||||
ext4_get_block, NULL, 0);
|
||||
else
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iter, offset,
|
||||
ext4_get_block, NULL, NULL, 0);
|
||||
inode_dio_done(inode);
|
||||
} else {
|
||||
locked:
|
||||
ret = blockdev_direct_IO(rw, iocb, inode, iter,
|
||||
offset, ext4_get_block);
|
||||
if (IS_DAX(inode))
|
||||
ret = dax_do_io(rw, iocb, inode, iter, offset,
|
||||
ext4_get_block, NULL, DIO_LOCKING);
|
||||
else
|
||||
ret = blockdev_direct_IO(rw, iocb, inode, iter,
|
||||
offset, ext4_get_block);
|
||||
|
||||
if (unlikely((rw & WRITE) && ret < 0)) {
|
||||
loff_t isize = i_size_read(inode);
|
||||
|
@ -657,6 +657,18 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||
return retval;
|
||||
}
|
||||
|
||||
static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
struct inode *inode = bh->b_assoc_map->host;
|
||||
/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
|
||||
loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
|
||||
int err;
|
||||
if (!uptodate)
|
||||
return;
|
||||
WARN_ON(!buffer_unwritten(bh));
|
||||
err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
|
||||
}
|
||||
|
||||
/* Maximum number of blocks we map for direct IO at once. */
|
||||
#define DIO_MAX_BLOCKS 4096
|
||||
|
||||
@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
|
||||
|
||||
map_bh(bh, inode->i_sb, map.m_pblk);
|
||||
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
|
||||
if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
|
||||
bh->b_assoc_map = inode->i_mapping;
|
||||
bh->b_private = (void *)(unsigned long)iblock;
|
||||
bh->b_end_io = ext4_end_io_unwritten;
|
||||
}
|
||||
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
|
||||
set_buffer_defer_completion(bh);
|
||||
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
|
||||
@ -3010,13 +3027,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
get_block_func = ext4_get_block_write;
|
||||
dio_flags = DIO_LOCKING;
|
||||
}
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iter,
|
||||
offset,
|
||||
get_block_func,
|
||||
ext4_end_io_dio,
|
||||
NULL,
|
||||
dio_flags);
|
||||
if (IS_DAX(inode))
|
||||
ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
|
||||
ext4_end_io_dio, dio_flags);
|
||||
else
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iter, offset,
|
||||
get_block_func,
|
||||
ext4_end_io_dio, NULL, dio_flags);
|
||||
|
||||
/*
|
||||
* Put our reference to io_end. This can free the io_end structure e.g.
|
||||
@ -3180,19 +3198,12 @@ void ext4_set_aops(struct inode *inode)
|
||||
inode->i_mapping->a_ops = &ext4_aops;
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_block_zero_page_range() zeros out a mapping of length 'length'
|
||||
* starting from file offset 'from'. The range to be zero'd must
|
||||
* be contained with in one block. If the specified range exceeds
|
||||
* the end of the block it will be shortened to end of the block
|
||||
* that cooresponds to 'from'
|
||||
*/
|
||||
static int ext4_block_zero_page_range(handle_t *handle,
|
||||
static int __ext4_block_zero_page_range(handle_t *handle,
|
||||
struct address_space *mapping, loff_t from, loff_t length)
|
||||
{
|
||||
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
|
||||
unsigned offset = from & (PAGE_CACHE_SIZE-1);
|
||||
unsigned blocksize, max, pos;
|
||||
unsigned blocksize, pos;
|
||||
ext4_lblk_t iblock;
|
||||
struct inode *inode = mapping->host;
|
||||
struct buffer_head *bh;
|
||||
@ -3205,14 +3216,6 @@ static int ext4_block_zero_page_range(handle_t *handle,
|
||||
return -ENOMEM;
|
||||
|
||||
blocksize = inode->i_sb->s_blocksize;
|
||||
max = blocksize - (offset & (blocksize - 1));
|
||||
|
||||
/*
|
||||
* correct length if it does not fall between
|
||||
* 'from' and the end of the block
|
||||
*/
|
||||
if (length > max || length < 0)
|
||||
length = max;
|
||||
|
||||
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
|
||||
|
||||
@ -3277,6 +3280,33 @@ static int ext4_block_zero_page_range(handle_t *handle,
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_block_zero_page_range() zeros out a mapping of length 'length'
|
||||
* starting from file offset 'from'. The range to be zero'd must
|
||||
* be contained with in one block. If the specified range exceeds
|
||||
* the end of the block it will be shortened to end of the block
|
||||
* that cooresponds to 'from'
|
||||
*/
|
||||
static int ext4_block_zero_page_range(handle_t *handle,
|
||||
struct address_space *mapping, loff_t from, loff_t length)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned offset = from & (PAGE_CACHE_SIZE-1);
|
||||
unsigned blocksize = inode->i_sb->s_blocksize;
|
||||
unsigned max = blocksize - (offset & (blocksize - 1));
|
||||
|
||||
/*
|
||||
* correct length if it does not fall between
|
||||
* 'from' and the end of the block
|
||||
*/
|
||||
if (length > max || length < 0)
|
||||
length = max;
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return dax_zero_page_range(inode, from, length, ext4_get_block);
|
||||
return __ext4_block_zero_page_range(handle, mapping, from, length);
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_block_truncate_page() zeroes out a mapping from file offset `from'
|
||||
* up to the end of the block which corresponds to `from'.
|
||||
@ -3798,8 +3828,10 @@ void ext4_set_inode_flags(struct inode *inode)
|
||||
new_fl |= S_NOATIME;
|
||||
if (flags & EXT4_DIRSYNC_FL)
|
||||
new_fl |= S_DIRSYNC;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
new_fl |= S_DAX;
|
||||
inode_set_flags(inode, new_fl,
|
||||
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
|
||||
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
|
||||
}
|
||||
|
||||
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
|
||||
@ -4052,7 +4084,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
||||
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
inode->i_op = &ext4_file_inode_operations;
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
inode->i_fop = &ext4_dax_file_operations;
|
||||
else
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
ext4_set_aops(inode);
|
||||
} else if (S_ISDIR(inode->i_mode)) {
|
||||
inode->i_op = &ext4_dir_inode_operations;
|
||||
@ -4534,7 +4569,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
* Truncate pagecache after we've waited for commit
|
||||
* in data=journal mode to make pages freeable.
|
||||
*/
|
||||
truncate_pagecache(inode, inode->i_size);
|
||||
truncate_pagecache(inode, inode->i_size);
|
||||
}
|
||||
/*
|
||||
* We want to call ext4_truncate() even if attr->ia_size ==
|
||||
|
@ -2235,7 +2235,10 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
err = PTR_ERR(inode);
|
||||
if (!IS_ERR(inode)) {
|
||||
inode->i_op = &ext4_file_inode_operations;
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
inode->i_fop = &ext4_dax_file_operations;
|
||||
else
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
ext4_set_aops(inode);
|
||||
err = ext4_add_nondir(handle, dentry, inode);
|
||||
if (!err && IS_DIRSYNC(dir))
|
||||
@ -2299,7 +2302,10 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
err = PTR_ERR(inode);
|
||||
if (!IS_ERR(inode)) {
|
||||
inode->i_op = &ext4_file_inode_operations;
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
if (test_opt(inode->i_sb, DAX))
|
||||
inode->i_fop = &ext4_dax_file_operations;
|
||||
else
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
ext4_set_aops(inode);
|
||||
d_tmpfile(dentry, inode);
|
||||
err = ext4_orphan_add(handle, inode);
|
||||
|
@ -1124,7 +1124,7 @@ enum {
|
||||
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
|
||||
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
|
||||
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
|
||||
Opt_usrquota, Opt_grpquota, Opt_i_version,
|
||||
Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
|
||||
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
|
||||
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
|
||||
Opt_inode_readahead_blks, Opt_journal_ioprio,
|
||||
@ -1187,6 +1187,7 @@ static const match_table_t tokens = {
|
||||
{Opt_barrier, "barrier"},
|
||||
{Opt_nobarrier, "nobarrier"},
|
||||
{Opt_i_version, "i_version"},
|
||||
{Opt_dax, "dax"},
|
||||
{Opt_stripe, "stripe=%u"},
|
||||
{Opt_delalloc, "delalloc"},
|
||||
{Opt_nodelalloc, "nodelalloc"},
|
||||
@ -1371,6 +1372,7 @@ static const struct mount_opts {
|
||||
{Opt_min_batch_time, 0, MOPT_GTE0},
|
||||
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
|
||||
{Opt_init_itable, 0, MOPT_GTE0},
|
||||
{Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
|
||||
{Opt_stripe, 0, MOPT_GTE0},
|
||||
{Opt_resuid, 0, MOPT_GTE0},
|
||||
{Opt_resgid, 0, MOPT_GTE0},
|
||||
@ -1606,6 +1608,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
|
||||
return -1;
|
||||
}
|
||||
sbi->s_jquota_fmt = m->mount_opt;
|
||||
#endif
|
||||
#ifndef CONFIG_FS_DAX
|
||||
} else if (token == Opt_dax) {
|
||||
ext4_msg(sb, KERN_INFO, "dax option not supported");
|
||||
return -1;
|
||||
#endif
|
||||
} else {
|
||||
if (!args->from)
|
||||
@ -3589,6 +3596,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
"both data=journal and dioread_nolock");
|
||||
goto failed_mount;
|
||||
}
|
||||
if (test_opt(sb, DAX)) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
"both data=journal and dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
if (test_opt(sb, DELALLOC))
|
||||
clear_opt(sb, DELALLOC);
|
||||
}
|
||||
@ -3652,6 +3664,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
goto failed_mount;
|
||||
}
|
||||
|
||||
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"error: unsupported blocksize for dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
if (!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"error: device does not support dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
}
|
||||
|
||||
if (sb->s_blocksize != blocksize) {
|
||||
/* Validate the filesystem blocksize */
|
||||
if (!sb_set_blocksize(sb, blocksize)) {
|
||||
@ -4869,6 +4894,18 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
err = -EINVAL;
|
||||
goto restore_opts;
|
||||
}
|
||||
if (test_opt(sb, DAX)) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
"both data=journal and dax");
|
||||
err = -EINVAL;
|
||||
goto restore_opts;
|
||||
}
|
||||
}
|
||||
|
||||
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
|
||||
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
|
||||
"dax flag with busy inodes while remounting");
|
||||
sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
|
||||
}
|
||||
|
||||
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
|
||||
|
242
fs/ocfs2/aops.c
242
fs/ocfs2/aops.c
@ -28,6 +28,7 @@
|
||||
#include <linux/pipe_fs_i.h>
|
||||
#include <linux/mpage.h>
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
#include <cluster/masklog.h>
|
||||
|
||||
@ -47,6 +48,9 @@
|
||||
#include "ocfs2_trace.h"
|
||||
|
||||
#include "buffer_head_io.h"
|
||||
#include "dir.h"
|
||||
#include "namei.h"
|
||||
#include "sysfile.h"
|
||||
|
||||
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
@ -506,18 +510,21 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
|
||||
*
|
||||
* called like this: dio->get_blocks(dio->inode, fs_startblk,
|
||||
* fs_count, map_bh, dio->rw == WRITE);
|
||||
*
|
||||
* Note that we never bother to allocate blocks here, and thus ignore the
|
||||
* create argument.
|
||||
*/
|
||||
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int ret;
|
||||
u32 cpos = 0;
|
||||
int alloc_locked = 0;
|
||||
u64 p_blkno, inode_blocks, contig_blocks;
|
||||
unsigned int ext_flags;
|
||||
unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
|
||||
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||
unsigned long len = bh_result->b_size;
|
||||
unsigned int clusters_to_alloc = 0;
|
||||
|
||||
cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
|
||||
|
||||
/* This function won't even be called if the request isn't all
|
||||
* nicely aligned and of the right size, so there's no need
|
||||
@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
|
||||
/* We should already CoW the refcounted extent in case of create. */
|
||||
BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
|
||||
|
||||
/* allocate blocks if no p_blkno is found, and create == 1 */
|
||||
if (!p_blkno && create) {
|
||||
ret = ocfs2_inode_lock(inode, NULL, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
alloc_locked = 1;
|
||||
|
||||
/* fill hole, allocate blocks can't be larger than the size
|
||||
* of the hole */
|
||||
clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
|
||||
if (clusters_to_alloc > contig_blocks)
|
||||
clusters_to_alloc = contig_blocks;
|
||||
|
||||
/* allocate extent and insert them into the extent tree */
|
||||
ret = ocfs2_extend_allocation(inode, cpos,
|
||||
clusters_to_alloc, 0);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
|
||||
&contig_blocks, &ext_flags);
|
||||
if (ret < 0) {
|
||||
mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
|
||||
(unsigned long long)iblock);
|
||||
ret = -EIO;
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* get_more_blocks() expects us to describe a hole by clearing
|
||||
* the mapped bit on bh_result().
|
||||
@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
|
||||
contig_blocks = max_blocks;
|
||||
bh_result->b_size = contig_blocks << blocksize_bits;
|
||||
bail:
|
||||
if (alloc_locked)
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -597,6 +640,184 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
|
||||
return try_to_free_buffers(page);
|
||||
}
|
||||
|
||||
/*
 * Tell whether a write at @offset lands on already-written storage.
 *
 * Returns 1 when the cluster containing @offset has a physical cluster
 * assigned and its extent is not flagged OCFS2_EXT_UNWRITTEN, 0 when it
 * does not, and the negative status of ocfs2_get_clusters() if the
 * lookup itself fails.
 */
static int ocfs2_is_overwrite(struct ocfs2_super *osb,
		struct inode *inode, loff_t offset)
{
	unsigned int flags = 0;
	unsigned int nr_clusters = 0;
	u32 phys_cpos = 0;
	u32 virt_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
	int status;

	status = ocfs2_get_clusters(inode, virt_cpos, &phys_cpos,
				    &nr_clusters, &flags);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	if (!phys_cpos || (flags & OCFS2_EXT_UNWRITTEN))
		return 0;

	return 1;
}
|
||||
|
||||
static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
|
||||
struct iov_iter *iter,
|
||||
loff_t offset)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
ssize_t written = 0;
|
||||
bool orphaned = false;
|
||||
int is_overwrite = 0;
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file)->i_mapping->host;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct buffer_head *di_bh = NULL;
|
||||
size_t count = iter->count;
|
||||
journal_t *journal = osb->journal->j_journal;
|
||||
u32 zero_len;
|
||||
int cluster_align;
|
||||
loff_t final_size = offset + count;
|
||||
int append_write = offset >= i_size_read(inode) ? 1 : 0;
|
||||
unsigned int num_clusters = 0;
|
||||
unsigned int ext_flags = 0;
|
||||
|
||||
{
|
||||
u64 o = offset;
|
||||
|
||||
zero_len = do_div(o, 1 << osb->s_clustersize_bits);
|
||||
cluster_align = !zero_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* when final_size > inode->i_size, inode->i_size will be
|
||||
* updated after direct write, so add the inode to orphan
|
||||
* dir first.
|
||||
*/
|
||||
if (final_size > i_size_read(inode)) {
|
||||
ret = ocfs2_add_inode_to_orphan(osb, inode);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
orphaned = true;
|
||||
}
|
||||
|
||||
if (append_write) {
|
||||
ret = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto clean_orphan;
|
||||
}
|
||||
|
||||
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
|
||||
ret = ocfs2_zero_extend(inode, di_bh, offset);
|
||||
else
|
||||
ret = ocfs2_extend_no_holes(inode, di_bh, offset,
|
||||
offset);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
goto clean_orphan;
|
||||
}
|
||||
|
||||
is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
|
||||
if (is_overwrite < 0) {
|
||||
mlog_errno(is_overwrite);
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
goto clean_orphan;
|
||||
}
|
||||
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
di_bh = NULL;
|
||||
}
|
||||
|
||||
written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
|
||||
iter, offset,
|
||||
ocfs2_direct_IO_get_blocks,
|
||||
ocfs2_dio_end_io, NULL, 0);
|
||||
if (unlikely(written < 0)) {
|
||||
loff_t i_size = i_size_read(inode);
|
||||
|
||||
if (offset + count > i_size) {
|
||||
ret = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto clean_orphan;
|
||||
}
|
||||
|
||||
if (i_size == i_size_read(inode)) {
|
||||
ret = ocfs2_truncate_file(inode, di_bh,
|
||||
i_size);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
goto clean_orphan;
|
||||
}
|
||||
}
|
||||
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
|
||||
ret = jbd2_journal_force_commit(journal);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
}
|
||||
} else if (written < 0 && append_write && !is_overwrite &&
|
||||
!cluster_align) {
|
||||
u32 p_cpos = 0;
|
||||
u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
|
||||
|
||||
ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
|
||||
&num_clusters, &ext_flags);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto clean_orphan;
|
||||
}
|
||||
|
||||
BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
|
||||
|
||||
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
|
||||
p_cpos << (osb->s_clustersize_bits - 9),
|
||||
zero_len >> 9, GFP_KERNEL, false);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
}
|
||||
|
||||
clean_orphan:
|
||||
if (orphaned) {
|
||||
int tmp_ret;
|
||||
int update_isize = written > 0 ? 1 : 0;
|
||||
loff_t end = update_isize ? offset + written : 0;
|
||||
|
||||
tmp_ret = ocfs2_del_inode_from_orphan(osb, inode,
|
||||
update_isize, end);
|
||||
if (tmp_ret < 0) {
|
||||
ret = tmp_ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp_ret = jbd2_journal_force_commit(journal);
|
||||
if (tmp_ret < 0) {
|
||||
ret = tmp_ret;
|
||||
mlog_errno(tmp_ret);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret >= 0)
|
||||
ret = written;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t ocfs2_direct_IO(int rw,
|
||||
struct kiocb *iocb,
|
||||
struct iov_iter *iter,
|
||||
@ -604,6 +825,9 @@ static ssize_t ocfs2_direct_IO(int rw,
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file)->i_mapping->host;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
int full_coherency = !(osb->s_mount_opt &
|
||||
OCFS2_MOUNT_COHERENCY_BUFFERED);
|
||||
|
||||
/*
|
||||
* Fallback to buffered I/O if we see an inode without
|
||||
@ -612,14 +836,20 @@ static ssize_t ocfs2_direct_IO(int rw,
|
||||
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
|
||||
return 0;
|
||||
|
||||
/* Fallback to buffered I/O if we are appending. */
|
||||
if (i_size_read(inode) <= offset)
|
||||
/* Fallback to buffered I/O if we are appending and
|
||||
* concurrent O_DIRECT writes are allowed.
|
||||
*/
|
||||
if (i_size_read(inode) <= offset && !full_coherency)
|
||||
return 0;
|
||||
|
||||
return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
|
||||
if (rw == READ)
|
||||
return __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev,
|
||||
iter, offset,
|
||||
ocfs2_direct_IO_get_blocks,
|
||||
ocfs2_dio_end_io, NULL, 0);
|
||||
else
|
||||
return ocfs2_direct_IO_write(iocb, iter, offset);
|
||||
}
|
||||
|
||||
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
|
||||
|
@ -295,7 +295,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_set_inode_size(handle_t *handle,
|
||||
int ocfs2_set_inode_size(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *fe_bh,
|
||||
u64 new_i_size)
|
||||
@ -441,7 +441,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_truncate_file(struct inode *inode,
|
||||
int ocfs2_truncate_file(struct inode *inode,
|
||||
struct buffer_head *di_bh,
|
||||
u64 new_i_size)
|
||||
{
|
||||
@ -709,6 +709,13 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Non-static entry point for extending an inode's allocation.  All
 * arguments are forwarded unchanged to __ocfs2_extend_allocation() and
 * its status is returned as is.
 */
int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
		u32 clusters_to_add, int mark_unwritten)
{
	int status;

	status = __ocfs2_extend_allocation(inode, logical_start,
					   clusters_to_add, mark_unwritten);
	return status;
}
|
||||
|
||||
/*
|
||||
* While a write will already be ordering the data, a truncate will not.
|
||||
* Thus, we need to explicitly order the zeroed pages.
|
||||
@ -2109,6 +2116,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct inode *inode = dentry->d_inode;
|
||||
loff_t saved_pos = 0, end;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
int full_coherency = !(osb->s_mount_opt &
|
||||
OCFS2_MOUNT_COHERENCY_BUFFERED);
|
||||
|
||||
/*
|
||||
* We start with a read level meta lock and only jump to an ex
|
||||
@ -2197,7 +2207,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
* one node could wind up truncating another
|
||||
* nodes writes.
|
||||
*/
|
||||
if (end > i_size_read(inode)) {
|
||||
if (end > i_size_read(inode) && !full_coherency) {
|
||||
*direct_io = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fallback to old way if the feature bit is not set.
|
||||
*/
|
||||
if (end > i_size_read(inode) &&
|
||||
!ocfs2_supports_append_dio(osb)) {
|
||||
*direct_io = 0;
|
||||
break;
|
||||
}
|
||||
@ -2210,7 +2229,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
|
||||
*/
|
||||
ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
|
||||
if (ret == 1) {
|
||||
*direct_io = 0;
|
||||
/*
|
||||
* Fallback to old way if the feature bit is not set.
|
||||
* Otherwise try dio first and then complete the rest
|
||||
* request through buffer io.
|
||||
*/
|
||||
if (!ocfs2_supports_append_dio(osb))
|
||||
*direct_io = 0;
|
||||
ret = 0;
|
||||
} else if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
@ -2243,6 +2268,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
|
||||
u32 old_clusters;
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
int full_coherency = !(osb->s_mount_opt &
|
||||
OCFS2_MOUNT_COHERENCY_BUFFERED);
|
||||
@ -2357,11 +2383,51 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
|
||||
|
||||
iov_iter_truncate(from, count);
|
||||
if (direct_io) {
|
||||
loff_t endbyte;
|
||||
ssize_t written_buffered;
|
||||
written = generic_file_direct_write(iocb, from, *ppos);
|
||||
if (written < 0) {
|
||||
if (written < 0 || written == count) {
|
||||
ret = written;
|
||||
goto out_dio;
|
||||
}
|
||||
|
||||
/*
|
||||
* for completing the rest of the request.
|
||||
*/
|
||||
*ppos += written;
|
||||
count -= written;
|
||||
written_buffered = generic_perform_write(file, from, *ppos);
|
||||
/*
|
||||
* If generic_file_buffered_write() returned a synchronous error
|
||||
* then we want to return the number of bytes which were
|
||||
* direct-written, or the error code if that was zero. Note
|
||||
* that this differs from normal direct-io semantics, which
|
||||
* will return -EFOO even if some bytes were written.
|
||||
*/
|
||||
if (written_buffered < 0) {
|
||||
ret = written_buffered;
|
||||
goto out_dio;
|
||||
}
|
||||
|
||||
iocb->ki_pos = *ppos + written_buffered;
|
||||
/* We need to ensure that the page cache pages are written to
|
||||
* disk and invalidated to preserve the expected O_DIRECT
|
||||
* semantics.
|
||||
*/
|
||||
endbyte = *ppos + written_buffered - 1;
|
||||
ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
|
||||
endbyte);
|
||||
if (ret == 0) {
|
||||
written += written_buffered;
|
||||
invalidate_mapping_pages(mapping,
|
||||
*ppos >> PAGE_CACHE_SHIFT,
|
||||
endbyte >> PAGE_CACHE_SHIFT);
|
||||
} else {
|
||||
/*
|
||||
* We don't know how much we wrote, so just return
|
||||
* the number of bytes which were direct-written
|
||||
*/
|
||||
}
|
||||
} else {
|
||||
current->backing_dev_info = inode_to_bdi(inode);
|
||||
written = generic_perform_write(file, from, *ppos);
|
||||
|
@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
|
||||
struct ocfs2_alloc_context *data_ac,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
enum ocfs2_alloc_restarted *reason_ret);
|
||||
int ocfs2_set_inode_size(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *fe_bh,
|
||||
u64 new_i_size);
|
||||
int ocfs2_simple_size_update(struct inode *inode,
|
||||
struct buffer_head *di_bh,
|
||||
u64 new_i_size);
|
||||
int ocfs2_truncate_file(struct inode *inode,
|
||||
struct buffer_head *di_bh,
|
||||
u64 new_i_size);
|
||||
int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
|
||||
u64 new_i_size, u64 zero_to);
|
||||
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
|
||||
loff_t zero_to);
|
||||
int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
|
||||
u32 clusters_to_add, int mark_unwritten);
|
||||
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
|
@ -648,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode,
|
||||
|
||||
if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
|
||||
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
|
||||
orphan_dir_bh);
|
||||
orphan_dir_bh, false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail_commit;
|
||||
|
@ -81,6 +81,8 @@ struct ocfs2_inode_info
|
||||
tid_t i_sync_tid;
|
||||
tid_t i_datasync_tid;
|
||||
|
||||
wait_queue_head_t append_dio_wq;
|
||||
|
||||
struct dquot *i_dquot[MAXQUOTAS];
|
||||
};
|
||||
|
||||
|
@ -50,6 +50,8 @@
|
||||
#include "sysfile.h"
|
||||
#include "uptodate.h"
|
||||
#include "quota.h"
|
||||
#include "file.h"
|
||||
#include "namei.h"
|
||||
|
||||
#include "buffer_head_io.h"
|
||||
#include "ocfs2_trace.h"
|
||||
@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
|
||||
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
|
||||
int slot_num);
|
||||
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
|
||||
int slot);
|
||||
int slot,
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type);
|
||||
static int ocfs2_commit_thread(void *arg);
|
||||
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
|
||||
int slot_num,
|
||||
struct ocfs2_dinode *la_dinode,
|
||||
struct ocfs2_dinode *tl_dinode,
|
||||
struct ocfs2_quota_recovery *qrec);
|
||||
struct ocfs2_quota_recovery *qrec,
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type);
|
||||
|
||||
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
|
||||
{
|
||||
@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
|
||||
void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type)
|
||||
{
|
||||
struct ocfs2_replay_map *replay_map = osb->replay_map;
|
||||
int i;
|
||||
@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
|
||||
for (i = 0; i < replay_map->rm_slots; i++)
|
||||
if (replay_map->rm_replay_slots[i])
|
||||
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
|
||||
NULL, NULL);
|
||||
NULL, NULL,
|
||||
orphan_reco_type);
|
||||
replay_map->rm_state = REPLAY_DONE;
|
||||
}
|
||||
|
||||
@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item {
|
||||
struct ocfs2_dinode *lri_la_dinode;
|
||||
struct ocfs2_dinode *lri_tl_dinode;
|
||||
struct ocfs2_quota_recovery *lri_qrec;
|
||||
enum ocfs2_orphan_reco_type lri_orphan_reco_type;
|
||||
};
|
||||
|
||||
/* Does the second half of the recovery process. By this point, the
|
||||
@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
|
||||
struct ocfs2_dinode *la_dinode, *tl_dinode;
|
||||
struct ocfs2_la_recovery_item *item, *n;
|
||||
struct ocfs2_quota_recovery *qrec;
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type;
|
||||
LIST_HEAD(tmp_la_list);
|
||||
|
||||
trace_ocfs2_complete_recovery(
|
||||
@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
|
||||
la_dinode = item->lri_la_dinode;
|
||||
tl_dinode = item->lri_tl_dinode;
|
||||
qrec = item->lri_qrec;
|
||||
orphan_reco_type = item->lri_orphan_reco_type;
|
||||
|
||||
trace_ocfs2_complete_recovery_slot(item->lri_slot,
|
||||
la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
|
||||
@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
|
||||
kfree(tl_dinode);
|
||||
}
|
||||
|
||||
ret = ocfs2_recover_orphans(osb, item->lri_slot);
|
||||
ret = ocfs2_recover_orphans(osb, item->lri_slot,
|
||||
orphan_reco_type);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
|
||||
@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
|
||||
int slot_num,
|
||||
struct ocfs2_dinode *la_dinode,
|
||||
struct ocfs2_dinode *tl_dinode,
|
||||
struct ocfs2_quota_recovery *qrec)
|
||||
struct ocfs2_quota_recovery *qrec,
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type)
|
||||
{
|
||||
struct ocfs2_la_recovery_item *item;
|
||||
|
||||
@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
|
||||
item->lri_slot = slot_num;
|
||||
item->lri_tl_dinode = tl_dinode;
|
||||
item->lri_qrec = qrec;
|
||||
item->lri_orphan_reco_type = orphan_reco_type;
|
||||
|
||||
spin_lock(&journal->j_lock);
|
||||
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
|
||||
@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
|
||||
/* No need to queue up our truncate_log as regular cleanup will catch
|
||||
* that */
|
||||
ocfs2_queue_recovery_completion(journal, osb->slot_num,
|
||||
osb->local_alloc_copy, NULL, NULL);
|
||||
osb->local_alloc_copy, NULL, NULL,
|
||||
ORPHAN_NEED_TRUNCATE);
|
||||
ocfs2_schedule_truncate_log_flush(osb, 0);
|
||||
|
||||
osb->local_alloc_copy = NULL;
|
||||
@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
|
||||
|
||||
/* queue to recover orphan slots for all offline slots */
|
||||
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
|
||||
ocfs2_queue_replay_slots(osb);
|
||||
ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
|
||||
ocfs2_free_replay_slots(osb);
|
||||
}
|
||||
|
||||
@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
|
||||
osb->slot_num,
|
||||
NULL,
|
||||
NULL,
|
||||
osb->quota_rec);
|
||||
osb->quota_rec,
|
||||
ORPHAN_NEED_TRUNCATE);
|
||||
osb->quota_rec = NULL;
|
||||
}
|
||||
}
|
||||
@ -1360,7 +1374,7 @@ static int __ocfs2_recovery_thread(void *arg)
|
||||
|
||||
/* queue recovery for our own slot */
|
||||
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
|
||||
NULL, NULL);
|
||||
NULL, NULL, ORPHAN_NO_NEED_TRUNCATE);
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
while (rm->rm_used) {
|
||||
@ -1419,13 +1433,14 @@ static int __ocfs2_recovery_thread(void *arg)
|
||||
continue;
|
||||
}
|
||||
ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
|
||||
NULL, NULL, qrec);
|
||||
NULL, NULL, qrec,
|
||||
ORPHAN_NEED_TRUNCATE);
|
||||
}
|
||||
|
||||
ocfs2_super_unlock(osb, 1);
|
||||
|
||||
/* queue recovery for offline slots */
|
||||
ocfs2_queue_replay_slots(osb);
|
||||
ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE);
|
||||
|
||||
bail:
|
||||
mutex_lock(&osb->recovery_lock);
|
||||
@ -1711,7 +1726,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
|
||||
|
||||
/* This will kfree the memory pointed to by la_copy and tl_copy */
|
||||
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
|
||||
tl_copy, NULL);
|
||||
tl_copy, NULL, ORPHAN_NEED_TRUNCATE);
|
||||
|
||||
status = 0;
|
||||
done:
|
||||
@ -1901,7 +1916,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
|
||||
|
||||
for (i = 0; i < osb->max_slots; i++)
|
||||
ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
|
||||
NULL);
|
||||
NULL, ORPHAN_NO_NEED_TRUNCATE);
|
||||
/*
|
||||
* We queued a recovery on orphan slots, increment the sequence
|
||||
* number and update LVB so other node will skip the scan for a while
|
||||
@ -2000,6 +2015,13 @@ static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
|
||||
if (IS_ERR(iter))
|
||||
return 0;
|
||||
|
||||
/* Skip inodes which are already added to recover list, since dio may
|
||||
* happen concurrently with unlink/rename */
|
||||
if (OCFS2_I(iter)->ip_next_orphan) {
|
||||
iput(iter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
|
||||
/* No locking is required for the next_orphan queue as there
|
||||
* is only ever a single process doing orphan recovery. */
|
||||
@ -2108,7 +2130,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb,
|
||||
* advertising our state to ocfs2_delete_inode().
|
||||
*/
|
||||
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
|
||||
int slot)
|
||||
int slot,
|
||||
enum ocfs2_orphan_reco_type orphan_reco_type)
|
||||
{
|
||||
int ret = 0;
|
||||
struct inode *inode = NULL;
|
||||
@ -2132,13 +2155,60 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
|
||||
(unsigned long long)oi->ip_blkno);
|
||||
|
||||
iter = oi->ip_next_orphan;
|
||||
oi->ip_next_orphan = NULL;
|
||||
|
||||
spin_lock(&oi->ip_lock);
|
||||
/* Set the proper information to get us going into
|
||||
* ocfs2_delete_inode. */
|
||||
oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
|
||||
spin_unlock(&oi->ip_lock);
|
||||
/*
|
||||
* We need to take and drop the inode lock to
|
||||
* force read inode from disk.
|
||||
*/
|
||||
ret = ocfs2_inode_lock(inode, NULL, 0);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto next;
|
||||
}
|
||||
ocfs2_inode_unlock(inode, 0);
|
||||
|
||||
if (inode->i_nlink == 0) {
|
||||
spin_lock(&oi->ip_lock);
|
||||
/* Set the proper information to get us going into
|
||||
* ocfs2_delete_inode. */
|
||||
oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
|
||||
spin_unlock(&oi->ip_lock);
|
||||
} else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) {
|
||||
struct buffer_head *di_bh = NULL;
|
||||
|
||||
ret = ocfs2_rw_lock(inode, 1);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (ret < 0) {
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
mlog_errno(ret);
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = ocfs2_truncate_file(inode, di_bh,
|
||||
i_size_read(inode));
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0);
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
wake_up(&OCFS2_I(inode)->append_dio_wq);
|
||||
} /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */
|
||||
|
||||
next:
|
||||
iput(inode);
|
||||
|
||||
inode = iter;
|
||||
|
@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
|
||||
* orphan dir index leaf */
|
||||
#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
|
||||
|
||||
/* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry +
|
||||
* orphan dir index root + orphan dir index leaf */
|
||||
#define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4)
|
||||
#define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS
|
||||
|
||||
/* dinode update, old dir dinode update, new dir dinode update, old
|
||||
* dir dir entry, new dir dir entry, dir entry update for renaming
|
||||
* directory + target unlink + 3 x dir index leaves */
|
||||
|
284
fs/ocfs2/namei.c
284
fs/ocfs2/namei.c
@ -79,7 +79,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
|
||||
struct inode **ret_orphan_dir,
|
||||
u64 blkno,
|
||||
char *name,
|
||||
struct ocfs2_dir_lookup_result *lookup);
|
||||
struct ocfs2_dir_lookup_result *lookup,
|
||||
bool dio);
|
||||
|
||||
static int ocfs2_orphan_add(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
@ -87,7 +88,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
|
||||
struct buffer_head *fe_bh,
|
||||
char *name,
|
||||
struct ocfs2_dir_lookup_result *lookup,
|
||||
struct inode *orphan_dir_inode);
|
||||
struct inode *orphan_dir_inode,
|
||||
bool dio);
|
||||
|
||||
static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
@ -104,6 +106,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
|
||||
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
|
||||
/* An orphan dir name is an 8 byte value, printed as a hex string */
|
||||
#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
|
||||
#define OCFS2_DIO_ORPHAN_PREFIX "dio-"
|
||||
#define OCFS2_DIO_ORPHAN_PREFIX_LEN 4
|
||||
|
||||
static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
@ -952,7 +956,8 @@ static int ocfs2_unlink(struct inode *dir,
|
||||
if (ocfs2_inode_is_unlinkable(inode)) {
|
||||
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
|
||||
OCFS2_I(inode)->ip_blkno,
|
||||
orphan_name, &orphan_insert);
|
||||
orphan_name, &orphan_insert,
|
||||
false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
@ -1004,7 +1009,7 @@ static int ocfs2_unlink(struct inode *dir,
|
||||
|
||||
if (is_unlinkable) {
|
||||
status = ocfs2_orphan_add(osb, handle, inode, fe_bh,
|
||||
orphan_name, &orphan_insert, orphan_dir);
|
||||
orphan_name, &orphan_insert, orphan_dir, false);
|
||||
if (status < 0)
|
||||
mlog_errno(status);
|
||||
}
|
||||
@ -1440,7 +1445,8 @@ static int ocfs2_rename(struct inode *old_dir,
|
||||
if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
|
||||
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
|
||||
OCFS2_I(new_inode)->ip_blkno,
|
||||
orphan_name, &orphan_insert);
|
||||
orphan_name, &orphan_insert,
|
||||
false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
@ -1507,7 +1513,7 @@ static int ocfs2_rename(struct inode *old_dir,
|
||||
if (should_add_orphan) {
|
||||
status = ocfs2_orphan_add(osb, handle, new_inode,
|
||||
newfe_bh, orphan_name,
|
||||
&orphan_insert, orphan_dir);
|
||||
&orphan_insert, orphan_dir, false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
@ -2088,12 +2094,28 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
|
||||
struct buffer_head *orphan_dir_bh,
|
||||
u64 blkno,
|
||||
char *name,
|
||||
struct ocfs2_dir_lookup_result *lookup)
|
||||
struct ocfs2_dir_lookup_result *lookup,
|
||||
bool dio)
|
||||
{
|
||||
int ret;
|
||||
struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
|
||||
int namelen = dio ?
|
||||
(OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
|
||||
OCFS2_ORPHAN_NAMELEN;
|
||||
|
||||
ret = ocfs2_blkno_stringify(blkno, name);
|
||||
if (dio) {
|
||||
ret = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
|
||||
OCFS2_DIO_ORPHAN_PREFIX);
|
||||
if (ret != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
|
||||
ret = -EINVAL;
|
||||
mlog_errno(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = ocfs2_blkno_stringify(blkno,
|
||||
name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
|
||||
} else
|
||||
ret = ocfs2_blkno_stringify(blkno, name);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
return ret;
|
||||
@ -2101,7 +2123,7 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
|
||||
|
||||
ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
|
||||
orphan_dir_bh, name,
|
||||
OCFS2_ORPHAN_NAMELEN, lookup);
|
||||
namelen, lookup);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
return ret;
|
||||
@ -2128,7 +2150,8 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
|
||||
struct inode **ret_orphan_dir,
|
||||
u64 blkno,
|
||||
char *name,
|
||||
struct ocfs2_dir_lookup_result *lookup)
|
||||
struct ocfs2_dir_lookup_result *lookup,
|
||||
bool dio)
|
||||
{
|
||||
struct inode *orphan_dir_inode = NULL;
|
||||
struct buffer_head *orphan_dir_bh = NULL;
|
||||
@ -2142,7 +2165,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
|
||||
}
|
||||
|
||||
ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
|
||||
blkno, name, lookup);
|
||||
blkno, name, lookup, dio);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -2170,12 +2193,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
|
||||
struct buffer_head *fe_bh,
|
||||
char *name,
|
||||
struct ocfs2_dir_lookup_result *lookup,
|
||||
struct inode *orphan_dir_inode)
|
||||
struct inode *orphan_dir_inode,
|
||||
bool dio)
|
||||
{
|
||||
struct buffer_head *orphan_dir_bh = NULL;
|
||||
int status = 0;
|
||||
struct ocfs2_dinode *orphan_fe;
|
||||
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
|
||||
int namelen = dio ?
|
||||
(OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN) :
|
||||
OCFS2_ORPHAN_NAMELEN;
|
||||
|
||||
trace_ocfs2_orphan_add_begin(
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno);
|
||||
@ -2219,7 +2246,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
|
||||
ocfs2_journal_dirty(handle, orphan_dir_bh);
|
||||
|
||||
status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
|
||||
OCFS2_ORPHAN_NAMELEN, inode,
|
||||
namelen, inode,
|
||||
OCFS2_I(inode)->ip_blkno,
|
||||
orphan_dir_bh, lookup);
|
||||
if (status < 0) {
|
||||
@ -2227,13 +2254,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
|
||||
goto rollback;
|
||||
}
|
||||
|
||||
fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
|
||||
OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
|
||||
if (dio) {
|
||||
/* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan
|
||||
* slot.
|
||||
*/
|
||||
fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
|
||||
fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num);
|
||||
} else {
|
||||
fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL);
|
||||
OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
|
||||
|
||||
/* Record which orphan dir our inode now resides
|
||||
* in. delete_inode will use this to determine which orphan
|
||||
* dir to lock. */
|
||||
fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
|
||||
/* Record which orphan dir our inode now resides
|
||||
* in. delete_inode will use this to determine which orphan
|
||||
* dir to lock. */
|
||||
fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
|
||||
}
|
||||
|
||||
ocfs2_journal_dirty(handle, fe_bh);
|
||||
|
||||
@ -2258,14 +2293,28 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
struct inode *orphan_dir_inode,
|
||||
struct inode *inode,
|
||||
struct buffer_head *orphan_dir_bh)
|
||||
struct buffer_head *orphan_dir_bh,
|
||||
bool dio)
|
||||
{
|
||||
char name[OCFS2_ORPHAN_NAMELEN + 1];
|
||||
const int namelen = OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN;
|
||||
char name[namelen + 1];
|
||||
struct ocfs2_dinode *orphan_fe;
|
||||
int status = 0;
|
||||
struct ocfs2_dir_lookup_result lookup = { NULL, };
|
||||
|
||||
status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
|
||||
if (dio) {
|
||||
status = snprintf(name, OCFS2_DIO_ORPHAN_PREFIX_LEN + 1, "%s",
|
||||
OCFS2_DIO_ORPHAN_PREFIX);
|
||||
if (status != OCFS2_DIO_ORPHAN_PREFIX_LEN) {
|
||||
status = -EINVAL;
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno,
|
||||
name + OCFS2_DIO_ORPHAN_PREFIX_LEN);
|
||||
} else
|
||||
status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
@ -2273,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
|
||||
|
||||
trace_ocfs2_orphan_del(
|
||||
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
|
||||
name, OCFS2_ORPHAN_NAMELEN);
|
||||
name, namelen);
|
||||
|
||||
/* find it's spot in the orphan directory */
|
||||
status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
|
||||
status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
|
||||
&lookup);
|
||||
if (status) {
|
||||
mlog_errno(status);
|
||||
@ -2376,7 +2425,8 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir,
|
||||
}
|
||||
|
||||
ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
|
||||
di_blkno, orphan_name, orphan_insert);
|
||||
di_blkno, orphan_name, orphan_insert,
|
||||
false);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -2482,7 +2532,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
|
||||
|
||||
di = (struct ocfs2_dinode *)new_di_bh->b_data;
|
||||
status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
|
||||
&orphan_insert, orphan_dir);
|
||||
&orphan_insert, orphan_dir, false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
@ -2527,6 +2577,186 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
|
||||
return status;
|
||||
}
|
||||
|
||||
static int ocfs2_dio_orphan_recovered(struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
struct buffer_head *di_bh = NULL;
|
||||
struct ocfs2_dinode *di = NULL;
|
||||
|
||||
ret = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
di = (struct ocfs2_dinode *) di_bh->b_data;
|
||||
ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL));
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000
|
||||
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
|
||||
struct inode *inode)
|
||||
{
|
||||
char orphan_name[OCFS2_DIO_ORPHAN_PREFIX_LEN + OCFS2_ORPHAN_NAMELEN + 1];
|
||||
struct inode *orphan_dir_inode = NULL;
|
||||
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
|
||||
struct buffer_head *di_bh = NULL;
|
||||
int status = 0;
|
||||
handle_t *handle = NULL;
|
||||
struct ocfs2_dinode *di = NULL;
|
||||
|
||||
restart:
|
||||
status = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
di = (struct ocfs2_dinode *) di_bh->b_data;
|
||||
/*
|
||||
* Another append dio crashed?
|
||||
* If so, wait for recovery first.
|
||||
*/
|
||||
if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq,
|
||||
ocfs2_dio_orphan_recovered(inode),
|
||||
msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL));
|
||||
goto restart;
|
||||
}
|
||||
|
||||
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode,
|
||||
OCFS2_I(inode)->ip_blkno,
|
||||
orphan_name,
|
||||
&orphan_insert,
|
||||
true);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail_unlock_inode;
|
||||
}
|
||||
|
||||
handle = ocfs2_start_trans(osb,
|
||||
OCFS2_INODE_ADD_TO_ORPHAN_CREDITS);
|
||||
if (IS_ERR(handle)) {
|
||||
status = PTR_ERR(handle);
|
||||
goto bail_unlock_orphan;
|
||||
}
|
||||
|
||||
status = ocfs2_orphan_add(osb, handle, inode, di_bh, orphan_name,
|
||||
&orphan_insert, orphan_dir_inode, true);
|
||||
if (status)
|
||||
mlog_errno(status);
|
||||
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
|
||||
bail_unlock_orphan:
|
||||
ocfs2_inode_unlock(orphan_dir_inode, 1);
|
||||
mutex_unlock(&orphan_dir_inode->i_mutex);
|
||||
iput(orphan_dir_inode);
|
||||
|
||||
ocfs2_free_dir_lookup_result(&orphan_insert);
|
||||
|
||||
bail_unlock_inode:
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
|
||||
struct inode *inode, int update_isize,
|
||||
loff_t end)
|
||||
{
|
||||
struct inode *orphan_dir_inode = NULL;
|
||||
struct buffer_head *orphan_dir_bh = NULL;
|
||||
struct buffer_head *di_bh = NULL;
|
||||
struct ocfs2_dinode *di = NULL;
|
||||
handle_t *handle = NULL;
|
||||
int status = 0;
|
||||
|
||||
status = ocfs2_inode_lock(inode, &di_bh, 1);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
di = (struct ocfs2_dinode *) di_bh->b_data;
|
||||
|
||||
orphan_dir_inode = ocfs2_get_system_file_inode(osb,
|
||||
ORPHAN_DIR_SYSTEM_INODE,
|
||||
le16_to_cpu(di->i_dio_orphaned_slot));
|
||||
if (!orphan_dir_inode) {
|
||||
status = -ENOENT;
|
||||
mlog_errno(status);
|
||||
goto bail_unlock_inode;
|
||||
}
|
||||
|
||||
mutex_lock(&orphan_dir_inode->i_mutex);
|
||||
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
|
||||
if (status < 0) {
|
||||
mutex_unlock(&orphan_dir_inode->i_mutex);
|
||||
iput(orphan_dir_inode);
|
||||
mlog_errno(status);
|
||||
goto bail_unlock_inode;
|
||||
}
|
||||
|
||||
handle = ocfs2_start_trans(osb,
|
||||
OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS);
|
||||
if (IS_ERR(handle)) {
|
||||
status = PTR_ERR(handle);
|
||||
goto bail_unlock_orphan;
|
||||
}
|
||||
|
||||
BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)));
|
||||
|
||||
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode,
|
||||
inode, orphan_dir_bh, true);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail_commit;
|
||||
}
|
||||
|
||||
status = ocfs2_journal_access_di(handle,
|
||||
INODE_CACHE(inode),
|
||||
di_bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail_commit;
|
||||
}
|
||||
|
||||
di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL);
|
||||
di->i_dio_orphaned_slot = 0;
|
||||
|
||||
if (update_isize) {
|
||||
status = ocfs2_set_inode_size(handle, inode, di_bh, end);
|
||||
if (status)
|
||||
mlog_errno(status);
|
||||
} else
|
||||
ocfs2_journal_dirty(handle, di_bh);
|
||||
|
||||
bail_commit:
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
|
||||
bail_unlock_orphan:
|
||||
ocfs2_inode_unlock(orphan_dir_inode, 1);
|
||||
mutex_unlock(&orphan_dir_inode->i_mutex);
|
||||
brelse(orphan_dir_bh);
|
||||
iput(orphan_dir_inode);
|
||||
|
||||
bail_unlock_inode:
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
brelse(di_bh);
|
||||
|
||||
bail:
|
||||
return status;
|
||||
}
|
||||
|
||||
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
struct inode *inode,
|
||||
struct dentry *dentry)
|
||||
@ -2615,7 +2845,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
}
|
||||
|
||||
status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
|
||||
orphan_dir_bh);
|
||||
orphan_dir_bh, false);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto out_commit;
|
||||
|
@ -34,10 +34,16 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
struct inode *orphan_dir_inode,
|
||||
struct inode *inode,
|
||||
struct buffer_head *orphan_dir_bh);
|
||||
struct buffer_head *orphan_dir_bh,
|
||||
bool dio);
|
||||
int ocfs2_create_inode_in_orphan(struct inode *dir,
|
||||
int mode,
|
||||
struct inode **new_inode);
|
||||
int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
|
||||
struct inode *inode);
|
||||
int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
|
||||
struct inode *inode, int update_isize,
|
||||
loff_t end);
|
||||
int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
|
||||
struct inode *new_inode,
|
||||
struct dentry *new_dentry);
|
||||
|
@ -209,6 +209,11 @@ struct ocfs2_lock_res {
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
 * Passed along with a queued recovery item to ocfs2_recover_orphans() to
 * select what is done with an orphan-dir inode that still has links.
 */
enum ocfs2_orphan_reco_type {
	/* Leave such inodes alone. */
	ORPHAN_NO_NEED_TRUNCATE = 0,
	/* Truncate to i_size and remove the dio orphan entry. */
	ORPHAN_NEED_TRUNCATE = 1,
};
|
||||
|
||||
enum ocfs2_orphan_scan_state {
|
||||
ORPHAN_SCAN_ACTIVE,
|
||||
ORPHAN_SCAN_INACTIVE
|
||||
@ -495,6 +500,14 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
|
||||
{
|
||||
if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
|
||||
{
|
||||
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
|
||||
@ -726,6 +739,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb,
|
||||
return clusters;
|
||||
}
|
||||
|
||||
static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb,
|
||||
u64 bytes)
|
||||
{
|
||||
int cl_bits = OCFS2_SB(sb)->s_clustersize_bits;
|
||||
unsigned int clusters;
|
||||
|
||||
clusters = (unsigned int)(bytes >> cl_bits);
|
||||
return clusters;
|
||||
}
|
||||
|
||||
static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb,
|
||||
u64 bytes)
|
||||
{
|
||||
|
@ -105,7 +105,8 @@
|
||||
| OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
|
||||
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
|
||||
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
|
||||
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
|
||||
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
|
||||
| OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
|
||||
|
||||
/*
|
||||
* Heartbeat-only devices are missing journals and other files. The
|
||||
@ -199,6 +200,11 @@
|
||||
#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
|
||||
#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
|
||||
|
||||
/*
|
||||
* Append Direct IO support
|
||||
*/
|
||||
#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
|
||||
|
||||
/* The byte offset of the first backup block will be 1G.
|
||||
* The following will be 4G, 16G, 64G, 256G and 1T.
|
||||
*/
|
||||
@ -229,6 +235,8 @@
|
||||
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
|
||||
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
|
||||
#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
|
||||
#define OCFS2_DIO_ORPHANED_FL	(0x00002000)	/* On the orphan list especially
						 * for dio */
|
||||
|
||||
/*
|
||||
* Flags on ocfs2_dinode.i_dyn_features
|
||||
@ -729,7 +737,9 @@ struct ocfs2_dinode {
|
||||
inode belongs to. Only valid
|
||||
if allocated from a
|
||||
discontiguous block group */
|
||||
/*A0*/ __le64 i_reserved2[3];
|
||||
/*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */
|
||||
__le16 i_reserved1[3];
|
||||
__le64 i_reserved2[2];
|
||||
/*B8*/ union {
|
||||
__le64 i_pad1; /* Generic way to refer to this
|
||||
64bit union */
|
||||
|
@ -1746,6 +1746,8 @@ static void ocfs2_inode_init_once(void *data)
|
||||
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
|
||||
ocfs2_lock_res_init_once(&oi->ip_open_lockres);
|
||||
|
||||
init_waitqueue_head(&oi->append_dio_wq);
|
||||
|
||||
ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
|
||||
&ocfs2_inode_caching_ops);
|
||||
|
||||
|
@ -667,11 +667,8 @@ int open_check_o_direct(struct file *f)
|
||||
{
|
||||
/* NB: we're sure to have correct a_ops only after f_op->open */
|
||||
if (f->f_flags & O_DIRECT) {
|
||||
if (!f->f_mapping->a_ops ||
|
||||
((!f->f_mapping->a_ops->direct_IO) &&
|
||||
(!f->f_mapping->a_ops->get_xip_mem))) {
|
||||
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ struct swap_info_struct;
|
||||
struct seq_file;
|
||||
struct workqueue_struct;
|
||||
struct iov_iter;
|
||||
struct vm_fault;
|
||||
|
||||
extern void __init inode_init(void);
|
||||
extern void __init inode_init_early(void);
|
||||
@ -361,8 +362,6 @@ struct address_space_operations {
|
||||
int (*releasepage) (struct page *, gfp_t);
|
||||
void (*freepage)(struct page *);
|
||||
ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
|
||||
int (*get_xip_mem)(struct address_space *, pgoff_t, int,
|
||||
void **, unsigned long *);
|
||||
/*
|
||||
* migrate the contents of a page to the specified target. If
|
||||
* migrate_mode is MIGRATE_ASYNC, it must not block.
|
||||
@ -1677,6 +1676,11 @@ struct super_operations {
|
||||
#define S_IMA 1024 /* Inode has an associated IMA struct */
|
||||
#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
|
||||
#define S_NOSEC 4096 /* no suid or xattr security attributes */
|
||||
#ifdef CONFIG_FS_DAX
|
||||
#define S_DAX 8192 /* Direct Access, avoiding the page cache */
|
||||
#else
|
||||
#define S_DAX 0 /* Make all the DAX code disappear */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Note that nosuid etc flags are inode-specific: setting some file-system
|
||||
@ -1714,6 +1718,7 @@ struct super_operations {
|
||||
#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
|
||||
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
|
||||
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
|
||||
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
|
||||
|
||||
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
|
||||
(inode)->i_rdev == WHITEOUT_DEV)
|
||||
@ -2581,19 +2586,13 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
|
||||
extern int generic_file_open(struct inode * inode, struct file * filp);
|
||||
extern int nonseekable_open(struct inode * inode, struct file * filp);
|
||||
|
||||
#ifdef CONFIG_FS_XIP
|
||||
extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
|
||||
loff_t *ppos);
|
||||
extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
|
||||
extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
|
||||
size_t len, loff_t *ppos);
|
||||
extern int xip_truncate_page(struct address_space *mapping, loff_t from);
|
||||
#else
|
||||
static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *,
|
||||
loff_t, get_block_t, dio_iodone_t, int flags);
|
||||
int dax_clear_blocks(struct inode *, sector_t block, long size);
|
||||
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
|
||||
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
|
||||
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
|
||||
#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
|
||||
@ -2750,6 +2749,11 @@ extern int generic_show_options(struct seq_file *m, struct dentry *root);
|
||||
extern void save_mount_options(struct super_block *sb, char *options);
|
||||
extern void replace_mount_options(struct super_block *sb, char *options);
|
||||
|
||||
static inline bool io_is_direct(struct file *filp)
|
||||
{
|
||||
return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
|
||||
}
|
||||
|
||||
static inline ino_t parent_ino(struct dentry *dentry)
|
||||
{
|
||||
ino_t res;
|
||||
|
@ -224,6 +224,7 @@ struct vm_fault {
|
||||
pgoff_t pgoff; /* Logical page offset based on vma */
|
||||
void __user *virtual_address; /* Faulting virtual address */
|
||||
|
||||
struct page *cow_page; /* Handler may choose to COW */
|
||||
struct page *page; /* ->fault handlers should return a
|
||||
* page here, unless VM_FAULT_NOPAGE
|
||||
* is set (which is also implied by
|
||||
|
@ -198,7 +198,7 @@ int page_referenced(struct page *, int is_locked,
|
||||
int try_to_unmap(struct page *, enum ttu_flags flags);
|
||||
|
||||
/*
|
||||
* Called from mm/filemap_xip.c to unmap empty zero page
|
||||
* Used by uprobes to replace a userspace page safely
|
||||
*/
|
||||
pte_t *__page_check_address(struct page *, struct mm_struct *,
|
||||
unsigned long, spinlock_t **, int);
|
||||
|
375
include/linux/rtc/ds1685.h
Normal file
375
include/linux/rtc/ds1685.h
Normal file
@ -0,0 +1,375 @@
|
||||
/*
|
||||
* Definitions for the registers, addresses, and platform data of the
|
||||
* DS1685/DS1687-series RTC chips.
|
||||
*
|
||||
* This driver also works for the DS17x85/DS17x87 RTC chips. Functionally
|
||||
* similar to the DS1685/DS1687, they support a few extra features which
|
||||
* include larger, battery-backed NV-SRAM, burst-mode access, and an RTC
|
||||
* write counter.
|
||||
*
|
||||
* Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>.
|
||||
* Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>.
|
||||
*
|
||||
* References:
|
||||
* DS1685/DS1687 3V/5V Real-Time Clocks, 19-5215, Rev 4/10.
|
||||
* DS17x85/DS17x87 3V/5V Real-Time Clocks, 19-5222, Rev 4/10.
|
||||
* DS1689/DS1693 3V/5V Serialized Real-Time Clocks, Rev 112105.
|
||||
* Application Note 90, Using the Multiplex Bus RTC Extended Features.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_RTC_DS1685_H_
|
||||
#define _LINUX_RTC_DS1685_H_
|
||||
|
||||
#include <linux/rtc.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
/**
|
||||
* struct ds1685_priv - DS1685 private data structure.
|
||||
* @dev: pointer to the rtc_device structure.
|
||||
* @regs: iomapped base address pointer of the RTC registers.
|
||||
* @regstep: padding/step size between registers (optional).
|
||||
* @baseaddr: base address of the RTC device.
|
||||
* @size: resource size.
|
||||
* @lock: private lock variable for spin locking/unlocking.
|
||||
* @work: private work item (struct work_struct).
|
||||
* @irq_num: IRQ number assigned to the RTC device.
|
||||
* @prepare_poweroff: pointer to platform pre-poweroff function.
|
||||
* @wake_alarm: pointer to platform wake alarm function.
|
||||
* @post_ram_clear: pointer to platform post ram-clear function.
|
||||
*/
|
||||
struct ds1685_priv {
|
||||
struct rtc_device *dev;
|
||||
void __iomem *regs;
|
||||
u32 regstep;
|
||||
resource_size_t baseaddr;
|
||||
size_t size;
|
||||
spinlock_t lock;
|
||||
struct work_struct work;
|
||||
int irq_num;
|
||||
bool bcd_mode;
|
||||
bool no_irq;
|
||||
bool uie_unsupported;
|
||||
bool alloc_io_resources;
|
||||
u8 (*read)(struct ds1685_priv *, int);
|
||||
void (*write)(struct ds1685_priv *, int, u8);
|
||||
void (*prepare_poweroff)(void);
|
||||
void (*wake_alarm)(void);
|
||||
void (*post_ram_clear)(void);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* struct ds1685_rtc_platform_data - platform data structure.
|
||||
* @plat_prepare_poweroff: platform-specific pre-poweroff function.
|
||||
* @plat_wake_alarm: platform-specific wake alarm function.
|
||||
* @plat_post_ram_clear: platform-specific post ram-clear function.
|
||||
*
|
||||
* If your platform needs to use a custom padding/step size between
|
||||
* registers, or uses one or more of the extended interrupts and needs special
|
||||
* handling, then include this header file in your platform definition and
|
||||
* set regstep and the plat_* pointers as appropriate.
|
||||
*/
|
||||
struct ds1685_rtc_platform_data {
|
||||
const u32 regstep;
|
||||
const bool bcd_mode;
|
||||
const bool no_irq;
|
||||
const bool uie_unsupported;
|
||||
const bool alloc_io_resources;
|
||||
u8 (*plat_read)(struct ds1685_priv *, int);
|
||||
void (*plat_write)(struct ds1685_priv *, int, u8);
|
||||
void (*plat_prepare_poweroff)(void);
|
||||
void (*plat_wake_alarm)(void);
|
||||
void (*plat_post_ram_clear)(void);
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Time Registers.
|
||||
*/
|
||||
#define RTC_SECS 0x00 /* Seconds 00-59 */
|
||||
#define RTC_SECS_ALARM 0x01 /* Alarm Seconds 00-59 */
|
||||
#define RTC_MINS 0x02 /* Minutes 00-59 */
|
||||
#define RTC_MINS_ALARM 0x03 /* Alarm Minutes 00-59 */
|
||||
#define RTC_HRS 0x04 /* Hours 01-12 AM/PM || 00-23 */
|
||||
#define RTC_HRS_ALARM 0x05 /* Alarm Hours 01-12 AM/PM || 00-23 */
|
||||
#define RTC_WDAY 0x06 /* Day of Week 01-07 */
|
||||
#define RTC_MDAY 0x07 /* Day of Month 01-31 */
|
||||
#define RTC_MONTH 0x08 /* Month 01-12 */
|
||||
#define RTC_YEAR 0x09 /* Year 00-99 */
|
||||
#define RTC_CENTURY 0x48 /* Century 00-99 */
|
||||
#define RTC_MDAY_ALARM 0x49 /* Alarm Day of Month 01-31 */
|
||||
|
||||
|
||||
/*
|
||||
* Bit masks for the Time registers in BCD Mode (DM = 0).
|
||||
*/
|
||||
#define RTC_SECS_BCD_MASK 0x7f /* - x x x x x x x */
|
||||
#define RTC_MINS_BCD_MASK 0x7f /* - x x x x x x x */
|
||||
#define RTC_HRS_12_BCD_MASK 0x1f /* - - - x x x x x */
|
||||
#define RTC_HRS_24_BCD_MASK 0x3f /* - - x x x x x x */
|
||||
#define RTC_MDAY_BCD_MASK 0x3f /* - - x x x x x x */
|
||||
#define RTC_MONTH_BCD_MASK 0x1f /* - - - x x x x x */
|
||||
#define RTC_YEAR_BCD_MASK 0xff /* x x x x x x x x */
|
||||
|
||||
/*
|
||||
* Bit masks for the Time registers in BIN Mode (DM = 1).
|
||||
*/
|
||||
#define RTC_SECS_BIN_MASK 0x3f /* - - x x x x x x */
|
||||
#define RTC_MINS_BIN_MASK 0x3f /* - - x x x x x x */
|
||||
#define RTC_HRS_12_BIN_MASK 0x0f /* - - - - x x x x */
|
||||
#define RTC_HRS_24_BIN_MASK 0x1f /* - - - x x x x x */
|
||||
#define RTC_MDAY_BIN_MASK 0x1f /* - - - x x x x x */
|
||||
#define RTC_MONTH_BIN_MASK 0x0f /* - - - - x x x x */
|
||||
#define RTC_YEAR_BIN_MASK 0x7f /* - x x x x x x x */
|
||||
|
||||
/*
|
||||
* Bit masks common for the Time registers in BCD or BIN Mode.
|
||||
*/
|
||||
#define RTC_WDAY_MASK 0x07 /* - - - - - x x x */
|
||||
#define RTC_CENTURY_MASK 0xff /* x x x x x x x x */
|
||||
#define RTC_MDAY_ALARM_MASK 0xff /* x x x x x x x x */
|
||||
#define RTC_HRS_AMPM_MASK BIT(7) /* Mask for the AM/PM bit */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Control Registers.
|
||||
*/
|
||||
#define RTC_CTRL_A 0x0a /* Control Register A */
|
||||
#define RTC_CTRL_B 0x0b /* Control Register B */
|
||||
#define RTC_CTRL_C 0x0c /* Control Register C */
|
||||
#define RTC_CTRL_D 0x0d /* Control Register D */
|
||||
#define RTC_EXT_CTRL_4A 0x4a /* Extended Control Register 4A */
|
||||
#define RTC_EXT_CTRL_4B 0x4b /* Extended Control Register 4B */
|
||||
|
||||
|
||||
/*
|
||||
* Bit names in Control Register A.
|
||||
*/
|
||||
#define RTC_CTRL_A_UIP BIT(7) /* Update In Progress */
|
||||
#define RTC_CTRL_A_DV2 BIT(6) /* Countdown Chain */
|
||||
#define RTC_CTRL_A_DV1 BIT(5) /* Oscillator Enable */
|
||||
#define RTC_CTRL_A_DV0 BIT(4) /* Bank Select */
|
||||
#define RTC_CTRL_A_RS2 BIT(2) /* Rate-Selection Bit 2 */
|
||||
#define RTC_CTRL_A_RS3 BIT(3) /* Rate-Selection Bit 3 */
|
||||
#define RTC_CTRL_A_RS1 BIT(1) /* Rate-Selection Bit 1 */
|
||||
#define RTC_CTRL_A_RS0 BIT(0) /* Rate-Selection Bit 0 */
|
||||
#define RTC_CTRL_A_RS_MASK 0x0f /* RS3 + RS2 + RS1 + RS0 */
|
||||
|
||||
/*
|
||||
* Bit names in Control Register B.
|
||||
*/
|
||||
#define RTC_CTRL_B_SET BIT(7) /* SET Bit */
|
||||
#define RTC_CTRL_B_PIE BIT(6) /* Periodic-Interrupt Enable */
|
||||
#define RTC_CTRL_B_AIE BIT(5) /* Alarm-Interrupt Enable */
|
||||
#define RTC_CTRL_B_UIE BIT(4) /* Update-Ended Interrupt-Enable */
|
||||
#define RTC_CTRL_B_SQWE BIT(3) /* Square-Wave Enable */
|
||||
#define RTC_CTRL_B_DM BIT(2) /* Data Mode */
|
||||
#define RTC_CTRL_B_2412 BIT(1) /* 12-Hr/24-Hr Mode */
|
||||
#define RTC_CTRL_B_DSE BIT(0) /* Daylight Savings Enable */
|
||||
#define RTC_CTRL_B_PAU_MASK 0x70 /* PIE + AIE + UIE */
|
||||
|
||||
|
||||
/*
|
||||
* Bit names in Control Register C.
|
||||
*
|
||||
* BIT(0), BIT(1), BIT(2), & BIT(3) are unused, always return 0, and cannot
|
||||
* be written to.
|
||||
*/
|
||||
#define RTC_CTRL_C_IRQF BIT(7) /* Interrupt-Request Flag */
|
||||
#define RTC_CTRL_C_PF BIT(6) /* Periodic-Interrupt Flag */
|
||||
#define RTC_CTRL_C_AF BIT(5) /* Alarm-Interrupt Flag */
|
||||
#define RTC_CTRL_C_UF BIT(4) /* Update-Ended Interrupt Flag */
|
||||
#define RTC_CTRL_C_PAU_MASK 0x70 /* PF + AF + UF */
|
||||
|
||||
|
||||
/*
|
||||
* Bit names in Control Register D.
|
||||
*
|
||||
* BIT(0) through BIT(6) are unused, always return 0, and cannot
|
||||
* be written to.
|
||||
*/
|
||||
#define RTC_CTRL_D_VRT BIT(7) /* Valid RAM and Time */
|
||||
|
||||
|
||||
/*
|
||||
* Bit names in Extended Control Register 4A.
|
||||
*
|
||||
* On the DS1685/DS1687/DS1689/DS1693, BIT(4) and BIT(5) are reserved for
|
||||
* future use. They can be read from and written to, but have no effect
|
||||
* on the RTC's operation.
|
||||
*
|
||||
* On the DS17x85/DS17x87, BIT(5) is Burst-Mode Enable (BME), and allows
|
||||
* access to the extended NV-SRAM by automatically incrementing the address
|
||||
* register when they are read from or written to.
|
||||
*/
|
||||
#define RTC_CTRL_4A_VRT2 BIT(7) /* Auxiliary Battery Status */
|
||||
#define RTC_CTRL_4A_INCR BIT(6) /* Increment-in-Progress Status */
|
||||
#define RTC_CTRL_4A_PAB BIT(3) /* Power-Active Bar Control */
|
||||
#define RTC_CTRL_4A_RF BIT(2) /* RAM-Clear Flag */
|
||||
#define RTC_CTRL_4A_WF BIT(1) /* Wake-Up Alarm Flag */
|
||||
#define RTC_CTRL_4A_KF BIT(0) /* Kickstart Flag */
|
||||
#if !defined(CONFIG_RTC_DRV_DS1685) && !defined(CONFIG_RTC_DRV_DS1689)
|
||||
#define RTC_CTRL_4A_BME BIT(5) /* Burst-Mode Enable */
|
||||
#endif
|
||||
#define RTC_CTRL_4A_RWK_MASK 0x07 /* RF + WF + KF */
|
||||
|
||||
|
||||
/*
|
||||
* Bit names in Extended Control Register 4B.
|
||||
*/
|
||||
#define RTC_CTRL_4B_ABE BIT(7) /* Auxiliary Battery Enable */
|
||||
#define RTC_CTRL_4B_E32K BIT(6) /* Enable 32.768kHz on SQW Pin */
|
||||
#define RTC_CTRL_4B_CS BIT(5) /* Crystal Select */
|
||||
#define RTC_CTRL_4B_RCE BIT(4) /* RAM Clear-Enable */
|
||||
#define RTC_CTRL_4B_PRS BIT(3) /* PAB Reset-Select */
|
||||
#define RTC_CTRL_4B_RIE BIT(2) /* RAM Clear-Interrupt Enable */
|
||||
#define RTC_CTRL_4B_WIE BIT(1) /* Wake-Up Alarm-Interrupt Enable */
|
||||
#define RTC_CTRL_4B_KSE BIT(0) /* Kickstart Interrupt-Enable */
|
||||
#define RTC_CTRL_4B_RWK_MASK 0x07 /* RIE + WIE + KSE */
|
||||
|
||||
|
||||
/*
|
||||
* Misc register names in Bank 1.
|
||||
*
|
||||
* The DV0 bit in Control Register A must be set to 1 for these registers
|
||||
* to become available, including Extended Control Registers 4A & 4B.
|
||||
*/
|
||||
#define RTC_BANK1_SSN_MODEL 0x40 /* Model Number */
|
||||
#define RTC_BANK1_SSN_BYTE_1 0x41 /* 1st Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_BYTE_2 0x42 /* 2nd Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_BYTE_3 0x43 /* 3rd Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_BYTE_4 0x44 /* 4th Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_BYTE_5 0x45 /* 5th Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_BYTE_6 0x46 /* 6th Byte of Serial Number */
|
||||
#define RTC_BANK1_SSN_CRC 0x47 /* Serial CRC Byte */
|
||||
#define RTC_BANK1_RAM_DATA_PORT 0x53 /* Extended RAM Data Port */
|
||||
|
||||
|
||||
/*
|
||||
* Model-specific registers in Bank 1.
|
||||
*
|
||||
* The addresses below differ depending on the model of the RTC chip
|
||||
* selected in the kernel configuration. Not all of these features are
|
||||
* supported in the main driver at present.
|
||||
*
|
||||
* DS1685/DS1687 - Extended NV-SRAM address (LSB only).
|
||||
* DS1689/DS1693 - Vcc, Vbat, Pwr Cycle Counters & Customer-specific S/N.
|
||||
* DS17x85/DS17x87 - Extended NV-SRAM addresses (MSB & LSB) & Write counter.
|
||||
*/
|
||||
#if defined(CONFIG_RTC_DRV_DS1685)
|
||||
#define RTC_BANK1_RAM_ADDR 0x50 /* NV-SRAM Addr */
|
||||
#elif defined(CONFIG_RTC_DRV_DS1689)
|
||||
#define RTC_BANK1_VCC_CTR_LSB 0x54 /* Vcc Counter Addr (LSB) */
|
||||
#define RTC_BANK1_VCC_CTR_MSB 0x57 /* Vcc Counter Addr (MSB) */
|
||||
#define RTC_BANK1_VBAT_CTR_LSB 0x58 /* Vbat Counter Addr (LSB) */
|
||||
#define RTC_BANK1_VBAT_CTR_MSB 0x5b /* Vbat Counter Addr (MSB) */
|
||||
#define RTC_BANK1_PWR_CTR_LSB 0x5c /* Pwr Cycle Counter Addr (LSB) */
|
||||
#define RTC_BANK1_PWR_CTR_MSB 0x5d /* Pwr Cycle Counter Addr (MSB) */
|
||||
#define RTC_BANK1_UNIQ_SN 0x60 /* Customer-specific S/N */
|
||||
#else /* DS17x85/DS17x87 */
|
||||
#define RTC_BANK1_RAM_ADDR_LSB 0x50 /* NV-SRAM Addr (LSB) */
|
||||
#define RTC_BANK1_RAM_ADDR_MSB 0x51 /* NV-SRAM Addr (MSB) */
|
||||
#define RTC_BANK1_WRITE_CTR 0x5e /* RTC Write Counter */
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Model numbers.
|
||||
*
|
||||
* The DS1688/DS1691 and DS1689/DS1693 chips share the same model number
|
||||
* and the manual doesn't indicate any major differences. As such, they
|
||||
* are regarded as the same chip in this driver.
|
||||
*/
|
||||
#define RTC_MODEL_DS1685 0x71 /* DS1685/DS1687 */
|
||||
#define RTC_MODEL_DS17285 0x72 /* DS17285/DS17287 */
|
||||
#define RTC_MODEL_DS1689 0x73 /* DS1688/DS1691/DS1689/DS1693 */
|
||||
#define RTC_MODEL_DS17485 0x74 /* DS17485/DS17487 */
|
||||
#define RTC_MODEL_DS17885 0x78 /* DS17885/DS17887 */
|
||||
|
||||
|
||||
/*
|
||||
* Periodic Interrupt Rates / Square-Wave Output Frequency
|
||||
*
|
||||
* Periodic rates are selected by setting the RS3-RS0 bits in Control
|
||||
* Register A and enabled via either the E32K bit in Extended Control
|
||||
* Register 4B or the SQWE bit in Control Register B.
|
||||
*
|
||||
* E32K overrides the settings of RS3-RS0 and outputs a frequency of 32768Hz
|
||||
* on the SQW pin of the RTC chip. While there are 16 possible selections,
|
||||
* the 1-of-16 decoder is only able to divide the base 32768Hz signal into 13
|
||||
* smaller frequencies. The values 0x01 and 0x02 are not used and are
|
||||
* synonymous with 0x08 and 0x09, respectively.
|
||||
*
|
||||
* When E32K is set to a logic 1, periodic interrupts are disabled and reading
|
||||
* /dev/rtc will return -EINVAL. This also applies if the periodic interrupt
|
||||
* frequency is set to 0Hz.
|
||||
*
|
||||
* Not currently used by the rtc-ds1685 driver because the RTC core removed
|
||||
* support for hardware-generated periodic-interrupts in favour of
|
||||
* hrtimer-generated interrupts. But these defines are kept around for use
|
||||
* in userland, as documentation to the hardware, and possible future use if
|
||||
* hardware-generated periodic interrupts are ever added back.
|
||||
*/
|
||||
/* E32K RS3 RS2 RS1 RS0 */
|
||||
#define RTC_SQW_8192HZ 0x03 /* 0 0 0 1 1 */
|
||||
#define RTC_SQW_4096HZ 0x04 /* 0 0 1 0 0 */
|
||||
#define RTC_SQW_2048HZ 0x05 /* 0 0 1 0 1 */
|
||||
#define RTC_SQW_1024HZ 0x06 /* 0 0 1 1 0 */
|
||||
#define RTC_SQW_512HZ 0x07 /* 0 0 1 1 1 */
|
||||
#define RTC_SQW_256HZ 0x08 /* 0 1 0 0 0 */
|
||||
#define RTC_SQW_128HZ 0x09 /* 0 1 0 0 1 */
|
||||
#define RTC_SQW_64HZ 0x0a /* 0 1 0 1 0 */
|
||||
#define RTC_SQW_32HZ 0x0b /* 0 1 0 1 1 */
|
||||
#define RTC_SQW_16HZ 0x0c /* 0 1 1 0 0 */
|
||||
#define RTC_SQW_8HZ 0x0d /* 0 1 1 0 1 */
|
||||
#define RTC_SQW_4HZ 0x0e /* 0 1 1 1 0 */
|
||||
#define RTC_SQW_2HZ 0x0f /* 0 1 1 1 1 */
|
||||
#define RTC_SQW_0HZ 0x00 /* 0 0 0 0 0 */
|
||||
#define RTC_SQW_32768HZ 32768 /* 1 - - - - */
|
||||
#define RTC_MAX_USER_FREQ 8192
|
||||
|
||||
|
||||
/*
|
||||
* NVRAM data & addresses:
|
||||
* - 50 bytes of NVRAM are available just past the clock registers.
|
||||
* - 64 additional bytes are available in Bank0.
|
||||
*
|
||||
* Extended, battery-backed NV-SRAM:
|
||||
* - DS1685/DS1687 - 128 bytes.
|
||||
* - DS1689/DS1693 - 0 bytes.
|
||||
* - DS17285/DS17287 - 2048 bytes.
|
||||
* - DS17485/DS17487 - 4096 bytes.
|
||||
* - DS17885/DS17887 - 8192 bytes.
|
||||
*/
|
||||
#define NVRAM_TIME_BASE 0x0e /* NVRAM Addr in Time regs */
|
||||
#define NVRAM_BANK0_BASE 0x40 /* NVRAM Addr in Bank0 regs */
|
||||
#define NVRAM_SZ_TIME 50
|
||||
#define NVRAM_SZ_BANK0 64
|
||||
#if defined(CONFIG_RTC_DRV_DS1685)
|
||||
# define NVRAM_SZ_EXTND 128
|
||||
#elif defined(CONFIG_RTC_DRV_DS1689)
|
||||
# define NVRAM_SZ_EXTND 0
|
||||
#elif defined(CONFIG_RTC_DRV_DS17285)
|
||||
# define NVRAM_SZ_EXTND 2048
|
||||
#elif defined(CONFIG_RTC_DRV_DS17485)
|
||||
# define NVRAM_SZ_EXTND 4096
|
||||
#elif defined(CONFIG_RTC_DRV_DS17885)
|
||||
# define NVRAM_SZ_EXTND 8192
|
||||
#endif
|
||||
#define NVRAM_TOTAL_SZ_BANK0 (NVRAM_SZ_TIME + NVRAM_SZ_BANK0)
|
||||
#define NVRAM_TOTAL_SZ (NVRAM_TOTAL_SZ_BANK0 + NVRAM_SZ_EXTND)
|
||||
|
||||
|
||||
/*
|
||||
* Function Prototypes.
|
||||
*/
|
||||
extern void __noreturn
|
||||
ds1685_rtc_poweroff(struct platform_device *pdev);
|
||||
|
||||
#endif /* _LINUX_RTC_DS1685_H_ */
|
@ -14,7 +14,7 @@ config BITREVERSE
|
||||
tristate
|
||||
|
||||
config HAVE_ARCH_BITREVERSE
|
||||
boolean
|
||||
bool
|
||||
default n
|
||||
depends on BITREVERSE
|
||||
help
|
||||
|
@ -55,7 +55,6 @@ obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
|
||||
obj-$(CONFIG_KASAN) += kasan/
|
||||
obj-$(CONFIG_FAILSLAB) += failslab.o
|
||||
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
|
||||
obj-$(CONFIG_FS_XIP) += filemap_xip.o
|
||||
obj-$(CONFIG_MIGRATION) += migrate.o
|
||||
obj-$(CONFIG_QUICKLIST) += quicklist.o
|
||||
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
|
||||
|
@ -28,6 +28,7 @@
|
||||
SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
|
||||
{
|
||||
struct fd f = fdget(fd);
|
||||
struct inode *inode;
|
||||
struct address_space *mapping;
|
||||
struct backing_dev_info *bdi;
|
||||
loff_t endbyte; /* inclusive */
|
||||
@ -39,7 +40,8 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
|
||||
if (!f.file)
|
||||
return -EBADF;
|
||||
|
||||
if (S_ISFIFO(file_inode(f.file)->i_mode)) {
|
||||
inode = file_inode(f.file);
|
||||
if (S_ISFIFO(inode->i_mode)) {
|
||||
ret = -ESPIPE;
|
||||
goto out;
|
||||
}
|
||||
@ -50,7 +52,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (mapping->a_ops->get_xip_mem) {
|
||||
if (IS_DAX(inode)) {
|
||||
switch (advice) {
|
||||
case POSIX_FADV_NORMAL:
|
||||
case POSIX_FADV_RANDOM:
|
||||
|
25
mm/filemap.c
25
mm/filemap.c
@ -1695,8 +1695,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
loff_t *ppos = &iocb->ki_pos;
|
||||
loff_t pos = *ppos;
|
||||
|
||||
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
|
||||
if (file->f_flags & O_DIRECT) {
|
||||
if (io_is_direct(file)) {
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
size_t count = iov_iter_count(iter);
|
||||
@ -1723,9 +1722,11 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* we've already read everything we wanted to, or if
|
||||
* there was a short read because we hit EOF, go ahead
|
||||
* and return. Otherwise fallthrough to buffered io for
|
||||
* the rest of the read.
|
||||
* the rest of the read. Buffered reads will not work for
|
||||
* DAX files, so don't bother trying.
|
||||
*/
|
||||
if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
|
||||
if (retval < 0 || !iov_iter_count(iter) || *ppos >= size ||
|
||||
IS_DAX(inode)) {
|
||||
file_accessed(file);
|
||||
goto out;
|
||||
}
|
||||
@ -2582,18 +2583,20 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
|
||||
if (unlikely(file->f_flags & O_DIRECT)) {
|
||||
if (io_is_direct(file)) {
|
||||
loff_t endbyte;
|
||||
|
||||
written = generic_file_direct_write(iocb, from, pos);
|
||||
if (written < 0 || written == count)
|
||||
/*
|
||||
* If the write stopped short of completing, fall back to
|
||||
* buffered writes. Some filesystems do this for writes to
|
||||
* holes, for example. For DAX files, a buffered write will
|
||||
* not succeed (even if it did, DAX does not handle dirty
|
||||
* page-cache pages correctly).
|
||||
*/
|
||||
if (written < 0 || written == count || IS_DAX(inode))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* direct-io write to a hole: fall through to buffered I/O
|
||||
* for completing the rest of the request.
|
||||
*/
|
||||
pos += written;
|
||||
count -= written;
|
||||
|
||||
|
478
mm/filemap_xip.c
478
mm/filemap_xip.c
@ -1,478 +0,0 @@
|
||||
/*
|
||||
* linux/mm/filemap_xip.c
|
||||
*
|
||||
* Copyright (C) 2005 IBM Corporation
|
||||
* Author: Carsten Otte <cotte@de.ibm.com>
|
||||
*
|
||||
* derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/rmap.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
/*
|
||||
* We do use our own empty page to avoid interference with other users
|
||||
* of ZERO_PAGE(), such as /dev/zero
|
||||
*/
|
||||
static DEFINE_MUTEX(xip_sparse_mutex);
|
||||
static seqcount_t xip_sparse_seq = SEQCNT_ZERO(xip_sparse_seq);
|
||||
static struct page *__xip_sparse_page;
|
||||
|
||||
/* called under xip_sparse_mutex */
|
||||
static struct page *xip_sparse_page(void)
|
||||
{
|
||||
if (!__xip_sparse_page) {
|
||||
struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
|
||||
|
||||
if (page)
|
||||
__xip_sparse_page = page;
|
||||
}
|
||||
return __xip_sparse_page;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a file read routine for execute in place files, and uses
|
||||
* the mapping->a_ops->get_xip_mem() function for the actual low-level
|
||||
* stuff.
|
||||
*
|
||||
* Note the struct file* is not used at all. It may be NULL.
|
||||
*/
|
||||
static ssize_t
|
||||
do_xip_mapping_read(struct address_space *mapping,
|
||||
struct file_ra_state *_ra,
|
||||
struct file *filp,
|
||||
char __user *buf,
|
||||
size_t len,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
pgoff_t index, end_index;
|
||||
unsigned long offset;
|
||||
loff_t isize, pos;
|
||||
size_t copied = 0, error = 0;
|
||||
|
||||
BUG_ON(!mapping->a_ops->get_xip_mem);
|
||||
|
||||
pos = *ppos;
|
||||
index = pos >> PAGE_CACHE_SHIFT;
|
||||
offset = pos & ~PAGE_CACHE_MASK;
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (!isize)
|
||||
goto out;
|
||||
|
||||
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
|
||||
do {
|
||||
unsigned long nr, left;
|
||||
void *xip_mem;
|
||||
unsigned long xip_pfn;
|
||||
int zero = 0;
|
||||
|
||||
/* nr is the maximum number of bytes to copy from this page */
|
||||
nr = PAGE_CACHE_SIZE;
|
||||
if (index >= end_index) {
|
||||
if (index > end_index)
|
||||
goto out;
|
||||
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
|
||||
if (nr <= offset) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
nr = nr - offset;
|
||||
if (nr > len - copied)
|
||||
nr = len - copied;
|
||||
|
||||
error = mapping->a_ops->get_xip_mem(mapping, index, 0,
|
||||
&xip_mem, &xip_pfn);
|
||||
if (unlikely(error)) {
|
||||
if (error == -ENODATA) {
|
||||
/* sparse */
|
||||
zero = 1;
|
||||
} else
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If users can be writing to this page using arbitrary
|
||||
* virtual addresses, take care about potential aliasing
|
||||
* before reading the page on the kernel side.
|
||||
*/
|
||||
if (mapping_writably_mapped(mapping))
|
||||
/* address based flush */ ;
|
||||
|
||||
/*
|
||||
* Ok, we have the mem, so now we can copy it to user space...
|
||||
*
|
||||
* The actor routine returns how many bytes were actually used..
|
||||
* NOTE! This may not be the same as how much of a user buffer
|
||||
* we filled up (we may be padding etc), so we can only update
|
||||
* "pos" here (the actor routine has to update the user buffer
|
||||
* pointers and the remaining count).
|
||||
*/
|
||||
if (!zero)
|
||||
left = __copy_to_user(buf+copied, xip_mem+offset, nr);
|
||||
else
|
||||
left = __clear_user(buf + copied, nr);
|
||||
|
||||
if (left) {
|
||||
error = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied += (nr - left);
|
||||
offset += (nr - left);
|
||||
index += offset >> PAGE_CACHE_SHIFT;
|
||||
offset &= ~PAGE_CACHE_MASK;
|
||||
} while (copied < len);
|
||||
|
||||
out:
|
||||
*ppos = pos + copied;
|
||||
if (filp)
|
||||
file_accessed(filp);
|
||||
|
||||
return (copied ? copied : error);
|
||||
}
|
||||
|
||||
ssize_t
|
||||
xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
|
||||
{
|
||||
if (!access_ok(VERIFY_WRITE, buf, len))
|
||||
return -EFAULT;
|
||||
|
||||
return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
|
||||
buf, len, ppos);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xip_file_read);
|
||||
|
||||
/*
|
||||
* __xip_unmap is invoked from xip_unmap and xip_write
|
||||
*
|
||||
* This function walks all vmas of the address_space and unmaps the
|
||||
* __xip_sparse_page when found at pgoff.
|
||||
*/
|
||||
static void __xip_unmap(struct address_space * mapping, unsigned long pgoff)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct page *page;
|
||||
unsigned count;
|
||||
int locked = 0;
|
||||
|
||||
count = read_seqcount_begin(&xip_sparse_seq);
|
||||
|
||||
page = __xip_sparse_page;
|
||||
if (!page)
|
||||
return;
|
||||
|
||||
retry:
|
||||
i_mmap_lock_read(mapping);
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
pte_t *pte, pteval;
|
||||
spinlock_t *ptl;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned long address = vma->vm_start +
|
||||
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
|
||||
|
||||
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
|
||||
pte = page_check_address(page, mm, address, &ptl, 1);
|
||||
if (pte) {
|
||||
/* Nuke the page table entry. */
|
||||
flush_cache_page(vma, address, pte_pfn(*pte));
|
||||
pteval = ptep_clear_flush(vma, address, pte);
|
||||
page_remove_rmap(page);
|
||||
dec_mm_counter(mm, MM_FILEPAGES);
|
||||
BUG_ON(pte_dirty(pteval));
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
/* must invalidate_page _before_ freeing the page */
|
||||
mmu_notifier_invalidate_page(mm, address);
|
||||
page_cache_release(page);
|
||||
}
|
||||
}
|
||||
i_mmap_unlock_read(mapping);
|
||||
|
||||
if (locked) {
|
||||
mutex_unlock(&xip_sparse_mutex);
|
||||
} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
|
||||
mutex_lock(&xip_sparse_mutex);
|
||||
locked = 1;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* xip_fault() is invoked via the vma operations vector for a
|
||||
* mapped memory region to read in file data during a page fault.
|
||||
*
|
||||
* This function is derived from filemap_fault, but used for execute in place
|
||||
*/
|
||||
static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct file *file = vma->vm_file;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
pgoff_t size;
|
||||
void *xip_mem;
|
||||
unsigned long xip_pfn;
|
||||
struct page *page;
|
||||
int error;
|
||||
|
||||
/* XXX: are VM_FAULT_ codes OK? */
|
||||
again:
|
||||
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
if (vmf->pgoff >= size)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
|
||||
&xip_mem, &xip_pfn);
|
||||
if (likely(!error))
|
||||
goto found;
|
||||
if (error != -ENODATA)
|
||||
return VM_FAULT_OOM;
|
||||
|
||||
/* sparse block */
|
||||
if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
|
||||
(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
|
||||
(!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
|
||||
int err;
|
||||
|
||||
/* maybe shared writable, allocate new block */
|
||||
mutex_lock(&xip_sparse_mutex);
|
||||
error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
|
||||
&xip_mem, &xip_pfn);
|
||||
mutex_unlock(&xip_sparse_mutex);
|
||||
if (error)
|
||||
return VM_FAULT_SIGBUS;
|
||||
/* unmap sparse mappings at pgoff from all other vmas */
|
||||
__xip_unmap(mapping, vmf->pgoff);
|
||||
|
||||
found:
|
||||
err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
|
||||
xip_pfn);
|
||||
if (err == -ENOMEM)
|
||||
return VM_FAULT_OOM;
|
||||
/*
|
||||
* err == -EBUSY is fine, we've raced against another thread
|
||||
* that faulted-in the same page
|
||||
*/
|
||||
if (err != -EBUSY)
|
||||
BUG_ON(err);
|
||||
return VM_FAULT_NOPAGE;
|
||||
} else {
|
||||
int err, ret = VM_FAULT_OOM;
|
||||
|
||||
mutex_lock(&xip_sparse_mutex);
|
||||
write_seqcount_begin(&xip_sparse_seq);
|
||||
error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
|
||||
&xip_mem, &xip_pfn);
|
||||
if (unlikely(!error)) {
|
||||
write_seqcount_end(&xip_sparse_seq);
|
||||
mutex_unlock(&xip_sparse_mutex);
|
||||
goto again;
|
||||
}
|
||||
if (error != -ENODATA)
|
||||
goto out;
|
||||
/* not shared and writable, use xip_sparse_page() */
|
||||
page = xip_sparse_page();
|
||||
if (!page)
|
||||
goto out;
|
||||
err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
|
||||
page);
|
||||
if (err == -ENOMEM)
|
||||
goto out;
|
||||
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
out:
|
||||
write_seqcount_end(&xip_sparse_seq);
|
||||
mutex_unlock(&xip_sparse_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct xip_file_vm_ops = {
|
||||
.fault = xip_file_fault,
|
||||
.page_mkwrite = filemap_page_mkwrite,
|
||||
};
|
||||
|
||||
int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
|
||||
{
|
||||
BUG_ON(!file->f_mapping->a_ops->get_xip_mem);
|
||||
|
||||
file_accessed(file);
|
||||
vma->vm_ops = &xip_file_vm_ops;
|
||||
vma->vm_flags |= VM_MIXEDMAP;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xip_file_mmap);
|
||||
|
||||
static ssize_t
|
||||
__xip_file_write(struct file *filp, const char __user *buf,
|
||||
size_t count, loff_t pos, loff_t *ppos)
|
||||
{
|
||||
struct address_space * mapping = filp->f_mapping;
|
||||
const struct address_space_operations *a_ops = mapping->a_ops;
|
||||
struct inode *inode = mapping->host;
|
||||
long status = 0;
|
||||
size_t bytes;
|
||||
ssize_t written = 0;
|
||||
|
||||
BUG_ON(!mapping->a_ops->get_xip_mem);
|
||||
|
||||
do {
|
||||
unsigned long index;
|
||||
unsigned long offset;
|
||||
size_t copied;
|
||||
void *xip_mem;
|
||||
unsigned long xip_pfn;
|
||||
|
||||
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
|
||||
index = pos >> PAGE_CACHE_SHIFT;
|
||||
bytes = PAGE_CACHE_SIZE - offset;
|
||||
if (bytes > count)
|
||||
bytes = count;
|
||||
|
||||
status = a_ops->get_xip_mem(mapping, index, 0,
|
||||
&xip_mem, &xip_pfn);
|
||||
if (status == -ENODATA) {
|
||||
/* we allocate a new page unmap it */
|
||||
mutex_lock(&xip_sparse_mutex);
|
||||
status = a_ops->get_xip_mem(mapping, index, 1,
|
||||
&xip_mem, &xip_pfn);
|
||||
mutex_unlock(&xip_sparse_mutex);
|
||||
if (!status)
|
||||
/* unmap page at pgoff from all other vmas */
|
||||
__xip_unmap(mapping, index);
|
||||
}
|
||||
|
||||
if (status)
|
||||
break;
|
||||
|
||||
copied = bytes -
|
||||
__copy_from_user_nocache(xip_mem + offset, buf, bytes);
|
||||
|
||||
if (likely(copied > 0)) {
|
||||
status = copied;
|
||||
|
||||
if (status >= 0) {
|
||||
written += status;
|
||||
count -= status;
|
||||
pos += status;
|
||||
buf += status;
|
||||
}
|
||||
}
|
||||
if (unlikely(copied != bytes))
|
||||
if (status >= 0)
|
||||
status = -EFAULT;
|
||||
if (status < 0)
|
||||
break;
|
||||
} while (count);
|
||||
*ppos = pos;
|
||||
/*
|
||||
* No need to use i_size_read() here, the i_size
|
||||
* cannot change under us because we hold i_mutex.
|
||||
*/
|
||||
if (pos > inode->i_size) {
|
||||
i_size_write(inode, pos);
|
||||
mark_inode_dirty(inode);
|
||||
}
|
||||
|
||||
return written ? written : status;
|
||||
}
|
||||
|
||||
ssize_t
|
||||
xip_file_write(struct file *filp, const char __user *buf, size_t len,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct address_space *mapping = filp->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
size_t count;
|
||||
loff_t pos;
|
||||
ssize_t ret;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
if (!access_ok(VERIFY_READ, buf, len)) {
|
||||
ret=-EFAULT;
|
||||
goto out_up;
|
||||
}
|
||||
|
||||
pos = *ppos;
|
||||
count = len;
|
||||
|
||||
/* We can write back this queue in page reclaim */
|
||||
current->backing_dev_info = inode_to_bdi(inode);
|
||||
|
||||
ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
|
||||
if (ret)
|
||||
goto out_backing;
|
||||
if (count == 0)
|
||||
goto out_backing;
|
||||
|
||||
ret = file_remove_suid(filp);
|
||||
if (ret)
|
||||
goto out_backing;
|
||||
|
||||
ret = file_update_time(filp);
|
||||
if (ret)
|
||||
goto out_backing;
|
||||
|
||||
ret = __xip_file_write (filp, buf, count, pos, ppos);
|
||||
|
||||
out_backing:
|
||||
current->backing_dev_info = NULL;
|
||||
out_up:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xip_file_write);
|
||||
|
||||
/*
|
||||
* truncate a page used for execute in place
|
||||
* functionality is analog to block_truncate_page but does use get_xip_mem
|
||||
* to get the page instead of page cache
|
||||
*/
|
||||
int
|
||||
xip_truncate_page(struct address_space *mapping, loff_t from)
|
||||
{
|
||||
pgoff_t index = from >> PAGE_CACHE_SHIFT;
|
||||
unsigned offset = from & (PAGE_CACHE_SIZE-1);
|
||||
unsigned blocksize;
|
||||
unsigned length;
|
||||
void *xip_mem;
|
||||
unsigned long xip_pfn;
|
||||
int err;
|
||||
|
||||
BUG_ON(!mapping->a_ops->get_xip_mem);
|
||||
|
||||
blocksize = 1 << mapping->host->i_blkbits;
|
||||
length = offset & (blocksize - 1);
|
||||
|
||||
/* Block boundary? Nothing to do */
|
||||
if (!length)
|
||||
return 0;
|
||||
|
||||
length = blocksize - length;
|
||||
|
||||
err = mapping->a_ops->get_xip_mem(mapping, index, 0,
|
||||
&xip_mem, &xip_pfn);
|
||||
if (unlikely(err)) {
|
||||
if (err == -ENODATA)
|
||||
/* Hole? No need to truncate */
|
||||
return 0;
|
||||
else
|
||||
return err;
|
||||
}
|
||||
memset(xip_mem + offset, 0, length);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xip_truncate_page);
|
@ -239,7 +239,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
|
||||
return -EBADF;
|
||||
#endif
|
||||
|
||||
if (file->f_mapping->a_ops->get_xip_mem) {
|
||||
if (IS_DAX(file_inode(file))) {
|
||||
/* no bad return value, but ignore advice */
|
||||
return 0;
|
||||
}
|
||||
|
42
mm/memory.c
42
mm/memory.c
@ -1965,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
|
||||
vmf.pgoff = page->index;
|
||||
vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
|
||||
vmf.page = page;
|
||||
vmf.cow_page = NULL;
|
||||
|
||||
ret = vma->vm_ops->page_mkwrite(vma, &vmf);
|
||||
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
|
||||
@ -2329,6 +2330,7 @@ void unmap_mapping_range(struct address_space *mapping,
|
||||
details.last_index = ULONG_MAX;
|
||||
|
||||
|
||||
/* DAX uses i_mmap_lock to serialise file truncate vs page fault */
|
||||
i_mmap_lock_write(mapping);
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, &details);
|
||||
@ -2638,7 +2640,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
* See filemap_fault() and __lock_page_retry().
|
||||
*/
|
||||
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
pgoff_t pgoff, unsigned int flags, struct page **page)
|
||||
pgoff_t pgoff, unsigned int flags,
|
||||
struct page *cow_page, struct page **page)
|
||||
{
|
||||
struct vm_fault vmf;
|
||||
int ret;
|
||||
@ -2647,10 +2650,13 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
vmf.pgoff = pgoff;
|
||||
vmf.flags = flags;
|
||||
vmf.page = NULL;
|
||||
vmf.cow_page = cow_page;
|
||||
|
||||
ret = vma->vm_ops->fault(vma, &vmf);
|
||||
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
|
||||
return ret;
|
||||
if (!vmf.page)
|
||||
goto out;
|
||||
|
||||
if (unlikely(PageHWPoison(vmf.page))) {
|
||||
if (ret & VM_FAULT_LOCKED)
|
||||
@ -2664,6 +2670,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
else
|
||||
VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);
|
||||
|
||||
out:
|
||||
*page = vmf.page;
|
||||
return ret;
|
||||
}
|
||||
@ -2834,7 +2841,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
}
|
||||
|
||||
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
|
||||
ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
|
||||
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
|
||||
return ret;
|
||||
|
||||
@ -2874,26 +2881,43 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
return VM_FAULT_OOM;
|
||||
}
|
||||
|
||||
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
|
||||
ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
|
||||
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
|
||||
goto uncharge_out;
|
||||
|
||||
copy_user_highpage(new_page, fault_page, address, vma);
|
||||
if (fault_page)
|
||||
copy_user_highpage(new_page, fault_page, address, vma);
|
||||
__SetPageUptodate(new_page);
|
||||
|
||||
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
|
||||
if (unlikely(!pte_same(*pte, orig_pte))) {
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
unlock_page(fault_page);
|
||||
page_cache_release(fault_page);
|
||||
if (fault_page) {
|
||||
unlock_page(fault_page);
|
||||
page_cache_release(fault_page);
|
||||
} else {
|
||||
/*
|
||||
* The fault handler has no page to lock, so it holds
|
||||
* i_mmap_lock for read to protect against truncate.
|
||||
*/
|
||||
i_mmap_unlock_read(vma->vm_file->f_mapping);
|
||||
}
|
||||
goto uncharge_out;
|
||||
}
|
||||
do_set_pte(vma, address, new_page, pte, true, true);
|
||||
mem_cgroup_commit_charge(new_page, memcg, false);
|
||||
lru_cache_add_active_or_unevictable(new_page, vma);
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
unlock_page(fault_page);
|
||||
page_cache_release(fault_page);
|
||||
if (fault_page) {
|
||||
unlock_page(fault_page);
|
||||
page_cache_release(fault_page);
|
||||
} else {
|
||||
/*
|
||||
* The fault handler has no page to lock, so it holds
|
||||
* i_mmap_lock for read to protect against truncate.
|
||||
*/
|
||||
i_mmap_unlock_read(vma->vm_file->f_mapping);
|
||||
}
|
||||
return ret;
|
||||
uncharge_out:
|
||||
mem_cgroup_cancel_charge(new_page, memcg);
|
||||
@ -2912,7 +2936,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
int dirtied = 0;
|
||||
int ret, tmp;
|
||||
|
||||
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
|
||||
ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
|
||||
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
|
||||
return ret;
|
||||
|
||||
|
@ -28,7 +28,6 @@ If no config files are specified, .config and .config.old are used.
|
||||
Example usage:
|
||||
$ diffconfig .config config-with-some-changes
|
||||
-EXT2_FS_XATTR n
|
||||
-EXT2_FS_XIP n
|
||||
CRAMFS n -> y
|
||||
EXT2_FS y -> n
|
||||
LOG_BUF_SHIFT 14 -> 16
|
||||
|
Loading…
Reference in New Issue
Block a user