From 97f100277cfdcd268f0cf3d83bb6e4d1a345bc80 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Fri, 11 Dec 2015 16:45:03 -0500 Subject: [PATCH] Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general AV dislikes many parts of orangefs_devreq_writev. Besides making orangefs_devreq_writev more easily readable and better commented, this patch makes an effort to address some of the problems: > The 5th is quietly ignored unless trailer_size is positive and > status is zero. If trailer_size > 0 && status == 0, you verify that > the length of the 5th segment is no more than trailer_size and copy > it to vmalloc'ed buffer. Without bothering to zero the rest of that > buffer out. It was just wrong to allow a 5th segment that is not exactly equal to trailer_size. Now that that's fixed, there's nothing to zero out in the vmalloced buffer - it is exactly the right size to hold the 5th segment. > Another API bogosity: when the 5th segment is present, successful writev() > returns the sum of sizes of the first 4. Added size of 5th segment to writev return... > if concatenation of the first 4 segments is longer than > 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine > and proceed with garbage. If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail. > if the 32bit value 4 bytes into op->downcall is zero and 64bit > value following it is non-zero, the latter is interpreted as the size of > trailer data. The latter is what userspace claimed was the length of the trailer data. The kernel module now compares it to the trailer iovec's iov_len as a sanity check. > if there's no trailer, the 5th segment (if present) is completely ignored. Whine and fail if there should be no trailer, yet a 5th segment is present. > if vmalloc fails, act as if status (32bit at offset 5 into > op->downcall) had been -ENOMEM and don't look at the 5th segment at all. whine and fail with -ENOMEM. Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 279 +++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 106 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index e74938d575d6..b182b025db86 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -76,11 +76,12 @@ static int orangefs_devreq_open(struct inode *inode, struct file *file) int ret = -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { - gossip_err("orangefs: device cannot be opened in blocking mode\n"); + gossip_err("%s: device cannot be opened in blocking mode\n", + __func__); goto out; } ret = -EACCES; - gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n"); + gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n"); mutex_lock(&devreq_mutex); if (open_access_count == 0) { @@ -100,6 +101,7 @@ static int orangefs_devreq_open(struct inode *inode, struct file *file) return ret; } +/* Function for read() callers into the device */ static ssize_t orangefs_devreq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) @@ -112,7 +114,8 @@ static ssize_t orangefs_devreq_read(struct file *file, /* We do not support blocking IO. */ if (!(file->f_flags & O_NONBLOCK)) { - gossip_err("orangefs: blocking reads are not supported! (pvfs2-client-core bug)\n"); + gossip_err("%s: blocking read from client-core.\n", + __func__); return -EINVAL; } @@ -143,12 +146,16 @@ static ssize_t orangefs_devreq_read(struct file *file, llu(op->tag), get_opname_string(op)); spin_unlock(&op->lock); continue; - /* Skip ops whose filesystem we don't know about unless - * it is being mounted. */ + /* + * Skip ops whose filesystem we don't know about unless + * it is being mounted. + */ /* XXX: is there a better way to detect this? */ } else if (ret == -1 && - !(op->upcall.type == ORANGEFS_VFS_OP_FS_MOUNT || - op->upcall.type == ORANGEFS_VFS_OP_GETATTR)) { + !(op->upcall.type == + ORANGEFS_VFS_OP_FS_MOUNT || + op->upcall.type == + ORANGEFS_VFS_OP_GETATTR)) { gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: skipping op tag %llu %s\n", llu(op->tag), get_opname_string(op)); @@ -237,7 +244,11 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EFAULT; } -/* Function for writev() callers into the device */ +/* + * Function for writev() callers into the device. Readdir related + * operations have an extra iovec containing info about objects + * contained in directories. + */ static ssize_t orangefs_devreq_writev(struct file *file, const struct iovec *iov, size_t count, @@ -247,27 +258,43 @@ static ssize_t orangefs_devreq_writev(struct file *file, void *buffer = NULL; void *ptr = NULL; unsigned long i = 0; - static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE; - int ret = 0, num_remaining = max_downsize; - int notrailer_count = 4; /* num elements in iovec without trailer */ + int num_remaining = MAX_ALIGNED_DEV_REQ_DOWNSIZE; + int ret = 0; + /* num elements in iovec without trailer */ + int notrailer_count = 4; + /* + * If there's a trailer, its iov index will be equal to + * notrailer_count. + */ + int trailer_index = notrailer_count; int payload_size = 0; + int returned_downcall_size = 0; __s32 magic = 0; __s32 proto_ver = 0; __u64 tag = 0; ssize_t total_returned_size = 0; - /* Either there is a trailer or there isn't */ + /* + * There will always be at least notrailer_count iovecs, and + * when there's a trailer, one more than notrailer_count. Check + * count's sanity. + */ if (count != notrailer_count && count != (notrailer_count + 1)) { - gossip_err("Error: Number of iov vectors is (%zu) and notrailer count is %d\n", + gossip_err("%s: count:%zu: notrailer_count :%d:\n", + __func__, count, notrailer_count); return -EPROTO; } - buffer = dev_req_alloc(); - if (!buffer) - return -ENOMEM; - ptr = buffer; + + /* Copy the non-trailer iovec data into a device request buffer. */ + buffer = dev_req_alloc(); + if (!buffer) { + gossip_err("%s: dev_req_alloc failed.\n", __func__); + return -ENOMEM; + } + ptr = buffer; for (i = 0; i < notrailer_count; i++) { if (iov[i].iov_len > num_remaining) { gossip_err @@ -292,7 +319,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, * make it 8 bytes big, or use get_unaligned when asigning. */ ptr = buffer; - proto_ver = *((__s32 *) ptr); + proto_ver = *((__s32 *) ptr); /* unused */ ptr += sizeof(__s32); magic = *((__s32 *) ptr); @@ -307,82 +334,114 @@ static ssize_t orangefs_devreq_writev(struct file *file, return -EPROTO; } - /* - * proto_ver = 20902 for 2.9.2 - */ - op = orangefs_devreq_remove_op(tag); if (op) { /* Increase ref count! */ get_op(op); - /* cut off magic and tag from payload size */ - payload_size -= (2 * sizeof(__s32) + sizeof(__u64)); - if (payload_size <= sizeof(struct orangefs_downcall_s)) - /* copy the passed in downcall into the op */ + + /* calculate the size of the returned downcall. */ + returned_downcall_size = + payload_size - (2 * sizeof(__s32) + sizeof(__u64)); + + /* copy the passed in downcall into the op */ + if (returned_downcall_size == + sizeof(struct orangefs_downcall_s)) { memcpy(&op->downcall, ptr, sizeof(struct orangefs_downcall_s)); - else - gossip_debug(GOSSIP_DEV_DEBUG, - "writev: Ignoring %d bytes\n", - payload_size); - - /* Do not allocate needlessly if client-core forgets - * to reset trailer size on op errors. - */ - if (op->downcall.status == 0 && op->downcall.trailer_size > 0) { - __u64 trailer_size = op->downcall.trailer_size; - size_t size; - gossip_debug(GOSSIP_DEV_DEBUG, - "writev: trailer size %ld\n", - (unsigned long)trailer_size); - if (count != (notrailer_count + 1)) { - gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n", (unsigned long)trailer_size, count); - dev_req_release(buffer); - put_op(op); - return -EPROTO; - } - size = iov[notrailer_count].iov_len; - if (size > trailer_size) { - gossip_err("writev error: trailer size (%ld) != iov_len (%zd)\n", (unsigned long)trailer_size, size); - dev_req_release(buffer); - put_op(op); - return -EMSGSIZE; - } - /* Allocate a buffer large enough to hold the - * trailer bytes. - */ - op->downcall.trailer_buf = vmalloc(trailer_size); - if (op->downcall.trailer_buf != NULL) { - gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", - op->downcall.trailer_buf); - ret = copy_from_user(op->downcall.trailer_buf, - iov[notrailer_count]. - iov_base, - size); - if (ret) { - gossip_err("Failed to copy trailer data from user space\n"); - dev_req_release(buffer); - gossip_debug(GOSSIP_DEV_DEBUG, - "vfree: %p\n", - op->downcall.trailer_buf); - vfree(op->downcall.trailer_buf); - op->downcall.trailer_buf = NULL; - put_op(op); - return -EIO; - } - memset(op->downcall.trailer_buf + size, 0, - trailer_size - size); - } else { - /* Change downcall status */ - op->downcall.status = -ENOMEM; - gossip_err("writev: could not vmalloc for trailer!\n"); - } + } else { + gossip_err("%s: returned downcall size:%d: \n", + __func__, + returned_downcall_size); + dev_req_release(buffer); + put_op(op); + return -EMSGSIZE; } - /* if this operation is an I/O operation and if it was - * initiated on behalf of a *synchronous* VFS I/O operation, - * only then we need to wait + /* Don't tolerate an unexpected trailer iovec. */ + if ((op->downcall.trailer_size == 0) && + (count != notrailer_count)) { + gossip_err("%s: unexpected trailer iovec.\n", + __func__); + dev_req_release(buffer); + put_op(op); + return -EPROTO; + } + + /* Don't consider the trailer if there's a bad status. */ + if (op->downcall.status != 0) + goto no_trailer; + + /* get the trailer if there is one. */ + if (op->downcall.trailer_size == 0) + goto no_trailer; + + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: op->downcall.trailer_size %lld\n", + __func__, + op->downcall.trailer_size); + + /* + * Bail if we think think there should be a trailer, but + * there's no iovec for it. + */ + if (count != (notrailer_count + 1)) { + gossip_err("%s: trailer_size:%lld: count:%zu:\n", + __func__, + op->downcall.trailer_size, + count); + dev_req_release(buffer); + put_op(op); + return -EPROTO; + } + + /* Verify that trailer_size is accurate. */ + if (op->downcall.trailer_size != iov[trailer_index].iov_len) { + gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n", + __func__, + op->downcall.trailer_size, + iov[trailer_index].iov_len); + dev_req_release(buffer); + put_op(op); + return -EMSGSIZE; + } + + total_returned_size += iov[trailer_index].iov_len; + + /* + * Allocate a buffer, copy the trailer bytes into it and + * attach it to the downcall. + */ + op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len); + if (op->downcall.trailer_buf != NULL) { + gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", + op->downcall.trailer_buf); + ret = copy_from_user(op->downcall.trailer_buf, + iov[trailer_index].iov_base, + iov[trailer_index].iov_len); + if (ret) { + gossip_err("%s: Failed to copy trailer.\n", + __func__); + dev_req_release(buffer); + gossip_debug(GOSSIP_DEV_DEBUG, + "vfree: %p\n", + op->downcall.trailer_buf); + vfree(op->downcall.trailer_buf); + op->downcall.trailer_buf = NULL; + put_op(op); + return -EIO; + } + } else { + /* Change downcall status */ + gossip_err("writev: could not vmalloc for trailer!\n"); + dev_req_release(buffer); + put_op(op); + return -ENOMEM; + } + +no_trailer: + + /* if this operation is an I/O operation we need to wait * for all data to be copied before we can return to avoid * buffer corruption and races that can pull the buffers * out from under us. @@ -392,12 +451,12 @@ static ssize_t orangefs_devreq_writev(struct file *file, * application reading/writing this device to return until * the buffers are done being used. */ - if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO && - op->upcall.req.io.async_vfs_io == ORANGEFS_VFS_SYNC_IO) { + if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) { int timed_out = 0; DECLARE_WAITQUEUE(wait_entry, current); - /* tell the vfs op waiting on a waitqueue + /* + * tell the vfs op waiting on a waitqueue * that this op is done */ spin_lock(&op->lock); @@ -423,14 +482,18 @@ static ssize_t orangefs_devreq_writev(struct file *file, MSECS_TO_JIFFIES(1000 * op_timeout_secs); if (!schedule_timeout(timeout)) { - gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n"); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: timed out.\n", + __func__); timed_out = 1; break; } continue; } - gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n"); + gossip_debug(GOSSIP_DEV_DEBUG, + "%s: signal on I/O wait, aborting\n", + __func__); break; } @@ -468,6 +531,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, "WARNING: No one's waiting for tag %llu\n", llu(tag)); } + /* put_op? */ dev_req_release(buffer); return total_returned_size; @@ -632,7 +696,8 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) return ret ? -EIO : orangefs_bufmap_initialize(&user_desc); case ORANGEFS_DEV_REMOUNT_ALL: gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: got ORANGEFS_DEV_REMOUNT_ALL\n"); + "%s: got ORANGEFS_DEV_REMOUNT_ALL\n", + __func__); /* * remount all mounted orangefs volumes to regain the lost @@ -647,13 +712,17 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) if (ret < 0) return ret; gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: priority remount in progress\n"); + "%s: priority remount in progress\n", + __func__); list_for_each(tmp, &orangefs_superblocks) { orangefs_sb = - list_entry(tmp, struct orangefs_sb_info_s, list); + list_entry(tmp, + struct orangefs_sb_info_s, + list); if (orangefs_sb && (orangefs_sb->sb)) { gossip_debug(GOSSIP_DEV_DEBUG, - "Remounting SB %p\n", + "%s: Remounting SB %p\n", + __func__, orangefs_sb); ret = orangefs_remount(orangefs_sb->sb); @@ -661,12 +730,13 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) gossip_debug(GOSSIP_DEV_DEBUG, "SB %p remount failed\n", orangefs_sb); - break; + break; } } } gossip_debug(GOSSIP_DEV_DEBUG, - "orangefs_devreq_ioctl: priority remount complete\n"); + "%s: priority remount complete\n", + __func__); mutex_unlock(&request_mutex); return ret; @@ -704,15 +774,12 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) (void __user *)arg, ORANGEFS_MAX_DEBUG_STRING_LEN); if (ret != 0) { - pr_info("%s: " - "ORANGEFS_DEV_CLIENT_STRING: copy_from_user failed" - "\n", + pr_info("%s: CLIENT_STRING: copy_from_user failed\n", __func__); return -EIO; } - pr_info("%s: client debug array string has been been received." - "\n", + pr_info("%s: client debug array string has been received.\n", __func__); if (!help_string_initialized) { @@ -722,9 +789,7 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) /* build a proper debug help string */ if (orangefs_prepare_debugfs_help_string(0)) { - gossip_err("%s: " - "prepare_debugfs_help_string failed" - "\n", + gossip_err("%s: no debug help string \n", __func__); return -EIO; } @@ -781,15 +846,17 @@ static long dispatch_ioctl_command(unsigned int command, unsigned long arg) debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); gossip_debug_mask = mask_info.mask_value; - pr_info("ORANGEFS: kernel debug mask has been modified to " + pr_info("%s: kernel debug mask has been modified to " ":%s: :%llx:\n", + __func__, kernel_debug_string, (unsigned long long)gossip_debug_mask); } else if (mask_info.mask_type == CLIENT_MASK) { debug_mask_to_string(&mask_info.mask_value, mask_info.mask_type); - pr_info("ORANGEFS: client debug mask has been modified to" + pr_info("%s: client debug mask has been modified to" ":%s: :%llx:\n", + __func__, client_debug_string, llu(mask_info.mask_value)); } else {