# This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2004/09/22 11:47:14-07:00 ysaito@ysaito1.(none) # Reduced the size of kiocb.ki_fast_iov to one. # # include/linux/aio.h # 2004/09/22 11:46:57-07:00 ysaito@ysaito1.(none) +1 -1 # Reduced the size of kiocb.ki_fast_iov to one. # # fs/read_write.c # 2004/09/22 11:46:57-07:00 ysaito@ysaito1.(none) +2 -2 # Reduced the size of kiocb.ki_fast_iov to one. # # fs/aio.c # 2004/09/22 11:46:57-07:00 ysaito@ysaito1.(none) +3 -3 # Reduced the size of kiocb.ki_fast_iov to one. # # ChangeSet # 2004/09/22 10:44:57-07:00 ysaito@ysaito1.(none) # do_sync_read, do_sync_write: Fix socket aio BUG by filling kiocb properly. # # fs/read_write.c # 2004/09/22 10:44:41-07:00 ysaito@ysaito1.(none) +8 -0 # do_sync_read, do_sync_write: Fix socket aio BUG by filling kiocb properly. # # ChangeSet # 2004/09/22 00:31:54-07:00 ysaito@ysaito1.(none) # Added PREADV and PWRITEV support to NFS. # # include/linux/nfs_fs.h # 2004/09/22 00:31:25-07:00 ysaito@ysaito1.(none) +3 -4 # Added PREADV and PWRITEV support to NFS. # # fs/nfs/file.c # 2004/09/22 00:31:25-07:00 ysaito@ysaito1.(none) +31 -8 # Added PREADV and PWRITEV support to NFS. # # fs/nfs/direct.c # 2004/09/22 00:31:25-07:00 ysaito@ysaito1.(none) +27 -27 # Added PREADV and PWRITEV support to NFS. # # ChangeSet # 2004/09/22 00:30:24-07:00 ysaito@ysaito1.(none) # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # Fixed a bug in filemap PREADV implementation. # # # net/socket.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +20 -17 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # # mm/filemap.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +15 -12 # Fixed a bug in IOCB_CMD_PREADV that causes some data to be skipped when aio doesn't complete immediately. 
# # include/linux/fs.h # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +6 -4 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # fs/reiserfs/file.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +2 -3 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # fs/pipe.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +4 -6 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # fs/ext3/file.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +7 -4 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # # fs/block_dev.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +2 -3 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # fs/aio.c # 2004/09/22 00:29:53-07:00 ysaito@ysaito1.(none) +11 -14 # Removed iov and nr_segs parameters from aio_readv and aio_writev methods in file_operations. # # Renamed advance_iov -> increment_iov. # # ChangeSet # 2004/09/20 16:48:13-07:00 ysaito@ysaito1.(none) # __generic_file_aio_read: abort reading when the first iovec element returns incomplete. # # mm/filemap.c # 2004/09/20 16:48:01-07:00 ysaito@ysaito1.(none) +3 -1 # __generic_file_aio_read: abort reading when the first iovec element returns incomplete. # # ChangeSet # 2004/09/19 20:20:18-07:00 ysaito@starfishost1.domain_not_set.invalid # Synched with aiov. # # net/socket.c # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +52 -19 # Synched with aiov. # # mm/filemap.c # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +20 -8 # Synched with aiov. # # include/linux/fs.h # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +5 -1 # Synched with aiov. # # include/linux/aio_abi.h # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +29 -3 # Synched with aiov. 
# # include/linux/aio.h # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +15 -1 # Synched with aiov. # # fs/reiserfs/file.c # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +8 -0 # Synched with aiov. # # fs/pipe.c # 2004/09/19 20:20:05-07:00 ysaito@starfishost1.domain_not_set.invalid +26 -27 # Synched with aiov. # # fs/jfs/file.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +2 -0 # Synched with aiov. # # fs/ext3/file.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +11 -2 # Synched with aiov. # # fs/ext2/file.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +2 -0 # Synched with aiov. # # fs/block_dev.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +7 -0 # Synched with aiov. # # fs/bad_inode.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +3 -0 # Synched with aiov. # # fs/aio.c # 2004/09/19 20:20:04-07:00 ysaito@starfishost1.domain_not_set.invalid +108 -26 # Synched with aiov. 
# diff -Nru a/fs/aio.c b/fs/aio.c --- a/fs/aio.c 2004-09-22 11:48:25 -07:00 +++ b/fs/aio.c 2004-09-22 11:48:25 -07:00 @@ -457,6 +457,8 @@ req->ki_obj.user = NULL; req->ki_dtor = NULL; req->private = NULL; + if (req->ki_slow_iov) + kfree(req->ki_slow_iov); kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; @@ -1307,6 +1309,24 @@ return -EINVAL; } +static void +aio_increment_iov(struct iovec **iov_ptr, unsigned long *nr_segs, size_t nr_bytes) +{ + struct iovec *iov = *iov_ptr; + while (nr_bytes > 0) { + if (iov->iov_len <= nr_bytes) { + nr_bytes -= iov->iov_len; + iov++; + (*nr_segs)--; + } else { + iov->iov_len -= nr_bytes; + iov->iov_base = (char*)iov->iov_base + nr_bytes; + break; + } + } + BUG_ON(*nr_segs >= 9999999); + *iov_ptr = iov; +} /* * Retry method for aio_read (also used for first time submit) * Responsible for updating iocb state as retries progress @@ -1318,16 +1338,21 @@ struct inode *inode = mapping->host; ssize_t ret = 0; - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + if (iocb->ki_nr_segs == 1) { + ret = file->f_op->aio_read(iocb, iocb->ki_iov[0].iov_base, + iocb->ki_iov[0].iov_len, + iocb->ki_pos); + } else { + ret = file->f_op->aio_readv(iocb); + } /* * Can't just depend on iocb->ki_left to determine * whether we are done. This may have been a short read. */ if (ret > 0) { - iocb->ki_buf += ret; iocb->ki_left -= ret; + aio_increment_iov(&iocb->ki_iov, &iocb->ki_nr_segs, ret); /* * For pipes and sockets we return once we have * some data; for regular files we retry till we @@ -1355,13 +1380,17 @@ struct file *file = iocb->ki_filp; ssize_t ret = 0; - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + if (iocb->ki_nr_segs == 1) { + ret = file->f_op->aio_write(iocb, iocb->ki_iov[0].iov_base, + iocb->ki_iov[0].iov_len, + iocb->ki_pos); + } else { + ret = file->f_op->aio_writev(iocb); + } if (ret > 0) { - iocb->ki_buf += iocb->ki_buf ? 
ret : 0; iocb->ki_left -= ret; - + aio_increment_iov(&iocb->ki_iov, &iocb->ki_nr_segs, ret); ret = -EIOCBRETRY; } @@ -1400,10 +1429,20 @@ */ static ssize_t aio_poll(struct kiocb *iocb) { - unsigned events = (unsigned)(iocb->ki_buf); + unsigned events = (unsigned)(iocb->ki_iov[0].iov_base); return generic_aio_poll(iocb, events); } +static int +aio_iov_access_ok(int mode, struct kiocb *kiocb) +{ + int i; + for (i = 0; i < kiocb->ki_nr_segs; i++) + if (unlikely(!access_ok(mode, kiocb->ki_iov[i].iov_base, + kiocb->ki_iov[i].iov_len))) + return 0; + return 1; +} /* * aio_setup_iocb: * Performs the initial checks and aio retry method @@ -1416,12 +1455,12 @@ switch (kiocb->ki_opcode) { case IOCB_CMD_PREAD: + case IOCB_CMD_PREADV: ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_READ))) break; ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf, - kiocb->ki_left))) + if (unlikely(!aio_iov_access_ok(VERIFY_WRITE, kiocb))) break; ret = security_file_permission(file, MAY_READ); if (ret) @@ -1431,12 +1470,12 @@ kiocb->ki_retry = aio_pread; break; case IOCB_CMD_PWRITE: + case IOCB_CMD_PWRITEV: ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_WRITE))) break; ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf, - kiocb->ki_left))) + if (unlikely(!aio_iov_access_ok(VERIFY_READ, kiocb))) break; ret = security_file_permission(file, MAY_WRITE); if (ret) @@ -1514,16 +1553,6 @@ return -EINVAL; } - /* prevent overflows */ - if (unlikely( - (iocb->aio_buf != (unsigned long)iocb->aio_buf) || - (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) || - ((ssize_t)iocb->aio_nbytes < 0) - )) { - pr_debug("EINVAL: io_submit: overflow check\n"); - return -EINVAL; - } - file = fget(iocb->aio_fildes); if (unlikely(!file)) return -EBADF; @@ -1544,11 +1573,61 @@ req->ki_obj.user = user_iocb; req->ki_user_data = iocb->aio_data; - req->ki_pos = iocb->aio_offset; - - req->ki_buf = (char *)(unsigned long)iocb->aio_buf; - req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 
req->ki_opcode = iocb->aio_lio_opcode; + + req->ki_slow_iov = NULL; + + switch (iocb->aio_lio_opcode) { + case IOCB_CMD_PREADV: + /* FALLTHROUGH */ + case IOCB_CMD_PWRITEV: + ret = -EINVAL; + req->ki_pos = iocb->u.v.offset; + req->ki_nr_segs = iocb->u.v.nr; + req->ki_iov = &req->ki_fast_iov; + if (req->ki_nr_segs > 1) { + if (req->ki_nr_segs >= UIO_MAXIOV) + goto out_put_req; + req->ki_slow_iov = kmalloc(sizeof(struct iovec) * req->ki_nr_segs, GFP_KERNEL); + req->ki_iov = req->ki_slow_iov; + } + ret = -EFAULT; + if (unlikely(copy_from_user(req->ki_iov, iocb->u.v.vec, + sizeof(struct iovec) * req->ki_nr_segs))) + goto out_put_req; + /* Compute the total length; also make sure that the + length isn't ridiculuously large. */ + { + int i; + ssize_t tot_len = 0; + ret = -EINVAL; + for (i = 0; i < req->ki_nr_segs; i++) { + ssize_t len = (ssize_t)req->ki_iov[i].iov_len; + tot_len += len; + if (len < 0 || tot_len < 0) + // overflow + goto out_put_req; + } + req->ki_nbytes = tot_len; + } + break; + default: + /* prevent overflows */ + ret = -EINVAL; + if (unlikely((iocb->u.c.buf != (unsigned long)iocb->u.c.buf) || + (iocb->u.c.nbytes != (size_t)iocb->u.c.nbytes) || + ((ssize_t)iocb->u.c.nbytes < 0))) { + pr_debug("EINVAL: io_submit: overflow check\n"); + goto out_put_req; + } + req->ki_pos = iocb->u.c.offset; + req->ki_nr_segs = 1; + req->ki_iov = &req->ki_fast_iov; + req->ki_iov->iov_base = (char *)(unsigned long)iocb->u.c.buf; + req->ki_iov->iov_len = iocb->u.c.nbytes; + req->ki_nbytes = iocb->u.c.nbytes; + } + req->ki_left = req->ki_nbytes; init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); INIT_LIST_HEAD(&req->ki_wait.task_list); req->ki_run_list.next = req->ki_run_list.prev = NULL; diff -Nru a/fs/bad_inode.c b/fs/bad_inode.c --- a/fs/bad_inode.c 2004-09-22 11:48:25 -07:00 +++ b/fs/bad_inode.c 2004-09-22 11:48:25 -07:00 @@ -55,6 +55,9 @@ .writev = EIO_ERROR, .sendfile = EIO_ERROR, .sendpage = EIO_ERROR, + .aio_readv = EIO_ERROR, + .aio_writev = EIO_ERROR, + 
.get_unmapped_area = EIO_ERROR, }; diff -Nru a/fs/block_dev.c b/fs/block_dev.c --- a/fs/block_dev.c 2004-09-22 11:48:25 -07:00 +++ b/fs/block_dev.c 2004-09-22 11:48:25 -07:00 @@ -765,6 +765,10 @@ return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); } +static ssize_t blkdev_file_aio_writev(struct kiocb *iocb) +{ + return generic_file_aio_write_nolock(iocb, iocb->ki_iov, iocb->ki_nr_segs, &iocb->ki_pos); +} static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd, unsigned long arg) @@ -790,6 +794,8 @@ .write = blkdev_file_write, .aio_read = generic_file_aio_read, .aio_write = blkdev_file_aio_write, + .aio_readv = generic_file_aio_readv, + .aio_writev = blkdev_file_aio_writev, .mmap = generic_file_mmap, .fsync = block_fsync, .ioctl = block_ioctl, diff -Nru a/fs/ext2/file.c b/fs/ext2/file.c --- a/fs/ext2/file.c 2004-09-22 11:48:25 -07:00 +++ b/fs/ext2/file.c 2004-09-22 11:48:25 -07:00 @@ -45,6 +45,8 @@ .write = generic_file_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, + .aio_readv = generic_file_aio_readv, + .aio_writev = generic_file_aio_writev, .ioctl = ext2_ioctl, .mmap = generic_file_mmap, .open = generic_file_open, diff -Nru a/fs/ext3/file.c b/fs/ext3/file.c --- a/fs/ext3/file.c 2004-09-22 11:48:25 -07:00 +++ b/fs/ext3/file.c 2004-09-22 11:48:25 -07:00 @@ -56,14 +56,14 @@ } static ssize_t -ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ext3_file_writev(struct kiocb *iocb) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; ssize_t ret; int err; - ret = generic_file_aio_write(iocb, buf, count, pos); + ret = generic_file_aio_writev(iocb); /* * Skip flushing if there was an error, or if nothing was written. @@ -113,12 +113,24 @@ return ret; } +static ssize_t +ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +{ + /* aio_write is a legacy interface. 
*/ + BUG_ON(buf != iocb->ki_iov[0].iov_base + || count != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return ext3_file_writev(iocb); +} + struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, + .aio_readv = generic_file_aio_readv, + .aio_writev = ext3_file_writev, .readv = generic_file_readv, .writev = generic_file_writev, .ioctl = ext3_ioctl, diff -Nru a/fs/jfs/file.c b/fs/jfs/file.c --- a/fs/jfs/file.c 2004-09-22 11:48:25 -07:00 +++ b/fs/jfs/file.c 2004-09-22 11:48:25 -07:00 @@ -110,6 +110,8 @@ .read = generic_file_read, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, + .aio_readv = generic_file_aio_readv, + .aio_writev = generic_file_aio_writev, .mmap = generic_file_mmap, .readv = generic_file_readv, .writev = generic_file_writev, diff -Nru a/fs/nfs/direct.c b/fs/nfs/direct.c --- a/fs/nfs/direct.c 2004-09-22 11:48:25 -07:00 +++ b/fs/nfs/direct.c 2004-09-22 11:48:25 -07:00 @@ -445,11 +445,11 @@ } /** - * nfs_file_direct_read - file direct read operation for NFS files + * nfs_file_direct_readv - file direct read operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer into which to read data - * count: number of bytes to read - * pos: byte offset in file where reading starts + * + * The iovec and its size is passed through iocb->ki_iov and iocb->ki_nr_segs. + * The read offset is passed through iocb->ki_pos. * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -466,29 +466,29 @@ * cache. 
*/ ssize_t -nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +nfs_file_direct_readv(struct kiocb *iocb) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; struct file *file = iocb->ki_filp; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = buf, - .iov_len = count, - }; + const struct iovec *iov = iocb->ki_iov; + unsigned long nr_segs = iocb->ki_nr_segs; + size_t count = iov_length(iov, nr_segs); + int i; dprintk("nfs: direct read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long) pos); + (unsigned long)count, (unsigned long) iocb->ki_pos); if (!is_sync_kiocb(iocb)) goto out; if (count < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + for (i = 0; i < nr_segs; i++) + if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) goto out; retval = 0; if (!count) @@ -502,20 +502,19 @@ goto out; } - retval = nfs_direct_read(inode, file, &iov, pos, 1); + retval = nfs_direct_read(inode, file, iov, iocb->ki_pos, nr_segs); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos += retval; out: return retval; } /** - * nfs_file_direct_write - file direct write operation for NFS files + * nfs_file_direct_writev - file direct write operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer from which to write data - * count: number of bytes to write - * pos: byte offset in file where writing starts + * The iovec and its size is passed through iocb->ki_iov and iocb->ki_nr_segs. + * The read offset is passed through iocb->ki_pos. * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -536,19 +535,19 @@ * is no atomic O_APPEND write facility in the NFS protocol. 
*/ ssize_t -nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +nfs_file_direct_writev(struct kiocb *iocb) { ssize_t retval = -EINVAL; - loff_t *ppos = &iocb->ki_pos; + loff_t pos = iocb->ki_pos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; struct file *file = iocb->ki_filp; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iovec iov = { - .iov_base = (char __user *)buf, - .iov_len = count, - }; + const struct iovec *iov = iocb->ki_iov; + unsigned long nr_segs = iocb->ki_nr_segs; + size_t count = iov_length(iov, nr_segs); + int i; dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -561,7 +560,8 @@ if (pos < 0) goto out; retval = -EFAULT; - if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + for (i = 0; i < nr_segs; i++) + if (!access_ok(VERIFY_READ, iov[i].iov_base, iov[i].iov_len)) goto out; if (file->f_error) { retval = file->f_error; @@ -589,11 +589,11 @@ goto out; } - retval = nfs_direct_write(inode, file, &iov, pos, 1); + retval = nfs_direct_write(inode, file, iov, pos, nr_segs); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) - *ppos = pos + retval; + iocb->ki_pos = pos + retval; out: return retval; diff -Nru a/fs/nfs/file.c b/fs/nfs/file.c --- a/fs/nfs/file.c 2004-09-22 11:48:25 -07:00 +++ b/fs/nfs/file.c 2004-09-22 11:48:25 -07:00 @@ -39,6 +39,8 @@ static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); +static ssize_t nfs_file_readv(struct kiocb *); +static ssize_t nfs_file_writev(struct kiocb *); static int nfs_file_flush(struct file *); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int 
nfs_check_flags(int flags); @@ -49,6 +51,8 @@ .write = do_sync_write, .aio_read = nfs_file_read, .aio_write = nfs_file_write, + .aio_readv = nfs_file_readv, + .aio_writev = nfs_file_writev, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -134,7 +138,7 @@ } static ssize_t -nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +nfs_file_readv(struct kiocb *iocb) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; @@ -142,18 +146,27 @@ #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, buf, count, pos); + return nfs_file_direct_readv(iocb); #endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long) pos); + (unsigned long)iov_length(iocb->ki_iov, iocb->ki_nr_segs), + (unsigned long)iocb->ki_pos); result = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!result) - result = generic_file_aio_read(iocb, buf, count, pos); + result = generic_file_aio_readv(iocb); return result; } +static ssize_t +nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +{ + BUG_ON(buf != iocb->ki_iov[0].iov_base + || count != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return nfs_file_readv(iocb); +} static ssize_t nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, @@ -253,20 +266,21 @@ * Write to a file (through the page cache). 
*/ static ssize_t -nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +nfs_file_writev(struct kiocb *iocb) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; + size_t count = iov_length(iocb->ki_iov, iocb->ki_nr_segs); ssize_t result; #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, buf, count, pos); + return nfs_file_direct_writev(iocb); #endif dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); + inode->i_ino, (unsigned long)count, (unsigned long)iocb->ki_pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -279,13 +293,22 @@ if (!count) goto out; - result = generic_file_aio_write(iocb, buf, count, pos); + result = generic_file_aio_writev(iocb); out: return result; out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); goto out; +} + +static ssize_t +nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +{ + BUG_ON(buf != iocb->ki_iov[0].iov_base + || count != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return nfs_file_writev(iocb); } /* diff -Nru a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c 2004-09-22 11:48:25 -07:00 +++ b/fs/pipe.c 2004-09-22 11:48:25 -07:00 @@ -91,7 +91,7 @@ } static ssize_t -pipe_aio_readv(struct file *filp, const struct iovec *_iov, +do_pipe_aio_readv(struct file *filp, const struct iovec *_iov, unsigned long nr_segs, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; @@ -182,14 +182,14 @@ { struct iovec iov = { .iov_base = buf, .iov_len = count }; ssize_t ret; - ret = pipe_aio_readv(filp, &iov, 1, ppos); + ret = do_pipe_aio_readv(filp, &iov, 1, ppos); if (ret == -EIOCBRETRY) BUG(); return ret; } static ssize_t -pipe_aio_writev(struct file *filp, const struct iovec *_iov, +do_pipe_aio_writev(struct file *filp, const struct iovec *_iov, 
unsigned long nr_segs, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; @@ -286,7 +286,7 @@ size_t count, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - return pipe_aio_writev(filp, &iov, 1, ppos); + return do_pipe_aio_writev(filp, &iov, 1, ppos); } static int @@ -305,22 +305,19 @@ } static ssize_t -pipe_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +pipe_aio_writev(struct kiocb *iocb) { struct file *file = iocb->ki_filp; - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; iocb->ki_cancel = pipe_aio_cancel; - return pipe_aio_writev(file, &iov, 1, &file->f_pos); + return do_pipe_aio_writev(file, iocb->ki_iov, iocb->ki_nr_segs, &file->f_pos); } static ssize_t -pipe_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +pipe_aio_readv(struct kiocb *iocb) { struct file *file = iocb->ki_filp; - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; iocb->ki_cancel = pipe_aio_cancel; - return pipe_aio_readv(file, &iov, 1, &file->f_pos); + return do_pipe_aio_readv(file, iocb->ki_iov, iocb->ki_nr_segs, &file->f_pos); } static ssize_t @@ -515,8 +512,8 @@ struct file_operations read_fifo_fops = { .llseek = no_llseek, .read = pipe_read, - .readv = pipe_aio_readv, - .aio_read = pipe_aio_read, + .readv = do_pipe_aio_readv, + .aio_readv = pipe_aio_readv, .write = bad_pipe_w, .poll = fifo_poll, .ioctl = pipe_ioctl, @@ -529,8 +526,8 @@ .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, - .writev = pipe_aio_writev, - .aio_write = pipe_aio_write, + .writev = do_pipe_aio_writev, + .aio_writev = pipe_aio_writev, .poll = fifo_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -541,11 +538,11 @@ struct file_operations rdwr_fifo_fops = { .llseek = no_llseek, .read = pipe_read, - .readv = pipe_aio_readv, + .readv = do_pipe_aio_readv, .write = pipe_write, - .writev = pipe_aio_writev, - .aio_write = pipe_aio_write, - 
.aio_read = pipe_aio_read, + .writev = do_pipe_aio_writev, + .aio_writev = pipe_aio_writev, + .aio_readv = pipe_aio_readv, .poll = fifo_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, @@ -556,8 +553,8 @@ struct file_operations read_pipe_fops = { .llseek = no_llseek, .read = pipe_read, - .aio_read = pipe_aio_read, - .readv = pipe_aio_readv, + .aio_readv = pipe_aio_readv, + .readv = do_pipe_aio_readv, .write = bad_pipe_w, .poll = pipe_poll, .ioctl = pipe_ioctl, @@ -570,8 +567,8 @@ .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, - .writev = pipe_aio_writev, - .aio_write = pipe_aio_write, + .writev = do_pipe_aio_writev, + .aio_writev = pipe_aio_writev, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_write_open, @@ -582,11 +579,11 @@ struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, .read = pipe_read, - .readv = pipe_aio_readv, - .aio_read = pipe_aio_read, - .aio_write = pipe_aio_write, + .readv = do_pipe_aio_readv, + .aio_readv = pipe_aio_readv, + .aio_writev = pipe_aio_writev, .write = pipe_write, - .writev = pipe_aio_writev, + .writev = do_pipe_aio_writev, .poll = pipe_poll, .ioctl = pipe_ioctl, .open = pipe_rdwr_open, diff -Nru a/fs/read_write.c b/fs/read_write.c --- a/fs/read_write.c 2004-09-22 11:48:25 -07:00 +++ b/fs/read_write.c 2004-09-22 11:48:25 -07:00 @@ -189,6 +189,10 @@ init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; + kiocb.ki_iov = &kiocb.ki_fast_iov; + kiocb.ki_iov->iov_base = buf; + kiocb.ki_iov->iov_len = len; + kiocb.ki_nr_segs = 1; ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -233,6 +237,10 @@ init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; + kiocb.ki_iov = &kiocb.ki_fast_iov; + kiocb.ki_iov->iov_base = (char __user*)buf; + kiocb.ki_iov->iov_len = len; + kiocb.ki_nr_segs = 1; ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); diff -Nru a/fs/reiserfs/file.c 
b/fs/reiserfs/file.c --- a/fs/reiserfs/file.c 2004-09-22 11:48:25 -07:00 +++ b/fs/reiserfs/file.c 2004-09-22 11:48:25 -07:00 @@ -1300,6 +1300,11 @@ { return generic_file_aio_write(iocb, buf, count, pos); } +static ssize_t reiserfs_aio_writev(struct kiocb *iocb) +{ + return generic_file_aio_writev(iocb); +} + @@ -1313,6 +1318,8 @@ .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = reiserfs_aio_write, + .aio_readv = generic_file_aio_readv, + .aio_writev = reiserfs_aio_writev, }; diff -Nru a/include/linux/aio.h b/include/linux/aio.h --- a/include/linux/aio.h 2004-09-22 11:48:25 -07:00 +++ b/include/linux/aio.h 2004-09-22 11:48:25 -07:00 @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -73,8 +74,21 @@ /* State that we remember to be able to restart/retry */ unsigned short ki_opcode; size_t ki_nbytes; /* copy of iocb->aio_nbytes */ - char *ki_buf; /* remaining iocb->aio_buf */ + + /* Used for PREAD, PWRITE, and PREADV and WRITEV with small nr_segs. */ + struct iovec ki_fast_iov; + + /* Used for long PREADV and PWRITEV. iov is kmalloced. */ + struct iovec *ki_slow_iov; + + /* ki_iov points to either &ki_short_iov or ki_long_iov, + depending on the value of ki_nr_segs. Its pointers are + incremented as more data is read or written + asynchronously. */ + struct iovec *ki_iov; + unsigned long ki_nr_segs; /* number of iovs left. */ size_t ki_left; /* remaining bytes */ + wait_queue_t ki_wait; long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ diff -Nru a/include/linux/aio_abi.h b/include/linux/aio_abi.h --- a/include/linux/aio_abi.h 2004-09-22 11:48:25 -07:00 +++ b/include/linux/aio_abi.h 2004-09-22 11:48:25 -07:00 @@ -41,6 +41,8 @@ */ IOCB_CMD_POLL = 5, IOCB_CMD_NOOP = 6, + IOCB_CMD_PREADV = 7, + IOCB_CMD_PWRITEV = 8, }; /* read() from /dev/aio returns these structures. 
*/ @@ -65,6 +67,27 @@ * proper padding and aio_error abstraction */ +struct io_iocb_poll { + __u32 events; +}; + +struct io_iocb_sockaddr { + __u64 addr; + __u32 len; +}; + +struct io_iocb_common { + __u64 buf; + __u64 nbytes; + __s64 offset; +}; + +struct io_iocb_vector { + struct iovec __user *vec; + __u32 nr; + __s64 offset; +}; + struct iocb { /* these are internal to the kernel/libc. */ __u64 aio_data; /* data to be returned in event's data */ @@ -76,9 +99,12 @@ __s16 aio_reqprio; __u32 aio_fildes; - __u64 aio_buf; - __u64 aio_nbytes; - __s64 aio_offset; + union { + struct io_iocb_common c; + struct io_iocb_vector v; + struct io_iocb_poll poll; + struct io_iocb_sockaddr saddr; + } u; /* extra parameters */ __u64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */ diff -Nru a/include/linux/fs.h b/include/linux/fs.h --- a/include/linux/fs.h 2004-09-22 11:48:25 -07:00 +++ b/include/linux/fs.h 2004-09-22 11:48:25 -07:00 @@ -879,6 +879,7 @@ loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); int (*readdir) (struct file *, void *, filldir_t); @@ -896,6 +897,10 @@ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); + /* For aio_readv and aio_writev, the iovec and offset are passed + through kiocb->ki_iov, ki_nr_segs, and ki_pos. 
*/ + ssize_t (*aio_readv) (struct kiocb *); + ssize_t (*aio_writev) (struct kiocb *); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*dir_notify)(struct file *filp, unsigned long arg); @@ -1413,8 +1418,9 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); -extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); +extern ssize_t generic_file_aio_readv(struct kiocb *); extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_writev(struct kiocb *); extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, unsigned long, loff_t *); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff -Nru a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h --- a/include/linux/nfs_fs.h 2004-09-22 11:48:25 -07:00 +++ b/include/linux/nfs_fs.h 2004-09-22 11:48:25 -07:00 @@ -306,10 +306,9 @@ */ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, - size_t count, loff_t pos); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos); +/* iov, #iov, and offset are passed through iocb ki_iov, ki_pos. 
*/ +extern ssize_t nfs_file_direct_readv(struct kiocb *iocb); +extern ssize_t nfs_file_direct_writev(struct kiocb *iocb); /* * linux/fs/nfs/dir.c diff -Nru a/mm/filemap.c b/mm/filemap.c --- a/mm/filemap.c 2004-09-22 11:48:25 -07:00 +++ b/mm/filemap.c 2004-09-22 11:48:25 -07:00 @@ -1152,8 +1152,9 @@ desc.error = 0; do_generic_file_read(filp,ppos,&desc,file_read_actor); retval += desc.written; - if (!retval) { - retval = desc.error; + if (desc.written < iov[seg].iov_len) { + if (retval == 0) + retval = desc.error; break; } } @@ -1172,7 +1173,13 @@ BUG_ON(iocb->ki_pos != pos); return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); } +EXPORT_SYMBOL(generic_file_aio_readv); +ssize_t +generic_file_aio_readv(struct kiocb *iocb) +{ + return __generic_file_aio_read(iocb, iocb->ki_iov, iocb->ki_nr_segs, &iocb->ki_pos); +} EXPORT_SYMBOL(generic_file_aio_read); ssize_t @@ -2259,7 +2266,6 @@ ret = iov->iov_len; /* vector AIO not supported yet */ goto osync; } - ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); osync: @@ -2302,24 +2308,24 @@ EXPORT_SYMBOL(generic_file_write_nolock); -ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +EXPORT_SYMBOL(generic_file_aio_write); +ssize_t generic_file_aio_writev(struct kiocb *iocb) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + const struct iovec *iov = iocb->ki_iov; + unsigned long nr_segs = iocb->ki_nr_segs; + loff_t pos = iocb->ki_pos; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; if (!is_sync_kiocb(iocb) && kiocbIsSynced(iocb)) { /* nothing to transfer, may just need to sync data */ - ret = count; + ret = iov_length(iov, nr_segs); goto osync; } - down(&inode->i_sem); - ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); up(&inode->i_sem); @@ 
-2331,7 +2337,18 @@ } return ret; } -EXPORT_SYMBOL(generic_file_aio_write); + +EXPORT_SYMBOL(generic_file_aio_writev); + +ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, + size_t count, loff_t pos) +{ + /* aio_write is a legacy interface. */ + BUG_ON(buf != iocb->ki_iov[0].iov_base + || count != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return generic_file_aio_writev(iocb); +} ssize_t generic_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) diff -Nru a/net/socket.c b/net/socket.c --- a/net/socket.c 2004-09-22 11:48:25 -07:00 +++ b/net/socket.c 2004-09-22 11:48:25 -07:00 @@ -99,6 +99,8 @@ size_t size, loff_t pos); static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, size_t size, loff_t pos); +static ssize_t sock_aio_readv(struct kiocb *iocb); +static ssize_t sock_aio_writev(struct kiocb *iocb); static int sock_mmap(struct file *file, struct vm_area_struct * vma); static int sock_close(struct inode *inode, struct file *file); @@ -125,6 +127,8 @@ .llseek = no_llseek, .aio_read = sock_aio_read, .aio_write = sock_aio_write, + .aio_readv = sock_aio_readv, + .aio_writev = sock_aio_writev, .poll = sock_poll, .ioctl = sock_ioctl, .mmap = sock_mmap, @@ -640,15 +644,15 @@ * area ubuf...ubuf+size-1 is writable before asking the protocol. 
*/ -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t size, loff_t pos) +static ssize_t sock_aio_readv(struct kiocb *iocb) { struct sock_iocb *x, siocb; struct socket *sock; - int flags; + const struct iovec *iov = iocb->ki_iov; + unsigned long nr_segs = iocb->ki_nr_segs; - if (pos != 0) - return -ESPIPE; + int flags; + size_t size = iov_length(iov, nr_segs); if (size==0) /* Match SYS5 behaviour */ return 0; @@ -666,31 +670,46 @@ x->async_msg.msg_name = NULL; x->async_msg.msg_namelen = 0; - x->async_msg.msg_iov = &x->async_iov; - x->async_msg.msg_iovlen = 1; x->async_msg.msg_control = NULL; x->async_msg.msg_controllen = 0; - x->async_iov.iov_base = ubuf; - x->async_iov.iov_len = size; + if (nr_segs == 1) { + // handle sock_aio_read that may pass iov on the stack. + x->async_msg.msg_iov = &x->async_iov; + x->async_msg.msg_iovlen = 1; + x->async_iov.iov_base = iov[0].iov_base; + x->async_iov.iov_len = iov[0].iov_len; + } else { + // we can assume that iov is held in iocb and not + // freed until x is freed. + x->async_msg.msg_iov = (struct iovec*)iov; + x->async_msg.msg_iovlen = nr_segs; + } flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags); } - - +static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, + size_t size, loff_t pos) +{ + /* aio_read is a legacy interface. */ + BUG_ON(ubuf != iocb->ki_iov[0].iov_base + || size != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return sock_aio_readv(iocb); +} /* * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 * is readable by the user process. 
*/ -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t size, loff_t pos) +static ssize_t sock_aio_writev(struct kiocb *iocb) { struct sock_iocb *x, siocb; struct socket *sock; - - if (pos != 0) - return -ESPIPE; + struct iovec *iov = iocb->ki_iov; + unsigned long nr_segs = iocb->ki_nr_segs; + + size_t size = iov_length(iov, nr_segs); if(size==0) /* Match SYS5 behaviour */ return 0; @@ -708,17 +727,34 @@ x->async_msg.msg_name = NULL; x->async_msg.msg_namelen = 0; - x->async_msg.msg_iov = &x->async_iov; - x->async_msg.msg_iovlen = 1; x->async_msg.msg_control = NULL; x->async_msg.msg_controllen = 0; x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; + if (nr_segs == 1) { + // handle sock_aio_write that may pass iov on the stack. + x->async_msg.msg_iov = &x->async_iov; + x->async_msg.msg_iovlen = 1; + x->async_iov.iov_base = iov[0].iov_base; + x->async_iov.iov_len = iov[0].iov_len; + } else { + // we can assume that iov is held in iocb and not + // freed until x is freed. + x->async_msg.msg_iov = (struct iovec*)iov; + x->async_msg.msg_iovlen = nr_segs; + } if (sock->type == SOCK_SEQPACKET) x->async_msg.msg_flags |= MSG_EOR; - x->async_iov.iov_base = (void __user *)ubuf; - x->async_iov.iov_len = size; return __sock_sendmsg(iocb, sock, &x->async_msg, size); +} +static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, + size_t size, loff_t pos) +{ + /* aio_write is a legacy interface. */ + BUG_ON(ubuf != iocb->ki_iov[0].iov_base + || size != iocb->ki_iov[0].iov_len + || pos != iocb->ki_pos); + return sock_aio_writev(iocb); } ssize_t sock_sendpage(struct file *file, struct page *page,