diff --git a/Makefile b/Makefile index e28e263..fcd1c3ee 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = .68 +EXTRAVERSION = .69-rc1 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 303eaeb8..b289d66 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1552,7 +1552,18 @@ END(error_exit) /* runs on exception stack */ ENTRY(nmi) INTR_FRAME + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + pushq_cfi $-1 subq $15*8, %rsp CFI_ADJUST_CFA_OFFSET 15*8 diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d..889e54f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -38,10 +38,18 @@ #include #include -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 39493bc..936b0ba 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -550,27 +550,59 @@ void set_personality_ia32(void) current_thread_info()->status |= TS_COMPAT; } +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. 
+ */ + top = start + THREAD_SIZE; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = ACCESS_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = ACCESS_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = ACCESS_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = ACCESS_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 05dd307..6d022dc 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -253,10 +253,10 @@ void * devres_get(struct device *dev, void *new_res, if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; - new_dr = NULL; + new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); - devres_free(new_dr); + devres_free(new_res); return dr->data; } diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 5269fa0..34be7fd 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -1287,8 +1287,7 @@ handle_newline: tty->canon_data++; spin_unlock_irqrestore(&tty->read_lock, flags); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible(&tty->read_wait); + wake_up_interruptible(&tty->read_wait); return; } } @@ -1410,8 +1409,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, if (!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - if (waitqueue_active(&tty->read_wait)) - wake_up_interruptible(&tty->read_wait); + wake_up_interruptible(&tty->read_wait); } /* diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e7e28b5..644ab4d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1235,7 +1235,7 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) "Multi-Axis Controller" }; const char *type, *bus; - char buf[64]; + char buf[64] = ""; unsigned int i; int len; diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index dee6706..f05566f 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -102,19 +102,14 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; + mutex_lock(&evdev->mutex); - if (!evdev->exist) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + if (evdev->exist) + input_flush_device(&evdev->handle, file); mutex_unlock(&evdev->mutex); - return retval; + return 0; } static void evdev_free(struct device *dev) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 075b4d9..553414e 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -418,7 +418,7 @@ int wf_unregister_client(struct notifier_block *nb) { mutex_lock(&wf_lock); blocking_notifier_chain_unregister(&wf_client_list, nb); - wf_client_count++; + wf_client_count--; if (wf_client_count == 0) wf_stop_thread(); mutex_unlock(&wf_lock); diff --git a/drivers/md/md.c b/drivers/md/md.c index 4ce6e2f..ab0f708 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4680,9 +4680,9 @@ static int 
get_bitmap_file(mddev_t * mddev, void __user * arg) int err = -ENOMEM; if (md_allow_write(mddev)) - file = kmalloc(sizeof(*file), GFP_NOIO); + file = kzalloc(sizeof(*file), GFP_NOIO); else - file = kmalloc(sizeof(*file), GFP_KERNEL); + file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) goto out; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 97a56f0..2bc6661 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -868,7 +868,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* Do we support "hardware" checksums? */ if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ - dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->features |= NETIF_F_TSO | NETIF_F_UFO | NETIF_F_TSO_ECN | NETIF_F_TSO6; diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 0d21386..e4c01b5 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -1035,6 +1035,8 @@ static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc) static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task, struct mvs_slot_info *slot, u32 slot_idx) { + if (!slot) + return; if (!slot->task) return; if (!sas_protocol_ata(task->task_proto)) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 38fb682..fa22638 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -80,7 +80,7 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, return 0; /* offset in TRBs */ segment_offset = trb - seg->trbs; - if (segment_offset > TRBS_PER_SEGMENT) + if (segment_offset >= TRBS_PER_SEGMENT) return 0; return seg->dma + (segment_offset * sizeof(*trb)); } diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 748c627..1e5f35d 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -143,6 +143,8 @@ static int whiteheat_firmware_download(struct usb_serial *serial, static int whiteheat_firmware_attach(struct usb_serial *serial); /* function prototypes for the Connect Tech WhiteHEAT serial converter */ +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id); static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_release(struct usb_serial *serial); static int whiteheat_open(struct tty_struct *tty, @@ -188,6 +190,7 @@ static struct usb_serial_driver whiteheat_device = { .usb_driver = &whiteheat_driver, .id_table = id_table_std, .num_ports = 4, + .probe = whiteheat_probe, .attach = whiteheat_attach, .release = whiteheat_release, .open = whiteheat_open, @@ -387,6 +390,34 @@ static int whiteheat_firmware_attach(struct usb_serial *serial) /***************************************************************************** * Connect Tech's White Heat serial driver functions *****************************************************************************/ + +static int whiteheat_probe(struct usb_serial *serial, + const struct usb_device_id *id) +{ + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *endpoint; + size_t num_bulk_in = 0; + size_t num_bulk_out = 0; + size_t min_num_bulk; + unsigned int i; + + iface_desc = serial->interface->cur_altsetting; + + for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { + endpoint = &iface_desc->endpoint[i].desc; + if (usb_endpoint_is_bulk_in(endpoint)) + ++num_bulk_in; + if 
(usb_endpoint_is_bulk_out(endpoint)) + ++num_bulk_out; + } + + min_num_bulk = COMMAND_PORT + 1; + if (num_bulk_in < min_num_bulk || num_bulk_out < min_num_bulk) + return -ENODEV; + + return 0; +} + static int whiteheat_attach(struct usb_serial *serial) { struct usb_serial_port *command_port; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 400786e..b8a0388 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -676,16 +676,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (file_permission(interpreter, MAY_READ) < 0) bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); - if (retval != BINPRM_BUF_SIZE) { + /* Get the exec headers */ + retval = kernel_read(interpreter, 0, + (void *)&loc->interp_elf_ex, + sizeof(loc->interp_elf_ex)); + if (retval != sizeof(loc->interp_elf_ex)) { if (retval >= 0) retval = -EIO; goto out_free_dentry; } - /* Get the exec headers */ - loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; diff --git a/fs/dcache.c b/fs/dcache.c index 44c0aea..07c4472 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root, struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; char *end = buffer + buflen; - char *retval; + char *retval, *tail; spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); @@ -1923,6 +1923,7 @@ char *__d_path(const struct path *path, struct path *root, /* Get '/' right */ retval = end-1; *retval = '/'; + tail = end; for (;;) { struct dentry * parent; @@ -1930,6 +1931,14 @@ char *__d_path(const struct path *path, struct path *root, if (dentry == root->dentry && vfsmnt == root->mnt) break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { + /* Escaped? */ + if (dentry != vfsmnt->mnt_root) { + buflen += (tail - end); + end = tail; + prepend(&end, &buflen, "(unreachable)/", 14); + retval = end; + goto out; + } /* Global root? 
*/ if (vfsmnt->mnt_parent == vfsmnt) { goto global_root; diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 0d20006..3136308 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -286,7 +286,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -396,11 +395,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 92fb358..db240c5 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -132,13 +132,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -167,9 +170,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -366,6 +366,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 29da657..7d75904 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -446,7 +446,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -556,11 +555,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 71ee6f6..614446b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -929,7 +929,7 @@ restart: __func__); } nfs4_put_open_state(state); - clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); goto restart; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 73db5a6..78658aa 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -539,6 +540,7 @@ const struct file_operations proc_clear_refs_operations = { struct pagemapread { u64 __user *out, *end; + bool show_pfn; }; #define PM_ENTRY_BYTES sizeof(u64) @@ -589,14 +591,14 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -static u64 pte_to_pagemap_entry(pte_t pte) +static u64 pte_to_pagemap_entry(struct pagemapread *pm, pte_t pte) { u64 pme = 0; if (is_swap_pte(pte)) pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; else if (pte_present(pte)) - pme = PM_PFRAME(pte_pfn(pte)) + pme = (pm->show_pfn ? 
PM_PFRAME(pte_pfn(pte)) : 0) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; return pme; } @@ -624,7 +626,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && (vma->vm_start <= addr) && !is_vm_hugetlb_page(vma)) { pte = pte_offset_map(pmd, addr); - pfn = pte_to_pagemap_entry(*pte); + pfn = pte_to_pagemap_entry(pm, *pte); /* unmap before userspace copy */ pte_unmap(pte); } @@ -695,6 +697,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; + /* do not disclose physical addresses: attack vector */ + pm.show_pfn = !security_capable(CAP_SYS_ADMIN); + mm = get_task_mm(task); if (!mm) goto out_task; @@ -773,19 +778,9 @@ out: return ret; } -static int pagemap_open(struct inode *inode, struct file *file) -{ - /* do not disclose physical addresses to unprivileged - userspace (closes a rowhammer attack vector) */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - return 0; -} - const struct file_operations proc_pagemap_operations = { .llseek = mem_lseek, /* borrow this */ .read = pagemap_read, - .open = pagemap_open, }; #endif /* CONFIG_PROC_PAGE_MONITOR */ diff --git a/fs/splice.c b/fs/splice.c index 1ef1c00..5c006c8b 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1123,7 +1123,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, long ret, bytes; umode_t i_mode; size_t len; - int i, flags; + int i, flags, more; /* * We require the input being a regular file, as we don't want to @@ -1166,6 +1166,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; + more = sd->flags & SPLICE_F_MORE; while (len) { size_t read_len; @@ -1179,6 +1180,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, sd->total_len = read_len; /* + * If more data is pending, set SPLICE_F_MORE + * If this is the last data and SPLICE_F_MORE was not set + * initially, clears it. + */ + if (read_len < len) + sd->flags |= SPLICE_F_MORE; + else if (!more) + sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 9c7d22f..c782906 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -111,8 +111,15 @@ typedef struct xfs_attr_leaf_name_remote { typedef struct xfs_attr_leafblock { xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced and definining them can actually make gcc optimize away + * accesses to the 'entries' array above index 0 so don't do that. 
+ * + * xfs_attr_leaf_name_local_t namelist; + * xfs_attr_leaf_name_remote_t valuelist; + */ } xfs_attr_leafblock_t; /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ae77862..c282a2c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1765,7 +1765,8 @@ extern int skb_copy_datagram_iovec(const struct sk_buff *from, int size); extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, - struct iovec *iov); + struct iovec *iov, + int len); extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, const struct iovec *from, diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 18c7732..1fb67ab 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -47,7 +47,8 @@ extern int inet_ctl_sock_create(struct sock **sk, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + if (sk) + sk_release_kernel(sk); } #endif diff --git a/ipc/msg.c b/ipc/msg.c index 779f762..94c6411 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -199,6 +199,15 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } + msq->q_stime = msq->q_rtime = 0; + msq->q_ctime = get_seconds(); + msq->q_cbytes = msq->q_qnum = 0; + msq->q_qbytes = ns->msg_ctlmnb; + msq->q_lspid = msq->q_lrpid = 0; + INIT_LIST_HEAD(&msq->q_messages); + INIT_LIST_HEAD(&msq->q_receivers); + INIT_LIST_HEAD(&msq->q_senders); + /* * ipc_addid() locks msq */ @@ -209,15 +218,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return id; } - msq->q_stime = msq->q_rtime = 0; - msq->q_ctime = get_seconds(); - msq->q_cbytes = msq->q_qnum = 0; - msq->q_qbytes = ns->msg_ctlmnb; - msq->q_lspid = msq->q_lrpid = 0; - INIT_LIST_HEAD(&msq->q_messages); - INIT_LIST_HEAD(&msq->q_receivers); - INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(msq); return msq->q_perm.id; diff --git a/ipc/sem.c b/ipc/sem.c index b781007..fe3579f 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -264,6 +264,12 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) return retval; } + sma->sem_base = (struct sem *) &sma[1]; + INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->list_id); + sma->sem_nsems = nsems; + sma->sem_ctime = get_seconds(); + id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (id < 0) { security_sem_free(sma); @@ -272,11 +278,6 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } ns->used_sems += nsems; - sma->sem_base = (struct sem *) &sma[1]; - INIT_LIST_HEAD(&sma->sem_pending); - INIT_LIST_HEAD(&sma->list_id); - sma->sem_nsems = nsems; - sma->sem_ctime = get_seconds(); sem_unlock(sma); return sma->sem_perm.id; @@ -1295,16 +1296,27 @@ void exit_sem(struct task_struct *tsk) rcu_read_lock(); un = list_entry_rcu(ulp->list_proc.next, struct sem_undo, list_proc); - if (&un->list_proc == &ulp->list_proc) - semid = -1; - else - semid = un->semid; + if (&un->list_proc == &ulp->list_proc) { + /* + * We must wait for freeary() before freeing this ulp, + * in case we raced with last sem_undo. There is a small + * possibility where we exit while freeary() didn't + * finish unlocking sem_undo_list. 
+ */ + spin_unlock_wait(&ulp->lock); + rcu_read_unlock(); + break; + } + spin_lock(&ulp->lock); + semid = un->semid; + spin_unlock(&ulp->lock); rcu_read_unlock(); + /* exit_sem raced with IPC_RMID, nothing to do */ if (semid == -1) - break; + continue; - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); + sma = sem_lock_check(tsk->nsproxy->ipc_ns, semid); /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) diff --git a/ipc/shm.c b/ipc/shm.c index d30732c..75cb87c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -386,12 +386,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -399,6 +393,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_segsz = size; shp->shm_nattch = 0; shp->shm_file = file; + + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + /* * shmid gets reported as "inode#" in /proc/pid/maps. * proc-ps tools use this. Changing this will break them. diff --git a/ipc/util.c b/ipc/util.c index 79ce84e..b6fe615 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -264,6 +264,10 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + err = idr_get_new(&ids->ipcs_idr, new, &id); if (err) { spin_unlock(&new->lock); @@ -273,10 +277,6 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - new->seq = ids->seq++; if(ids->seq > ids->seq_max) ids->seq = 0; diff --git a/kernel/module.c b/kernel/module.c index 4b270e6..04aa4f1 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -923,11 +923,15 @@ void symbol_put_addr(void *addr) if (core_kernel_text(a)) return; - /* module_text_address is safe here: we're supposed to have reference - * to module from symbol_get, so it can't go away. */ + /* + * Even though we hold a reference on the module; we still need to + * disable preemption in order to safely traverse the data structure. + */ + preempt_disable(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); + preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); diff --git a/lib/devres.c b/lib/devres.c index 72c8909..e4891d5 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -338,7 +338,7 @@ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) if (!iomap) return; - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + for (i = 0; i < PCIM_IOMAP_MAX; i++) { if (!(mask & (1 << i))) continue; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b435d1f..d81312f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2006,6 +2006,14 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, continue; /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these * areas. 
This is because a future no-page fault on this VMA diff --git a/net/core/datagram.c b/net/core/datagram.c index 4ade301..767c17a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -127,6 +127,35 @@ out_noerr: goto out; } +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return skb; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return ERR_PTR(-ENOMEM); + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return skb; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -160,6 +189,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *err) { struct sk_buff *skb; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -178,14 +208,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, * Look at current nfs client by the way... * However, this function was corrent in any case. 8) */ - unsigned long cpu_flags; - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); skb = skb_peek(&sk->sk_receive_queue); if (skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { - skb->peeked = 1; + + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, &sk->sk_receive_queue); @@ -204,6 +237,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); no_packet: *err = error; return NULL; @@ -640,7 +675,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!skb_shared(skb)) + skb->ip_summed = CHECKSUM_UNNECESSARY; } return sum; } @@ -657,6 +693,7 @@ EXPORT_SYMBOL(__skb_checksum_complete); * @skb: skbuff * @hlen: hardware length * @iov: io vector + * @len: amount of data to copy from skb to iov * * Caller _must_ check that skb will fit to this iovec. * @@ -666,11 +703,14 @@ EXPORT_SYMBOL(__skb_checksum_complete); * can be modified! 
*/ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, - int hlen, struct iovec *iov) + int hlen, struct iovec *iov, int len) { __wsum csum; int chunk = skb->len - hlen; + if (chunk > len) + chunk = len; + if (!chunk) return 0; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f9e7179..ed17505 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -794,7 +794,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) } } - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 99508d6..36dbc6e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1203,7 +1203,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1266,7 +1266,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) to blackhole. */ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c821218..d3dcfb9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4985,7 +4985,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); else err = skb_copy_and_csum_datagram_iovec(skb, hlen, - tp->ucopy.iov); + tp->ucopy.iov, chunk); if (!err) { tp->ucopy.len -= chunk; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3ae286b..83b507d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -975,7 +975,7 @@ try_again: else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov); + msg->msg_iov, copied); if (err == -EINVAL) goto csum_copy_err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e8c4fd9..34eed01 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4036,6 +4036,40 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, return addrconf_fixup_forwarding(table, valp, val); } +static +struct ctl_table *addrconf_sysctl_mtu_init(struct ctl_table *newctl, + const struct ctl_table *ctl) +{ + struct inet6_dev *idev = ctl->extra1; + static int min_mtu = IPV6_MIN_MTU; + + *newctl = *ctl; + newctl->extra1 = &min_mtu; + newctl->extra2 = idev ? 
&idev->dev->mtu : NULL; + return newctl; +} + +static +int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table lctl; + + return proc_dointvec_minmax(addrconf_sysctl_mtu_init(&lctl, ctl), + write, buffer, lenp, ppos); +} + +static int addrconf_sysctl_mtu_strategy(struct ctl_table *ctl, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + struct ctl_table lctl; + + return sysctl_intvec(addrconf_sysctl_mtu_init(&lctl, ctl), + oldval, oldlenp, newval, newlen); +} + static void dev_disable_change(struct inet6_dev *idev) { if (!idev || !idev->dev) @@ -4142,7 +4176,8 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_mtu, + .strategy = addrconf_sysctl_mtu_strategy, }, { .ctl_name = NET_IPV6_ACCEPT_RA, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d5b09c7..f016542 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -476,7 +476,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, goto csum_copy_err; err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); } else { - err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); + err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov, copied); if (err == -EINVAL) goto csum_copy_err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0b023f3..5c8bd19 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -233,7 +233,8 @@ try_again: err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied ); else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied); if (err == -EINVAL) goto csum_copy_err; } diff --git a/net/rds/connection.c b/net/rds/connection.c index cc8b568..8c3ddcd 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -187,6 +187,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } } + if (trans == NULL) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-ENODEV); + goto out; + } + conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); diff --git a/net/rds/info.c b/net/rds/info.c index 814a91a..0a857a0 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -174,7 +174,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00daff..43d9d66 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -233,8 +233,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + printk(KERN_WARNING "rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index b6076b2..813e1c4 100644 --- a/net/rxrpc/ar-recvmsg.c 
+++ b/net/rxrpc/ar-recvmsg.c @@ -184,7 +184,8 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_iov, copy); } else { ret = skb_copy_and_csum_datagram_iovec(skb, offset, - msg->msg_iov); + msg->msg_iov, + copy); if (ret == -EINVAL) goto csum_copy_error; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d37f07c..ec21612 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -734,6 +734,7 @@ static void xs_reset_transport(struct sock_xprt *transport) { struct socket *sock = transport->sock; struct sock *sk = transport->inet; + struct rpc_xprt *xprt = &transport->xprt; if (sk == NULL) return; @@ -747,6 +748,7 @@ static void xs_reset_transport(struct sock_xprt *transport) sk->sk_user_data = NULL; xs_restore_old_callbacks(transport, sk); + xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); sk->sk_no_check = 0;
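
The reworked get_wchan() in the arch/x86/kernel/process_64.c hunk above bounds every dereference between the stack page's bottom and top before chasing the next frame pointer, and gives up after 16 frames or once the task starts running. Below is a minimal userspace sketch of that same loop, assuming a fake stack image and a made-up in_sched_functions() address range; none of these names are kernel API, they only mirror the walk the patch performs.

#include <stdio.h>

#define STACK_WORDS 32

static unsigned long fake_stack[STACK_WORDS];

/* Stand-in for the kernel's in_sched_functions() test. */
static int in_sched_functions(unsigned long ip)
{
	return ip >= 0x1000 && ip < 0x2000;	/* pretend this range is scheduler text */
}

static unsigned long get_wchan_model(unsigned long sp)
{
	unsigned long bottom = (unsigned long)&fake_stack[0];
	unsigned long top = (unsigned long)&fake_stack[STACK_WORDS]
			    - 2 * sizeof(unsigned long);
	unsigned long fp, ip;
	int count = 0;

	if (sp < bottom || sp > top)
		return 0;

	fp = *(unsigned long *)sp;
	do {
		/* Re-validate against the stack bounds before every dereference. */
		if (fp < bottom || fp > top)
			return 0;
		ip = *(unsigned long *)(fp + sizeof(unsigned long));
		if (!in_sched_functions(ip))
			return ip;
		fp = *(unsigned long *)fp;
	} while (count++ < 16);

	return 0;
}

int main(void)
{
	/* Two fake frames: the first returns into "scheduler" code at 0x1800,
	 * the outer one returns to 0x4321, which is what gets reported. */
	fake_stack[10] = (unsigned long)&fake_stack[20];	/* slot sp points at: current FP */
	fake_stack[20] = (unsigned long)&fake_stack[26];	/* frame 1: saved FP */
	fake_stack[21] = 0x1800;				/* frame 1: return IP (in sched) */
	fake_stack[26] = 0;					/* frame 2: saved FP (stops walk) */
	fake_stack[27] = 0x4321;				/* frame 2: return IP = wchan */

	printf("wchan = %#lx\n", get_wchan_model((unsigned long)&fake_stack[10]));
	return 0;
}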
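
The fs/splice.c hunk keeps SPLICE_F_MORE set while the direct-splice loop still has input pending and only restores the caller's original setting on the final chunk. A self-contained model of just that flag juggling follows; the chunked send loop and the send_chunk() helper are stand-ins for illustration, not the splice API.

#include <stdio.h>

#define SPLICE_F_MORE	0x04	/* same value the kernel uses for this flag */

static void send_chunk(size_t n, unsigned int flags)
{
	printf("send %zu bytes%s\n", n, (flags & SPLICE_F_MORE) ? " (MORE)" : "");
}

static void splice_loop(size_t len, size_t chunk, unsigned int flags)
{
	unsigned int more = flags & SPLICE_F_MORE;	/* caller's original setting */

	while (len) {
		size_t read_len = len < chunk ? len : chunk;

		/*
		 * If more data is pending, force SPLICE_F_MORE on; on the
		 * last chunk fall back to whatever the caller asked for.
		 */
		if (read_len < len)
			flags |= SPLICE_F_MORE;
		else if (!more)
			flags &= ~SPLICE_F_MORE;

		send_chunk(read_len, flags);
		len -= read_len;
	}
}

int main(void)
{
	splice_loop(10000, 4096, 0);			/* tail chunk sent without MORE */
	splice_loop(10000, 4096, SPLICE_F_MORE);	/* caller keeps MORE on the tail */
	return 0;
}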
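
The ipc/msg.c, ipc/sem.c, ipc/shm.c and ipc/util.c hunks all move field initialisation (and the euid/egid assignment) ahead of ipc_addid(), because the object is visible to concurrent lookups the instant it lands in the IDR. The sketch below shows that initialise-then-publish ordering with a toy mutex-guarded array standing in for the IDR; the registry and queue types are invented for illustration.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct queue {
	unsigned int owner_uid;
	unsigned int max_bytes;
	char name[16];
};

static struct queue *registry[8];
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Publish a fully initialised object; returns its id or -1 if full. */
static int registry_add(struct queue *q)
{
	int id;

	pthread_mutex_lock(&registry_lock);
	for (id = 0; id < 8; id++) {
		if (!registry[id]) {
			registry[id] = q;	/* findable by other threads from here on */
			pthread_mutex_unlock(&registry_lock);
			return id;
		}
	}
	pthread_mutex_unlock(&registry_lock);
	return -1;
}

static int new_queue(struct queue *q, unsigned int uid)
{
	/*
	 * The broken order would be: registry_add(q) first, fields second.
	 * A concurrent lookup could then find the id and read garbage.
	 * As in the patched newque()/newary()/newseg(), set everything up
	 * first and publish last.
	 */
	q->owner_uid = uid;
	q->max_bytes = 16384;
	strcpy(q->name, "example");

	return registry_add(q);
}

int main(void)
{
	static struct queue q;

	printf("queue id = %d\n", new_queue(&q, 1000));
	return 0;
}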
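
skb_set_peeked() in the net/core/datagram.c hunk refuses to flip the peeked bit on a shared skb: it clones the buffer, links the clone into the receive queue in its place, and marks only the private copy. The toy refcounted buffer below shows that same unshare-before-modify pattern; the struct and helpers are invented for illustration and sk_buffs are of course managed differently.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	int refcnt;
	int peeked;
	char data[16];
};

static struct buf *buf_clone(const struct buf *b)
{
	struct buf *n = malloc(sizeof(*n));

	if (!n)
		return NULL;
	memcpy(n, b, sizeof(*n));
	n->refcnt = 1;			/* the clone starts out private */
	return n;
}

/* Returns the buffer that now carries the peeked mark (may be a clone). */
static struct buf *set_peeked(struct buf *b)
{
	struct buf *n;

	if (b->peeked)
		return b;		/* already marked, nothing to do */

	if (b->refcnt == 1) {		/* private: safe to modify in place */
		b->peeked = 1;
		return b;
	}

	n = buf_clone(b);		/* shared: clone before modifying */
	if (!n)
		return NULL;
	b->refcnt--;			/* drop our reference on the shared copy */
	n->peeked = 1;
	return n;
}

int main(void)
{
	struct buf shared = { .refcnt = 2, .peeked = 0, .data = "hello" };
	struct buf *marked = set_peeked(&shared);

	printf("original peeked=%d, returned peeked=%d (%s copy)\n",
	       shared.peeked, marked->peeked,
	       marked == &shared ? "same" : "cloned");
	if (marked && marked != &shared)
		free(marked);
	return 0;
}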