diff --git a/Makefile b/Makefile
index 37a90519bcc8..cc174ae4abb7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 34
-EXTRAVERSION = .14
+EXTRAVERSION = .15
 NAME = Sheep on Meth
 
 # *DOCUMENTATION*
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0a813f82eb9b..b87027b463a3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1462,7 +1462,7 @@ config ARCH_USES_PG_UNCACHED
 
 config ARCH_RANDOM
 	def_bool y
-	prompt "x86 architectural random number generator" if EXPERT
+	prompt "x86 architectural random number generator" if EMBEDDED
 	---help---
 	  Enable the x86 architectural RDRAND instruction
 	  (Intel Bull Mountain technology) to generate random numbers.
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785c5a63..321d24d6a924 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -132,6 +132,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8a6f0afa767e..84b313c1297e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -453,6 +453,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
 		m->ip = regs->ip;
 		m->cs = regs->cs;
+		/*
+		 * When in VM86 mode make the cs look like ring 3
+		 * always. This is a lie, but it's better than passing
+		 * the additional vm86 bit around everywhere.
+		 */
+		if (v8086_mode(regs))
+			m->cs |= 3;
 	} else {
 		m->ip = 0;
 		m->cs = 0;
@@ -990,6 +997,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 */
 		add_taint(TAINT_MACHINE_CHECK);
 
+		mce_get_rip(&m, regs);
 		severity = mce_severity(&m, tolerant, NULL);
 
 		/*
@@ -1028,7 +1036,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
 			mce_ring_add(m.addr >> PAGE_SHIFT);
 
-		mce_get_rip(&m, regs);
 		mce_log(&m);
 
 		if (severity > worst) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4d4468e9f47c..56b77410a9fe 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -176,6 +176,9 @@ static int msr_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
 	cpu = iminor(file->f_path.dentry->d_inode);
 	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b8579e70..bcfec2d23769 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
 {
 	const struct desc_struct *tls;
 
-	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
 	    (pos % sizeof(struct user_desc)) != 0 ||
 	    (count % sizeof(struct user_desc)) != 0)
 		return -EINVAL;
@@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
 	const struct user_desc *info;
 
-	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
 	    (pos % sizeof(struct user_desc)) != 0 ||
 	    (count % sizeof(struct user_desc)) != 0)
 		return -EINVAL;
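The tls.c change above is a one-character bounds fix: when pos equals the array limit, the old ">" test let the request through even though zero entries remain, so the copy started one entry past the end of the TLS array. A minimal stand-alone sketch of the predicate, with hypothetical stand-ins for the kernel constants:

#include <assert.h>
#include <stddef.h>

#define ENTRY_SIZE   8   /* stand-in for sizeof(struct user_desc) */
#define NUM_ENTRIES  3   /* stand-in for GDT_ENTRY_TLS_ENTRIES */

/* Mirrors the fixed check: reject any offset at or past the end. */
static int tls_pos_valid(size_t pos, size_t count)
{
	if (pos >= NUM_ENTRIES * ENTRY_SIZE ||
	    (pos % ENTRY_SIZE) != 0 ||
	    (count % ENTRY_SIZE) != 0)
		return 0;
	return 1;
}

int main(void)
{
	assert(tls_pos_valid(0, ENTRY_SIZE));               /* first entry */
	assert(tls_pos_valid(2 * ENTRY_SIZE, ENTRY_SIZE));  /* last entry */
	assert(!tls_pos_valid(3 * ENTRY_SIZE, ENTRY_SIZE)); /* old '>' allowed this */
	return 0;
}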
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c1e586d82c1d..65f9c0c45312 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1152,6 +1152,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
+		/* Check that the address is 32-byte aligned. */
+		if (vcpu->arch.time_offset &
+		    (sizeof(struct pvclock_vcpu_time_info) - 1))
+			break;
+
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 544ed251a40c..7c96106039a1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -379,10 +379,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	if (pgd_none(*pgd_ref))
 		return -1;
 
-	if (pgd_none(*pgd))
+	if (pgd_none(*pgd)) {
 		set_pgd(pgd, *pgd_ref);
-	else
+		arch_flush_lazy_mmu_mode();
+	} else {
 		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+	}
 
 	/*
 	 * Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee41bba315d1..3cd243b8b01d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -864,6 +864,9 @@ int kern_addr_valid(unsigned long addr)
 	if (pud_none(*pud))
 		return 0;
 
+	if (pud_large(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
 	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		return 0;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 25d787c17ad8..1b7d744f6f30 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -779,7 +779,16 @@ static void xen_write_cr4(unsigned long cr4)
 
 	native_write_cr4(cr4);
 }
-
+#ifdef CONFIG_X86_64
+static inline unsigned long xen_read_cr8(void)
+{
+	return 0;
+}
+static inline void xen_write_cr8(unsigned long val)
+{
+	BUG_ON(val);
+}
+#endif
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
@@ -945,6 +954,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = xen_write_cr4,
 
+#ifdef CONFIG_X86_64
+	.read_cr8 = xen_read_cr8,
+	.write_cr8 = xen_write_cr8,
+#endif
+
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
@@ -952,6 +966,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 
+	.read_tscp = native_read_tscp,
+
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e518ca..7328f71651e1 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -88,11 +88,11 @@ ENTRY(xen_iret)
 	 */
 #ifdef CONFIG_SMP
 	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax), %eax
-	movl __per_cpu_offset(,%eax,4), %eax
-	mov xen_vcpu(%eax), %eax
+	movl %ss:TI_cpu(%eax), %eax
+	movl %ss:__per_cpu_offset(,%eax,4), %eax
+	mov %ss:xen_vcpu(%eax), %eax
 #else
-	movl xen_vcpu, %eax
+	movl %ss:xen_vcpu, %eax
 #endif
 
 	/* check IF state we're restoring */
@@ -105,11 +105,11 @@ ENTRY(xen_iret)
 	 * resuming the code, so we don't have to be worried about
 	 * being preempted to another CPU.
 	 */
-	setz XEN_vcpu_info_mask(%eax)
+	setz %ss:XEN_vcpu_info_mask(%eax)
 xen_iret_start_crit:
 
 	/* check for unmasked and pending */
-	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
 
 	/*
 	 * If there's something pending, mask events again so we can
@@ -117,7 +117,7 @@ xen_iret_start_crit:
 	 * touch XEN_vcpu_info_mask.
 	 */
 	jne 1f
-	movb $1, XEN_vcpu_info_mask(%eax)
+	movb $1, %ss:XEN_vcpu_info_mask(%eax)
 
 1:	popl %eax
diff --git a/block/blk-core.c b/block/blk-core.c
index 94f274bc9683..2ddcf96c854d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -865,6 +865,9 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	struct request *rq;
 
+	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		return NULL;
+
 	BUG_ON(rw != READ && rw != WRITE);
 
 	spin_lock_irq(q->queue_lock);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 49557e91f0da..85bd7b445d86 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,6 +50,13 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
+	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+		rq->errors = -ENXIO;
+		if (rq->end_io)
+			rq->end_io(rq, rq->errors);
+		return;
+	}
+
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4f4230b79bb6..b661f8940ef5 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -24,6 +24,7 @@
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/cdrom.h>
+#include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/times.h>
 #include <asm/uaccess.h>
@@ -691,6 +692,57 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 }
 EXPORT_SYMBOL(scsi_cmd_ioctl);
 
+int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
+{
+	if (bd && bd == bd->bd_contains)
+		return 0;
+
+	/* Actually none of these is particularly useful on a partition,
+	 * but they are safe.
+	 */
+	switch (cmd) {
+	case SCSI_IOCTL_GET_IDLUN:
+	case SCSI_IOCTL_GET_BUS_NUMBER:
+	case SCSI_IOCTL_GET_PCI:
+	case SCSI_IOCTL_PROBE_HOST:
+	case SG_GET_VERSION_NUM:
+	case SG_SET_TIMEOUT:
+	case SG_GET_TIMEOUT:
+	case SG_GET_RESERVED_SIZE:
+	case SG_SET_RESERVED_SIZE:
+	case SG_EMULATED_HOST:
+		return 0;
+	case CDROM_GET_CAPABILITY:
+		/* Keep this until we remove the printk below. udev sends it
+		 * and we do not want to spam dmesg about it. CD-ROMs do
+		 * not have partitions, so we get here only for disks.
+		 */
+		return -ENOIOCTLCMD;
+	default:
+		break;
+	}
+
+	/* In particular, rule out all resets and host-specific ioctls. */
+	printk_ratelimited(KERN_WARNING
+			   "%s: sending ioctl %x to a partition!\n", current->comm, cmd);
+
+	return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+}
+EXPORT_SYMBOL(scsi_verify_blk_ioctl);
+
+int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
+		       unsigned int cmd, void __user *arg)
+{
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bd, cmd);
+	if (ret < 0)
+		return ret;
+
+	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
+}
+EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
+
 static int __init blk_scsi_ioctl_init(void)
 {
 	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 2bc332142849..5c565d72d1cc 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -230,11 +230,11 @@ remainder:
 	 */
 	if (byte_count < DEFAULT_BLK_SZ) {
 empty_rbuf:
-		for (; ctx->rand_data_valid < DEFAULT_BLK_SZ;
-			ctx->rand_data_valid++) {
+		while (ctx->rand_data_valid < DEFAULT_BLK_SZ) {
 			*ptr = ctx->rand_data[ctx->rand_data_valid];
 			ptr++;
 			byte_count--;
+			ctx->rand_data_valid++;
 			if (byte_count == 0)
 				goto done;
 		}
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index ef71318976c7..6e241640219d 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -116,13 +116,18 @@ static void cryptd_queue_worker(struct work_struct *work)
 	struct crypto_async_request *req, *backlog;
 
 	cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
-	/* Only handle one request at a time to avoid hogging crypto
-	 * workqueue. preempt_disable/enable is used to prevent
-	 * being preempted by cryptd_enqueue_request() */
+	/*
+	 * Only handle one request at a time to avoid hogging crypto workqueue.
+	 * preempt_disable/enable is used to prevent being preempted by
+	 * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+	 * cryptd_enqueue_request() being accessed from software interrupts.
+	 */
+	local_bh_disable();
 	preempt_disable();
 	backlog = crypto_get_backlog(&cpu_queue->queue);
 	req = crypto_dequeue_request(&cpu_queue->queue);
 	preempt_enable();
+	local_bh_enable();
 
 	if (!req)
 		return;
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 9ed9f60316e5..899b2fa24e50 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -177,7 +177,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len)
 	index = sctx->count[0] & 0x7f;
 
 	/* Update number of bytes */
-	if (!(sctx->count[0] += len))
+	if ((sctx->count[0] += len) < len)
 		sctx->count[1]++;
 
 	part_len = 128 - index;
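The sha512_update change replaces a wrap-to-zero test with a proper carry test: after count[0] += len, the low word has overflowed exactly when the sum is smaller than the addend, whereas the old !(...) check only noticed the single case where the sum landed precisely on zero. A stand-alone sketch of the two predicates:

#include <assert.h>
#include <stdint.h>

/* Detects overflow of lo += len the way the fixed code does. */
static int carry_fixed(uint64_t *lo, uint64_t len)
{
	return (*lo += len) < len;
}

/* The old test: only fires when the sum wraps exactly to 0. */
static int carry_old(uint64_t *lo, uint64_t len)
{
	return !(*lo += len);
}

int main(void)
{
	uint64_t a = UINT64_MAX - 1, b = UINT64_MAX - 1;

	assert(carry_fixed(&a, 3));	/* wraps to 1: carry detected */
	assert(!carry_old(&b, 3));	/* wraps to 1: carry missed   */
	return 0;
}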
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 346b758c47ed..10e6efe75daf 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1064,6 +1064,9 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
 		return -EINVAL;
 	}
 
+	if (!dev)
+		return -EINVAL;
+
 	dev->cpu = pr->id;
 
 	for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
 		dev->states[i].name[0] = '\0';
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 6e4b795d79e0..ce0ba625bd62 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4407,6 +4407,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 
 	/* Devices which aren't very happy with higher link speeds */
 	{ "WD My Book",			NULL,	ATA_HORKAGE_1_5_GBPS, },
+	{ "Seagate FreeAgent GoFlex",	NULL,	ATA_HORKAGE_1_5_GBPS, },
 
 	/*
 	 * Devices which choke on SETXFER.  Applies only if both the
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0dfa46877e39..191b375df95e 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -339,7 +339,8 @@ ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
 	struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
 
-	if (ap->ops->sw_activity_show && (ap->flags & ATA_FLAG_SW_ACTIVITY))
+	if (atadev && ap->ops->sw_activity_show &&
+	    (ap->flags & ATA_FLAG_SW_ACTIVITY))
 		return ap->ops->sw_activity_show(atadev, buf);
 	return -EINVAL;
 }
@@ -354,7 +355,8 @@ ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
 	enum sw_activity val;
 	int rc;
 
-	if (ap->ops->sw_activity_store && (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
+	if (atadev && ap->ops->sw_activity_store &&
+	    (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
 		val = simple_strtoul(buf, NULL, 0);
 		switch (val) {
 		case OFF: case BLINK_ON: case BLINK_OFF:
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 12eec3f633b1..1913e74ee582 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -290,7 +290,7 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start,
 	struct device *dev;
 	int error = 0;
 
-	if (!bus)
+	if (!bus || !bus->p)
 		return -EINVAL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
@@ -324,7 +324,7 @@ struct device *bus_find_device(struct bus_type *bus,
 	struct klist_iter i;
 	struct device *dev;
 
-	if (!bus)
+	if (!bus || !bus->p)
 		return NULL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index eb5ff0531cfb..54bad7584ea4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1652,7 +1652,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 			return status;
 		}
 
-	/* scsi_cmd_ioctl handles these, below, though some are not */
+	/* scsi_cmd_blk_ioctl handles these, below, though some are not */
 	/* very meaningful for cciss.  SG_IO is the main one people want. */
 
 	case SG_GET_VERSION_NUM:
@@ -1663,9 +1663,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case SG_EMULATED_HOST:
 	case SG_IO:
 	case SCSI_IOCTL_SEND_COMMAND:
-		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+		return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
-	/* scsi_cmd_ioctl would normally handle these, below, but */
+	/* scsi_cmd_blk_ioctl would normally handle these, below, but */
 	/* they aren't a good fit for cciss, as CD-ROMs are */
 	/* not supported, and we don't have any bus/target/lun */
 	/* which we present to the kernel. */
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0536b5b29adc..1c1533a59c4d 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1727,10 +1727,9 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode)
 static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
     unsigned int cmd, unsigned long arg)
 {
-	struct gendisk *disk = bdev->bd_disk;
 	void __user *usermem = (void __user *) arg;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+	return scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem);
 }
 
 /*
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2138a7ae050c..4abfa80fdcd6 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -201,8 +201,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOTTY;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
-			      (void __user *)data);
+	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
+				  (void __user *)data);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 283b127cea74..31c653a1005c 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -312,9 +312,11 @@ static void hci_uart_tty_close(struct tty_struct *tty)
 			hci_uart_close(hdev);
 
 		if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) {
+			if (hdev) {
+				hci_unregister_dev(hdev);
+				hci_free_dev(hdev);
+			}
 			hu->proto->close(hu);
-			hci_unregister_dev(hdev);
-			hci_free_dev(hdev);
 		}
 	}
 }
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index e3749d0ba68b..5e7c72d3fe39 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2684,12 +2684,11 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 {
 	void __user *argp = (void __user *)arg;
 	int ret;
-	struct gendisk *disk = bdev->bd_disk;
 
 	/*
 	 * Try the generic SCSI command ioctl's first.
 	 */
-	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 	if (ret != -ENOTTY)
 		return ret;
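The conversions above (cciss, ub, virtio_blk, cdrom, plus sd and ide-floppy later in the patch) all funnel SCSI passthrough ioctls through scsi_cmd_blk_ioctl(), so the partition check lives in one place. A user-space model of the policy scsi_verify_blk_ioctl() enforces, with illustrative command constants rather than the real ioctl numbers:

#include <stdbool.h>
#include <stdio.h>

enum { SG_GET_VERSION_NUM = 1, SG_IO = 2 };	/* illustrative values */

/* A command aimed at a partition passes only if it is on the
 * harmless-query whitelist or the caller holds CAP_SYS_RAWIO.
 */
static int verify_blk_ioctl(bool whole_disk, bool cap_sys_rawio,
			    unsigned int cmd)
{
	if (whole_disk)
		return 0;
	switch (cmd) {
	case SG_GET_VERSION_NUM:	/* safe on a partition */
		return 0;
	default:
		break;
	}
	return cap_sys_rawio ? 0 : -515 /* -ENOIOCTLCMD */;
}

int main(void)
{
	printf("partition, no cap, SG_IO -> %d\n",
	       verify_blk_ioctl(false, false, SG_IO));
	printf("whole disk, SG_IO -> %d\n",
	       verify_blk_ioctl(true, false, SG_IO));
	return 0;
}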
diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index 7b98c067190a..a65a574eac6b 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -95,9 +95,9 @@ struct si_sm_data {
 	enum bt_states	state;
 	unsigned char	seq;		/* BT sequence number */
 	struct si_sm_io	*io;
-	unsigned char	write_data[IPMI_MAX_MSG_LENGTH];
+	unsigned char	write_data[IPMI_MAX_MSG_LENGTH + 2]; /* +2 for memcpy */
 	int		write_count;
-	unsigned char	read_data[IPMI_MAX_MSG_LENGTH];
+	unsigned char	read_data[IPMI_MAX_MSG_LENGTH + 2]; /* +2 for memcpy */
 	int		read_count;
 	int		truncated;
 	long		timeout;	/* microseconds countdown */
diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c
index 51e0e2d8fac6..a330492e06f9 100644
--- a/drivers/firmware/pcdp.c
+++ b/drivers/firmware/pcdp.c
@@ -95,7 +95,7 @@ efi_setup_pcdp_console(char *cmdline)
 	if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
 		return -ENODEV;
 
-	pcdp = ioremap(efi.hcdp, 4096);
+	pcdp = early_ioremap(efi.hcdp, 4096);
 	printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp);
 
 	if (strstr(cmdline, "console=hcdp")) {
@@ -131,6 +131,6 @@ efi_setup_pcdp_console(char *cmdline)
 	}
 
 out:
-	iounmap(pcdp);
+	early_iounmap(pcdp, 4096);
 	return rc;
 }
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 07ddda553a95..b71ff324bec9 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -56,6 +56,8 @@ struct hid_report *hid_register_report(struct hid_device *device, unsigned type,
 	struct hid_report_enum *report_enum = device->report_enum + type;
 	struct hid_report *report;
 
+	if (id >= HID_MAX_IDS)
+		return NULL;
 	if (report_enum->report_id_hash[id])
 		return report_enum->report_id_hash[id];
 
@@ -367,8 +369,10 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item)
 
 	case HID_GLOBAL_ITEM_TAG_REPORT_ID:
 		parser->global.report_id = item_udata(item);
-		if (parser->global.report_id == 0) {
-			dbg_hid("report_id 0 is invalid\n");
+		if (parser->global.report_id == 0 ||
+		    parser->global.report_id >= HID_MAX_IDS) {
+			dbg_hid("report_id %u is invalid\n",
+				parser->global.report_id);
 			return -1;
 		}
 		return 0;
@@ -545,7 +549,7 @@ static void hid_device_release(struct device *dev)
 	for (i = 0; i < HID_REPORT_TYPES; i++) {
 		struct hid_report_enum *report_enum = device->report_enum + i;
 
-		for (j = 0; j < 256; j++) {
+		for (j = 0; j < HID_MAX_IDS; j++) {
 			struct hid_report *report = report_enum->report_id_hash[j];
 			if (report)
 				hid_free_report(report);
@@ -804,6 +808,64 @@ static __inline__ int search(__s32 *array, __s32 value, unsigned n)
 	return -1;
 }
 
+static const char * const hid_report_names[] = {
+	"HID_INPUT_REPORT",
+	"HID_OUTPUT_REPORT",
+	"HID_FEATURE_REPORT",
+};
+/**
+ * hid_validate_values - validate existing device report's value indexes
+ *
+ * @device: hid device
+ * @type: which report type to examine
+ * @id: which report ID to examine (0 for first)
+ * @field_index: which report field to examine
+ * @report_counts: expected number of values
+ *
+ * Validate the number of values in a given field of a given report, after
+ * parsing.
+ */
+struct hid_report *hid_validate_values(struct hid_device *hid,
+				       unsigned int type, unsigned int id,
+				       unsigned int field_index,
+				       unsigned int report_counts)
+{
+	struct hid_report *report;
+
+	if (type > HID_FEATURE_REPORT) {
+		err_hid("invalid HID report type %u\n", type);
+		return NULL;
+	}
+
+	if (id >= HID_MAX_IDS) {
+		err_hid("invalid HID report id %u\n", id);
+		return NULL;
+	}
+
+	/*
+	 * Explicitly not using hid_get_report() here since it depends on
+	 * ->numbered being checked, which may not always be the case when
+	 * drivers go to access report values.
+	 */
+	report = hid->report_enum[type].report_id_hash[id];
+	if (!report) {
+		err_hid("missing %s %u\n", hid_report_names[type], id);
+		return NULL;
+	}
+	if (report->maxfield <= field_index) {
+		err_hid("not enough fields in %s %u\n",
+			hid_report_names[type], id);
+		return NULL;
+	}
+	if (report->field[field_index]->report_count < report_counts) {
+		err_hid("not enough values in %s %u field %u\n",
+			hid_report_names[type], id, field_index);
+		return NULL;
+	}
+	return report;
+}
+EXPORT_SYMBOL_GPL(hid_validate_values);
+
 /**
  * hid_match_report - check if driver's raw_event should be called
 *
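The force-feedback drivers converted below replace their hand-rolled report checks with this helper. A user-space model of the guarantee a non-NULL return gives the caller, using a toy report type rather than the kernel structures:

#include <stdio.h>

/* Toy report: 'counts[i]' is how many values field i really has. */
struct toy_report { unsigned maxfield; unsigned counts[4]; };

static struct toy_report *validate(struct toy_report *r,
				   unsigned field_index, unsigned want)
{
	if (!r || r->maxfield <= field_index ||
	    r->counts[field_index] < want)
		return NULL;
	return r;	/* fields/values are now known to be addressable */
}

int main(void)
{
	struct toy_report r = { .maxfield = 1, .counts = { 7 } };

	printf("need 7 values: %s\n", validate(&r, 0, 7) ? "ok" : "reject");
	printf("need 8 values: %s\n", validate(&r, 0, 8) ? "ok" : "reject");
	return 0;
}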
diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c
index d888f1e6794f..2b8109d7fc6a 100644
--- a/drivers/hid/hid-lg2ff.c
+++ b/drivers/hid/hid-lg2ff.c
@@ -66,26 +66,13 @@ int lg2ff_init(struct hid_device *hid)
 	struct hid_report *report;
 	struct hid_input *hidinput = list_entry(hid->inputs.next,
 						struct hid_input, list);
-	struct list_head *report_list =
-			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
 	int error;
 
-	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output report found\n");
+	/* Check that the report looks ok */
+	report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7);
+	if (!report)
 		return -ENODEV;
-	}
-
-	report = list_entry(report_list->next, struct hid_report, list);
-
-	if (report->maxfield < 1) {
-		dev_err(&hid->dev, "output report is empty\n");
-		return -ENODEV;
-	}
-	if (report->field[0]->report_count < 7) {
-		dev_err(&hid->dev, "not enough values in the field\n");
-		return -ENODEV;
-	}
 
 	lg2ff = kmalloc(sizeof(struct lg2ff_device), GFP_KERNEL);
 	if (!lg2ff)
diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c
index 4002832ee4af..b998b1675f62 100644
--- a/drivers/hid/hid-lg3ff.c
+++ b/drivers/hid/hid-lg3ff.c
@@ -68,10 +68,11 @@ static int hid_lg3ff_play(struct input_dev *dev, void *data,
 	int x, y;
 
 	/*
-	 * Maxusage should always be 63 (maximum fields)
-	 * likely a better way to ensure this data is clean
+	 * Available values in the field should always be 63, but we only use up to
+	 * 35. Instead, clear the entire area, however big it is.
 	 */
-	memset(report->field[0]->value, 0, sizeof(__s32)*report->field[0]->maxusage);
+	memset(report->field[0]->value, 0,
+	       sizeof(__s32) * report->field[0]->report_count);
 
 	switch (effect->type) {
 	case FF_CONSTANT:
@@ -131,32 +132,14 @@ static const signed short ff3_joystick_ac[] = {
 int lg3ff_init(struct hid_device *hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	struct hid_report *report;
-	struct hid_field *field;
 	const signed short *ff_bits = ff3_joystick_ac;
 	int error;
 	int i;
 
-	/* Find the report to use */
-	if (list_empty(report_list)) {
-		err_hid("No output report found");
-		return -1;
-	}
-
 	/* Check that the report looks ok */
-	report = list_entry(report_list->next, struct hid_report, list);
-	if (!report) {
-		err_hid("NULL output report");
-		return -1;
-	}
-
-	field = report->field[0];
-	if (!field) {
-		err_hid("NULL field");
-		return -1;
-	}
+	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35))
+		return -ENODEV;
 
 	/* Assume single fixed device G940 */
 	for (i = 0; ff_bits[i] >= 0; i++)
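The hid_lg3ff_play hunk sizes the clear by report_count (the number of values actually allocated for the field) instead of maxusage, which comes from the device-supplied descriptor. A small sketch of why that bound is the safe one, with hypothetical sizes:

#include <string.h>
#include <stdint.h>

/* Toy model of a HID field: 'value' holds report_count entries, while
 * maxusage is a separate, device-controlled number.
 */
struct toy_field {
	int32_t *value;
	unsigned report_count;	/* entries allocated in value[] */
	unsigned maxusage;	/* descriptor-supplied */
};

static void clear_values(struct toy_field *f)
{
	/* Matches the fix: bound the memset by the allocation, not by a
	 * number the device can inflate.
	 */
	memset(f->value, 0, sizeof(int32_t) * f->report_count);
}

int main(void)
{
	int32_t buf[4] = { 1, 2, 3, 4 };
	struct toy_field f = { buf, 4, 63 };	/* device claims 63 usages */

	clear_values(&f);	/* touches exactly 4 entries, not 63 */
	return buf[0];		/* 0 after the clear */
}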
diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index 61142b76a9b1..60978a33eeb6 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -136,27 +136,14 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude)
 int lgff_init(struct hid_device* hid)
 {
 	struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-	struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	struct hid_report *report;
-	struct hid_field *field;
 	const signed short *ff_bits = ff_joystick;
 	int error;
 	int i;
 
-	/* Find the report to use */
-	if (list_empty(report_list)) {
-		err_hid("No output report found");
-		return -1;
-	}
-
 	/* Check that the report looks ok */
-	report = list_entry(report_list->next, struct hid_report, list);
-	field = report->field[0];
-	if (!field) {
-		err_hid("NULL field");
-		return -1;
-	}
+	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
+		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(devices); i++) {
 		if (dev->id.vendor == devices[i].idVendor &&
diff --git a/drivers/hid/hid-pl.c b/drivers/hid/hid-pl.c
index 9f41e2bd8483..427f3a57aa23 100644
--- a/drivers/hid/hid-pl.c
+++ b/drivers/hid/hid-pl.c
@@ -129,8 +129,14 @@ static int plff_init(struct hid_device *hid)
 			strong = &report->field[0]->value[2];
 			weak = &report->field[0]->value[3];
 			debug("detected single-field device");
-		} else if (report->maxfield >= 4 && report->field[0]->maxusage == 1 &&
-			   report->field[0]->usage[0].hid == (HID_UP_LED | 0x43)) {
+		} else if (report->field[0]->maxusage == 1 &&
+			   report->field[0]->usage[0].hid ==
+				(HID_UP_LED | 0x43) &&
+			   report->maxfield >= 4 &&
+			   report->field[0]->report_count >= 1 &&
+			   report->field[1]->report_count >= 1 &&
+			   report->field[2]->report_count >= 1 &&
+			   report->field[3]->report_count >= 1) {
 			report->field[0]->value[0] = 0x00;
 			report->field[1]->value[0] = 0x00;
 			strong = &report->field[2]->value[0];
diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c
index b7acceabba80..a24f9fb43c34 100644
--- a/drivers/hid/hid-zpff.c
+++ b/drivers/hid/hid-zpff.c
@@ -69,21 +69,13 @@ static int zpff_init(struct hid_device *hid)
 	struct hid_report *report;
 	struct hid_input *hidinput = list_entry(hid->inputs.next,
 						struct hid_input, list);
-	struct list_head *report_list =
-			&hid->report_enum[HID_OUTPUT_REPORT].report_list;
 	struct input_dev *dev = hidinput->input;
-	int error;
+	int i, error;
 
-	if (list_empty(report_list)) {
-		dev_err(&hid->dev, "no output report found\n");
-		return -ENODEV;
-	}
-
-	report = list_entry(report_list->next, struct hid_report, list);
-
-	if (report->maxfield < 4) {
-		dev_err(&hid->dev, "not enough fields in report\n");
-		return -ENODEV;
+	for (i = 0; i < 4; i++) {
+		report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
+		if (!report)
+			return -ENODEV;
 	}
 
 	zpff = kzalloc(sizeof(struct zpff_device), GFP_KERNEL);
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 9c2288234dea..05f024caf4c9 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -287,8 +287,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
 	 * and CDROM_SEND_PACKET (legacy) ioctls
 	 */
 	if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
-		err = scsi_cmd_ioctl(bdev->bd_disk->queue, bdev->bd_disk,
-				mode, cmd, argp);
+		err = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
 	if (err == -ENOTTY)
 		err = generic_ide_ioctl(drive, bdev, cmd, arg);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b4b22576f12a..f6a23ecd982f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -157,7 +157,7 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev, 0);
+	ipoib_ib_dev_down(dev, 1);
 	ipoib_ib_dev_stop(dev, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b166bb75753d..917540c81260 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -189,7 +189,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
 	mcast->mcmember = *mcmember;
 
-	/* Set the cached Q_Key before we attach if it's the broadcast group */
+	/* Set the multicast MTU and cached Q_Key before we attach if it's
+	 * the broadcast group.
+	 */
 	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
 		    sizeof (union ib_gid))) {
 		spin_lock_irq(&priv->lock);
@@ -197,10 +199,17 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 			spin_unlock_irq(&priv->lock);
 			return -EAGAIN;
 		}
+		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
 		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
 		spin_unlock_irq(&priv->lock);
 		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
 		set_qkey = 1;
+
+		if (!ipoib_cm_admin_enabled(dev)) {
+			rtnl_lock();
+			dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+			rtnl_unlock();
+		}
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -589,14 +598,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 		return;
 	}
 
-	priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
-
-	if (!ipoib_cm_admin_enabled(dev)) {
-		rtnl_lock();
-		dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-		rtnl_unlock();
-	}
-
 	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
 
 	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
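The ipoib hunks move the MTU update from the join task into ipoib_mcast_join_finish(), where the broadcast group's parameters are actually valid. Changing a netdevice MTU outside the usual ndo paths requires the RTNL lock, which is why dev_set_mtu() is bracketed by rtnl_lock()/rtnl_unlock(). A minimal kernel-style sketch of that pattern (the helper name is illustrative, not from the patch):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Illustrative helper: clamp a device MTU from a context that does not
 * already hold RTNL, the same locking shape as the hunk above.
 */
static void example_clamp_mtu(struct net_device *dev, unsigned int limit)
{
	rtnl_lock();
	if (dev->mtu > limit)
		dev_set_mtu(dev, limit);
	rtnl_unlock();
}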
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index b8a1098b66ed..385279b2d54c 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -16,7 +16,6 @@
 #include <linux/sched.h>
 #include "isdnloop.h"
 
-static char *revision = "$Revision: 1.11.6.7 $";
 static char *isdnloop_id = "loop0";
 
 MODULE_DESCRIPTION("ISDN4Linux: Pseudo Driver that simulates an ISDN card");
@@ -1494,17 +1493,6 @@ isdnloop_addcard(char *id1)
 static int __init
 isdnloop_init(void)
 {
-	char *p;
-	char rev[10];
-
-	if ((p = strchr(revision, ':'))) {
-		strcpy(rev, p + 1);
-		p = strchr(rev, '$');
-		*p = 0;
-	} else
-		strcpy(rev, " ??? ");
-	printk(KERN_NOTICE "isdnloop-ISDN-driver Rev%s\n", rev);
-
 	if (isdnloop_id)
 		return (isdnloop_addcard(isdnloop_id));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 9200dbf2391a..bf404845aa9d 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -115,7 +115,17 @@ static int linear_ioctl(struct dm_target *ti, unsigned int cmd,
 			unsigned long arg)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
-	return __blkdev_driver_ioctl(lc->dev->bdev, lc->dev->mode, cmd, arg);
+	struct dm_dev *dev = lc->dev;
+	int r = 0;
+
+	/*
+	 * Only pass ioctls through if the device sizes match exactly.
+	 */
+	if (lc->start ||
+	    ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
+		r = scsi_verify_blk_ioctl(NULL, cmd);
+
+	return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg);
 }
 
 static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 78090ebaba6c..3c06fdb13868 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1541,6 +1541,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
+	/*
+	 * Only pass ioctls through if the device sizes match exactly.
+	 */
+	if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
+		r = scsi_verify_blk_ioctl(NULL, cmd);
+
 	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 }
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index e5a9f9ccea60..882d01910804 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -104,7 +104,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
 static int cafe_device_ready(struct mtd_info *mtd)
 {
 	struct cafe_priv *cafe = mtd->priv;
-	int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000);
+	int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000);
 	uint32_t irqs = cafe_readl(cafe, NAND_IRQ);
 
 	cafe_writel(cafe, irqs, NAND_IRQ);
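The cafe_nand one-character fix is worth spelling out: OR-ing a non-zero mask into a status word always yields non-zero, so the old !!(status | 0x40000000) reported the device ready unconditionally; only the AND form actually tests the bit. A stand-alone illustration:

#include <assert.h>
#include <stdint.h>

#define READY_BIT 0x40000000u

int main(void)
{
	uint32_t status = 0;	/* device not ready */

	/* The old expression: the mask itself makes this non-zero. */
	assert(!!(status | READY_BIT) == 1);

	/* The fixed expression: actually inspects the bit. */
	assert(!!(status & READY_BIT) == 0);
	return 0;
}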
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index ecc41cffb470..e6045d1998af 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -5278,6 +5278,9 @@ static void tg3_poll_controller(struct net_device *dev)
 	int i;
 	struct tg3 *tp = netdev_priv(dev);
 
+	if (tg3_irq_sync(tp))
+		return;
+
 	for (i = 0; i < tp->irq_cnt; i++)
 		tg3_interrupt(tp->napi[i].irq_vec, &tp->napi[i]);
 }
@@ -14476,6 +14479,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	tp->pm_cap = pm_cap;
 	tp->rx_mode = TG3_DEF_RX_MODE;
 	tp->tx_mode = TG3_DEF_TX_MODE;
+	tp->irq_sync = 1;
 
 	if (tg3_debug > 0)
 		tp->msg_enable = tg3_debug;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7b4a88b2f696..bb181e92ad2c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1177,10 +1177,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	int sndbuf;
 	int ret;
 
-	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
 		if (copy_from_user(&ifr, argp, ifreq_len))
 			return -EFAULT;
-
+	} else {
+		memset(&ifr, 0, sizeof(ifr));
+	}
 	if (cmd == TUNGETFEATURES) {
 		/* Currently this just means: "what IFF flags are valid?".
 		 * This is needed because we never checked for invalid flags on
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index c4c334d9770f..72906eb06b0a 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -1317,7 +1317,7 @@ static int kaweth_internal_control_msg(struct usb_device *usb_dev,
 	int retv;
 	int length = 0; /* shut up GCC */
 
-	urb = usb_alloc_urb(0, GFP_NOIO);
+	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb)
 		return -ENOMEM;
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index bb2dd9329aa0..40112d49ec3e 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3849,6 +3849,8 @@ static void b43legacy_remove(struct ssb_device *dev)
 	cancel_work_sync(&wldev->restart_work);
 
 	B43legacy_WARN_ON(!wl);
+	if (!wldev->fw.ucode)
+		return;		/* NULL if fw never loaded */
 	if (wl->current_dev == wldev)
 		ieee80211_unregister_hw(wl->hw);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 6938fdc41e79..052af89854c1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2692,6 +2692,40 @@ static void __devinit fixup_ti816x_class(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class);
 
+/*
+ * Some BIOS implementations leave the Intel GPU interrupts enabled,
+ * even though no one is handling them (f.e. i915 driver is never loaded).
+ * Additionally the interrupt destination is not set up properly
+ * and the interrupt ends up -somewhere-.
+ *
+ * These spurious interrupts are "sticky" and the kernel disables
+ * the (shared) interrupt line after 100.000+ generated interrupts.
+ *
+ * Fix it by disabling the still enabled interrupts.
+ * This resolves crashes often seen on monitor unplug.
+ */
+#define I915_DEIER_REG 0x4400c
+static void __devinit disable_igfx_irq(struct pci_dev *dev)
+{
+	void __iomem *regs = pci_iomap(dev, 0, 0);
+	if (regs == NULL) {
+		dev_warn(&dev->dev, "igfx quirk: Can't iomap PCI device\n");
+		return;
+	}
+
+	/* Check if any interrupt line is still enabled */
+	if (readl(regs + I915_DEIER_REG) != 0) {
+		dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; "
+			"disabling\n");
+
+		writel(0, regs + I915_DEIER_REG);
+	}
+
+	pci_iounmap(dev, regs);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
+
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 			  struct pci_fixup *end)
 {
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 18352ff82101..cb288b7542e4 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1184,6 +1184,9 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
 	int rc = 0;
 	u64 mask64;
 
+	memset(&iscsi_init, 0x00, sizeof(struct iscsi_kwqe_init1));
+	memset(&iscsi_init2, 0x00, sizeof(struct iscsi_kwqe_init2));
+
 	bnx2i_adjust_qp_size(hba);
 
 	iscsi_init.flags =
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 4ee42bb48dcd..f31f85e03f3a 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -755,7 +755,7 @@ static struct domain_device *sas_ex_discover_end_dev(
 }
 
 /* See if this phy is part of a wide port */
-static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
+static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
 {
 	struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id];
 	int i;
@@ -771,11 +771,11 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
 			sas_port_add_phy(ephy->port, phy->phy);
 			phy->port = ephy->port;
 			phy->phy_state = PHY_DEVICE_DISCOVERED;
-			return 0;
+			return true;
 		}
 	}
 
-	return -ENODEV;
+	return false;
 }
 
 static struct domain_device *sas_ex_discover_expander(
@@ -913,8 +913,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 		return res;
 	}
 
-	res = sas_ex_join_wide_port(dev, phy_id);
-	if (!res) {
+	if (sas_ex_join_wide_port(dev, phy_id)) {
 		SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
 			    phy_id, SAS_ADDR(ex_phy->attached_sas_addr));
 		return res;
@@ -959,8 +958,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 			if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) ==
 			    SAS_ADDR(child->sas_addr)) {
 				ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED;
-				res = sas_ex_join_wide_port(dev, i);
-				if (!res)
+				if (sas_ex_join_wide_port(dev, i))
 					SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
 						    i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr));
 
@@ -1813,32 +1811,20 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
 {
 	struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id];
 	struct domain_device *child;
-	bool found = false;
-	int res, i;
+	int res;
 
 	SAS_DPRINTK("ex %016llx phy%d new device attached\n",
 		    SAS_ADDR(dev->sas_addr), phy_id);
 	res = sas_ex_phy_discover(dev, phy_id);
 	if (res)
-		goto out;
-	/* to support the wide port inserted */
-	for (i = 0; i < dev->ex_dev.num_phys; i++) {
-		struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i];
-		if (i == phy_id)
-			continue;
-		if (SAS_ADDR(ex_phy_temp->attached_sas_addr) ==
-		    SAS_ADDR(ex_phy->attached_sas_addr)) {
-			found = true;
-			break;
-		}
-	}
-	if (found) {
-		sas_ex_join_wide_port(dev, phy_id);
+		return res;
+
+	if (sas_ex_join_wide_port(dev, phy_id))
 		return 0;
-	}
+
 	res = sas_ex_discover_devices(dev, phy_id);
-	if (!res)
-		goto out;
+	if (res)
+		return res;
 	list_for_each_entry(child, &dev->ex_dev.children, siblings) {
 		if (SAS_ADDR(child->sas_addr) ==
 		    SAS_ADDR(ex_phy->attached_sas_addr)) {
@@ -1848,7 +1834,6 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
 			break;
 		}
 	}
-out:
 	return res;
 }
 
@@ -1947,9 +1932,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev)
 	struct domain_device *dev = NULL;
 
 	res = sas_find_bcast_dev(port_dev, &dev);
-	if (res)
-		goto out;
-	if (dev) {
+	while (res == 0 && dev) {
 		struct expander_device *ex = &dev->ex_dev;
 		int i = 0, phy_id;
 
@@ -1961,8 +1944,10 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev)
 			res = sas_rediscover(dev, phy_id);
 			i = phy_id + 1;
 		} while (i < ex->num_phys);
+
+		dev = NULL;
+		res = sas_find_bcast_dev(port_dev, &dev);
 	}
-out:
 	return res;
 }
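Converting sas_ex_join_wide_port() from an int error code to bool removes an inversion hazard: with the old convention success was 0, so every caller had to remember the negation in "if (!res)", and one forgotten "!" silently flips the logic. A small sketch of the two conventions side by side:

#include <stdbool.h>
#include <assert.h>

/* Old convention: 0 on success, negative errno on failure. */
static int join_old(int ok) { return ok ? 0 : -19 /* -ENODEV */; }

/* New convention: the name reads as a predicate. */
static bool join_new(int ok) { return ok != 0; }

int main(void)
{
	/* The correct old-style test inverts the result... */
	assert(!join_old(1));
	/* ...so a plain `if (join_old(...))` would wrongly mean "failed".
	 * The bool form makes the call site read naturally:
	 */
	assert(join_new(1));
	return 0;
}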
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7ad53fa42766..3a56835c3ad3 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1614,6 +1614,20 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
 	 * requests are started.
 	 */
 	scsi_run_host_queues(shost);
+
+	/*
+	 * if eh is active and host_eh_scheduled is pending we need to re-run
+	 * recovery.  we do this check after scsi_run_host_queues() to allow
+	 * everything pent up since the last eh run a chance to make forward
+	 * progress before we sync again.  Either we'll immediately re-run
+	 * recovery or scsi_device_unbusy() will wake us again when these
+	 * pending commands complete.
+	 */
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (shost->host_eh_scheduled)
+		if (scsi_host_set_state(shost, SHOST_RECOVERY))
+			WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY));
+	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
 /**
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ca8666b19c54..a7e6572940ef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -215,6 +215,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	int ret = DRIVER_ERROR << 24;
 
 	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
+	if (!req)
+		return ret;
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_WAIT))
@@ -482,15 +484,26 @@ static void scsi_run_queue(struct request_queue *q)
  */
 static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
 {
+	struct scsi_device *sdev = cmd->device;
 	struct request *req = cmd->request;
 	unsigned long flags;
 
+	/*
+	 * We need to hold a reference on the device to avoid the queue being
+	 * killed after the unlock and before scsi_run_queue is invoked which
+	 * may happen because scsi_unprep_request() puts the command which
+	 * releases its reference on the device.
+	 */
+	get_device(&sdev->sdev_gendev);
+
 	spin_lock_irqsave(q->queue_lock, flags);
 	scsi_unprep_request(req);
 	blk_requeue_request(q, req);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	scsi_run_queue(q);
+
+	put_device(&sdev->sdev_gendev);
 }
 
 void scsi_next_command(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 18e6c59ed12d..1b543a229487 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -886,6 +886,10 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 	SCSI_LOG_IOCTL(1, printk("sd_ioctl: disk=%s, cmd=0x%x\n",
 						disk->disk_name, cmd));
 
+	error = scsi_verify_blk_ioctl(bdev, cmd);
+	if (error < 0)
+		return error;
+
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
 	 * else try and use this device.  Also, if error recovery fails, it
@@ -907,7 +911,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 		case SCSI_IOCTL_GET_BUS_NUMBER:
 			return scsi_ioctl(sdp, cmd, p);
 		default:
-			error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
+			error = scsi_cmd_blk_ioctl(bdev, mode, cmd, p);
 			if (error != -ENOTTY)
 				return error;
 	}
@@ -1065,6 +1069,11 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bdev, cmd);
+	if (ret < 0)
+		return ret;
 
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
@@ -1076,8 +1085,6 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 		return -ENODEV;
 
 	if (sdev->host->hostt->compat_ioctl) {
-		int ret;
-
 		ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg);
 
 		return ret;
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index c1d79a233476..848894773c64 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -82,7 +82,7 @@ static unsigned int skip_txen_test; /* force skip of txen test at init time */
 #define DEBUG_INTR(fmt...)	do { } while (0)
 #endif
 
-#define PASS_LIMIT	256
+#define PASS_LIMIT	512
 
 #define BOTH_EMPTY 	(UART_LSR_TEMT | UART_LSR_THRE)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index af45f735f6e5..45f85df306e5 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1120,7 +1120,8 @@ skip_normal_probe:
 	}
 
 
-	if (data_interface->cur_altsetting->desc.bNumEndpoints < 2)
+	if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
+	    control_interface->cur_altsetting->desc.bNumEndpoints == 0)
 		return -EINVAL;
 
 	epctrl = &control_interface->cur_altsetting->endpoint[0].desc;
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 189141ca4e05..85e20efba9ca 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -54,6 +54,7 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
 #define WDM_POLL_RUNNING	6
 #define WDM_RESPONDING		7
 #define WDM_SUSPENDING		8
+#define WDM_OVERFLOW		10
 
 #define WDM_MAX			16
 
@@ -114,6 +115,7 @@ static void wdm_in_callback(struct urb *urb)
 {
 	struct wdm_device *desc = urb->context;
 	int status = urb->status;
+	int length = urb->actual_length;
 
 	spin_lock(&desc->iuspin);
 	clear_bit(WDM_RESPONDING, &desc->flags);
@@ -144,9 +146,17 @@ static void wdm_in_callback(struct urb *urb)
 	}
 
 	desc->rerr = status;
-	desc->reslength = urb->actual_length;
-	memmove(desc->ubuf + desc->length, desc->inbuf, desc->reslength);
-	desc->length += desc->reslength;
+	if (length + desc->length > desc->wMaxCommand) {
+		/* The buffer would overflow */
+		set_bit(WDM_OVERFLOW, &desc->flags);
+	} else {
+		/* we may already be in overflow */
+		if (!test_bit(WDM_OVERFLOW, &desc->flags)) {
+			memmove(desc->ubuf + desc->length, desc->inbuf, length);
+			desc->length += length;
+			desc->reslength = length;
+		}
+	}
 skip_error:
 	wake_up(&desc->wait);
 
@@ -410,6 +420,11 @@ retry:
 			rv = -ENODEV;
 			goto err;
 		}
+		if (test_bit(WDM_OVERFLOW, &desc->flags)) {
+			clear_bit(WDM_OVERFLOW, &desc->flags);
+			rv = -ENOBUFS;
+			goto err;
+		}
 		i++;
 		if (file->f_flags & O_NONBLOCK) {
 			if (!test_bit(WDM_READ, &desc->flags)) {
@@ -449,7 +464,10 @@ retry:
 			spin_unlock_irq(&desc->iuspin);
 			goto retry;
 		}
+
 		if (!desc->reslength) { /* zero length read */
+			dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__);
+			clear_bit(WDM_READ, &desc->flags);
 			spin_unlock_irq(&desc->iuspin);
 			goto retry;
 		}
@@ -860,6 +878,7 @@ static int wdm_post_reset(struct usb_interface *intf)
 	struct wdm_device *desc = usb_get_intfdata(intf);
 	int rv;
 
+	clear_bit(WDM_OVERFLOW, &desc->flags);
 	rv = recover_from_urb_loss(desc);
 	mutex_unlock(&desc->lock);
 	return 0;
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 85a496754780..3437cf2cdcaf 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1527,10 +1527,14 @@ static int processcompl_compat(struct async *as, void __user * __user *arg)
 	void __user *addr = as->userurb;
 	unsigned int i;
 
-	if (as->userbuffer && urb->actual_length)
-		if (copy_to_user(as->userbuffer, urb->transfer_buffer,
-				 urb->actual_length))
+	if (as->userbuffer && urb->actual_length) {
+		if (urb->number_of_packets > 0)		/* Isochronous */
+			i = urb->transfer_buffer_length;
+		else					/* Non-Isoc */
+			i = urb->actual_length;
+		if (copy_to_user(as->userbuffer, urb->transfer_buffer, i))
 			return -EFAULT;
+	}
 	if (put_user(as->status, &userurb->status))
 		return -EFAULT;
 	if (put_user(urb->actual_length, &userurb->actual_length))
diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 6e98a3697844..e4c7f53dc56b 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -437,7 +437,7 @@ static int dbgp_ehci_startup(void)
 	writel(FLAG_CF, &ehci_regs->configured_flag);
 
 	/* Wait until the controller is no longer halted */
-	loop = 10;
+	loop = 1000;
 	do {
 		status = readl(&ehci_regs->status);
 		if (!(status & STS_HALT))
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 5a320236ee3a..1674039cf683 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -84,7 +84,8 @@ static const char	hcd_name [] = "ehci_hcd";
 #define EHCI_IAA_MSECS		10		/* arbitrary */
 #define EHCI_IO_JIFFIES		(HZ/10)		/* io watchdog > irq_thresh */
 #define EHCI_ASYNC_JIFFIES	(HZ/20)		/* async idle timeout */
-#define EHCI_SHRINK_FRAMES	5		/* async qh unlink delay */
+#define EHCI_SHRINK_JIFFIES	(DIV_ROUND_UP(HZ, 200) + 1)
+						/* 200-ms async qh unlink delay */
 
 /* Initial IRQ latency:  faster than hw default */
 static int log2_irq_thresh = 0;		// 0 to 6
@@ -139,10 +140,7 @@ timer_action(struct ehci_hcd *ehci, enum ehci_timer_action action)
 			break;
 		/* case TIMER_ASYNC_SHRINK: */
 		default:
-			/* add a jiffie since we synch against the
-			 * 8 KHz uframe counter.
-			 */
-			t = DIV_ROUND_UP(EHCI_SHRINK_FRAMES * HZ, 1000) + 1;
+			t = EHCI_SHRINK_JIFFIES;
 			break;
 		}
 		mod_timer(&ehci->watchdog, t + jiffies);
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 9b46a1ee616f..1992abbe223d 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -1226,6 +1226,8 @@ static void start_unlink_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 
 	prev->hw->hw_next = qh->hw->hw_next;
 	prev->qh_next = qh->qh_next;
+	if (ehci->qh_scan_next == qh)
+		ehci->qh_scan_next = qh->qh_next.qh;
 	wmb ();
 
 	/* If the controller isn't running, we don't have to wait for it */
@@ -1251,53 +1253,49 @@ static void scan_async (struct ehci_hcd *ehci)
 	struct ehci_qh		*qh;
 	enum ehci_timer_action	action = TIMER_IO_WATCHDOG;
 
-	ehci->stamp = ehci_readl(ehci, &ehci->regs->frame_index);
 	timer_action_done (ehci, TIMER_ASYNC_SHRINK);
-rescan:
 	stopped = !HC_IS_RUNNING(ehci_to_hcd(ehci)->state);
-	qh = ehci->async->qh_next.qh;
-	if (likely (qh != NULL)) {
-		do {
-			/* clean any finished work for this qh */
-			if (!list_empty(&qh->qtd_list) && (stopped ||
-					qh->stamp != ehci->stamp)) {
-				int temp;
-
-				/* unlinks could happen here; completion
-				 * reporting drops the lock.  rescan using
-				 * the latest schedule, but don't rescan
-				 * qhs we already finished (no looping)
-				 * unless the controller is stopped.
-				 */
-				qh = qh_get (qh);
-				qh->stamp = ehci->stamp;
-				temp = qh_completions (ehci, qh);
-				if (qh->needs_rescan)
-					unlink_async(ehci, qh);
-				qh_put (qh);
-				if (temp != 0) {
-					goto rescan;
-				}
-			}
-
-			/* unlink idle entries, reducing DMA usage as well
-			 * as HCD schedule-scanning costs.  delay for any qh
-			 * we just scanned, there's a not-unusual case that it
-			 * doesn't stay idle for long.
-			 * (plus, avoids some kind of re-activation race.)
+	ehci->qh_scan_next = ehci->async->qh_next.qh;
+	while (ehci->qh_scan_next) {
+		qh = ehci->qh_scan_next;
+		ehci->qh_scan_next = qh->qh_next.qh;
+ rescan:
+		/* clean any finished work for this qh */
+		if (!list_empty(&qh->qtd_list)) {
+			int temp;
+
+			/*
+			 * Unlinks could happen here; completion reporting
+			 * drops the lock.  That's why ehci->qh_scan_next
+			 * always holds the next qh to scan; if the next qh
+			 * gets unlinked then ehci->qh_scan_next is adjusted
+			 * in start_unlink_async().
 			 */
-			if (list_empty(&qh->qtd_list)
-					&& qh->qh_state == QH_STATE_LINKED) {
-				if (!ehci->reclaim && (stopped ||
-					((ehci->stamp - qh->stamp) & 0x1fff)
-						>= EHCI_SHRINK_FRAMES * 8))
-					start_unlink_async(ehci, qh);
-				else
-					action = TIMER_ASYNC_SHRINK;
-			}
+			qh = qh_get(qh);
+			temp = qh_completions(ehci, qh);
+			if (qh->needs_rescan)
+				unlink_async(ehci, qh);
+			qh->unlink_time = jiffies + EHCI_SHRINK_JIFFIES;
+			qh_put(qh);
+			if (temp != 0)
+				goto rescan;
+		}
 
-			qh = qh->qh_next.qh;
-		} while (qh);
+		/* unlink idle entries, reducing DMA usage as well
+		 * as HCD schedule-scanning costs.  delay for any qh
+		 * we just scanned, there's a not-unusual case that it
+		 * doesn't stay idle for long.
+		 * (plus, avoids some kind of re-activation race.)
+		 */
+		if (list_empty(&qh->qtd_list)
+				&& qh->qh_state == QH_STATE_LINKED) {
+			if (!ehci->reclaim && (stopped ||
+					time_after_eq(jiffies, qh->unlink_time)))
+				start_unlink_async(ehci, qh);
+			else
+				action = TIMER_ASYNC_SHRINK;
+		}
 	}
 	if (action == TIMER_ASYNC_SHRINK)
 		timer_action (ehci, TIMER_ASYNC_SHRINK);
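The ehci-q rewrite replaces a frame-counter heuristic with a jiffies deadline (qh->unlink_time). Comparing raw jiffies values with >= breaks when the counter wraps; the kernel's time_after_eq() does the comparison via signed difference so it survives wrap-around. A stand-alone model of the idiom:

#include <assert.h>
#include <limits.h>

/* Userspace model of time_after_eq(): compare jiffies-style counters
 * through a signed difference so wrap-around does not flip the result.
 */
static int after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

int main(void)
{
	unsigned long deadline = ULONG_MAX - 5;	/* about to wrap */
	unsigned long now = 10;			/* already wrapped past it */

	assert(after_eq(now, deadline));	/* naive now >= deadline says no */
	return 0;
}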
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 1bb7a7f1ae77..d32d26e27f53 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -74,6 +74,7 @@ struct ehci_hcd {			/* one per controller */
 	/* async schedule support */
 	struct ehci_qh		*async;
 	struct ehci_qh		*reclaim;
+	struct ehci_qh		*qh_scan_next;
 	unsigned		scanning : 1;
 
 	/* periodic schedule support */
@@ -116,7 +117,6 @@ struct ehci_hcd {			/* one per controller */
 	struct timer_list	iaa_watchdog;
 	struct timer_list	watchdog;
 	unsigned long		actions;
-	unsigned		stamp;
 	unsigned		random_frame;
 	unsigned long		next_statechange;
 	ktime_t			last_periodic_enable;
@@ -336,6 +336,7 @@ struct ehci_qh {
 	struct ehci_qh		*reclaim;	/* next to reclaim */
 
 	struct ehci_hcd		*ehci;
+	unsigned long		unlink_time;
 
 	/*
 	 * Do NOT use atomic operations for QH refcounting.  On some CPUs
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index eae8b18437cb..afc37d4879cc 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -418,12 +418,12 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
 	void __iomem *op_reg_base;
 	u32	val;
 	int	timeout;
+	int	len = pci_resource_len(pdev, 0);
 
 	if (!mmio_resource_enabled(pdev, 0))
 		return;
 
-	base = ioremap_nocache(pci_resource_start(pdev, 0),
-				pci_resource_len(pdev, 0));
+	base = ioremap_nocache(pci_resource_start(pdev, 0), len);
 	if (base == NULL)
 		return;
 
@@ -433,9 +433,17 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
 	 */
 	ext_cap_offset = xhci_find_next_cap_offset(base, XHCI_HCC_PARAMS_OFFSET);
 	do {
+		if ((ext_cap_offset + sizeof(val)) > len) {
+			/* We're reading garbage from the controller */
+			dev_warn(&pdev->dev,
+				 "xHCI controller failing to respond");
+			return;
+		}
+
 		if (!ext_cap_offset)
 			/* We've reached the end of the extended capabilities */
 			goto hc_init;
+
 		val = readl(base + ext_cap_offset);
 		if (XHCI_EXT_CAPS_ID(val) == XHCI_EXT_CAPS_LEGACY)
 			break;
@@ -458,9 +466,13 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
 		}
 	}
 
-	/* Disable any BIOS SMIs */
-	writel(XHCI_LEGACY_DISABLE_SMI,
-			base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
+	val = readl(base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
+	/* Mask off (turn off) any enabled SMIs */
+	val &= XHCI_LEGACY_DISABLE_SMI;
+	/* Mask all SMI events bits, RW1C */
+	val |= XHCI_LEGACY_SMI_EVENTS;
+	/* Disable any BIOS SMIs and clear all SMI events*/
+	writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
 
 hc_init:
 	op_reg_base = base + XHCI_HC_LENGTH(readl(base));
diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h
index 78c4edac1db1..e2acc97b169b 100644
--- a/drivers/usb/host/xhci-ext-caps.h
+++ b/drivers/usb/host/xhci-ext-caps.h
@@ -62,8 +62,9 @@
 /* USB Legacy Support Control and Status Register - section 7.1.2 */
 /* Add this offset, plus the value of xECP in HCCPARAMS to the base address */
 #define XHCI_LEGACY_CONTROL_OFFSET	(0x04)
-/* bits 1:2, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */
-#define	XHCI_LEGACY_DISABLE_SMI		((0x3 << 1) + (0xff << 5) + (0x7 << 17))
+/* bits 1:3, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */
+#define	XHCI_LEGACY_DISABLE_SMI		((0x7 << 1) + (0xff << 5) + (0x7 << 17))
+#define XHCI_LEGACY_SMI_EVENTS		(0x7 << 29)
 
 /* command register values to disable interrupts and halt the HC */
 /* start/stop HC execution - do not write unless HC is halted*/
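The pci-quirks/xhci-ext-caps pair fixes a classic write-one-to-clear (RW1C) pitfall: blindly writing a constant back can fail to clear, or later re-arm, the SMI event bits. The safe sequence is read, AND with the preserve mask, OR ones into the RW1C event bits, then write. A stand-alone sketch using the mask values from the patch:

#include <assert.h>
#include <stdint.h>

#define PRESERVE_MASK 0x000E1FEEu	/* models XHCI_LEGACY_DISABLE_SMI */
#define RW1C_EVENTS   0xE0000000u	/* models XHCI_LEGACY_SMI_EVENTS  */

/* The read-modify-write the quirk now performs: keep preserved bits,
 * drop SMI-enable bits, set the RW1C event bits so the write clears them.
 */
static uint32_t update_legacy_control(uint32_t old)
{
	return (old & PRESERVE_MASK) | RW1C_EVENTS;
}

int main(void)
{
	uint32_t hw = 0xF00E1FFFu;	/* enables and pending events set */
	uint32_t out = update_legacy_control(hw);

	assert((out & ~PRESERVE_MASK & ~RW1C_EVENTS) == 0);
	assert((out & RW1C_EVENTS) == RW1C_EVENTS);
	return 0;
}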
if (mos7840_port->busy[i]) { + struct urb *urb = mos7840_port->write_urb_pool[i]; + chars += urb->transfer_buffer_length; + } + } spin_unlock_irqrestore(&mos7840_port->pool_lock, flags); dbg("%s - returns %d", __func__, chars); return chars; @@ -2565,7 +2568,6 @@ error: kfree(mos7840_port->ctrl_buf); usb_free_urb(mos7840_port->control_urb); kfree(mos7840_port); - serial->port[i] = NULL; } return status; } @@ -2632,6 +2634,7 @@ static void mos7840_release(struct usb_serial *serial) mos7840_port = mos7840_get_port_private(serial->port[i]); dbg("mos7840_port %d = %p", i, mos7840_port); if (mos7840_port) { + usb_free_urb(mos7840_port->control_urb); kfree(mos7840_port->ctrl_buf); kfree(mos7840_port->dr); kfree(mos7840_port); diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index e3f32a41ef34..42ea133b0d57 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -980,6 +980,7 @@ static void sierra_release(struct usb_serial *serial) continue; kfree(portdata); } + kfree(serial->private); } #ifdef CONFIG_PM diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index b40884a4191d..561bf115619b 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1040,6 +1040,12 @@ int usb_serial_probe(struct usb_interface *interface, serial->attached = 1; } + /* Avoid race with tty_open and serial_install by setting the + * disconnected flag and not clearing it until all ports have been + * registered. + */ + serial->disconnected = 1; + if (get_free_serial(serial, num_ports, &minor) == NULL) { dev_err(&interface->dev, "No more free serial devices\n"); goto probe_error; @@ -1062,6 +1068,8 @@ int usb_serial_probe(struct usb_interface *interface, } } + serial->disconnected = 0; + usb_serial_console_init(debug, minor); exit: diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 12ed8209ca72..9bd51e9dc30a 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -576,6 +576,7 @@ no_firmware: "%s: please contact support@connecttech.com\n", serial->type->description); kfree(result); + kfree(command); return -ENODEV; no_command_private: diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 7b8839ebf3c4..917c465463db 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -815,8 +815,15 @@ static int __devinit uvesafb_vbe_init(struct fb_info *info) par->pmi_setpal = pmi_setpal; par->ypan = ypan; - if (par->pmi_setpal || par->ypan) - uvesafb_vbe_getpmi(task, par); + if (par->pmi_setpal || par->ypan) { + if (__supported_pte_mask & _PAGE_NX) { + par->pmi_setpal = par->ypan = 0; + printk(KERN_WARNING "uvesafb: NX protection is active; " + "not using the PMI.\n"); + } else { + uvesafb_vbe_getpmi(task, par); + } + } #else /* The protected mode interface is not available on non-x86.
*/ par->pmi_setpal = par->ypan = 0; diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index ad5897dc4495..cf05f4c82baa 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -918,7 +918,8 @@ void w1_search(struct w1_master *dev, u8 search_type, w1_slave_found_callback cb tmp64 = (triplet_ret >> 2); rn |= (tmp64 << i); - if (kthread_should_stop()) { + /* ensure we're called from kthread and not by netlink callback */ + if (!dev->priv && kthread_should_stop()) { dev_dbg(&dev->dev, "Abort w1_search\n"); return; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c21da8aebe46..eee4dd5a13cc 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1661,30 +1661,19 @@ static int elf_note_info_init(struct elf_note_info *info) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - goto notes_free; + return 0; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - goto psinfo_free; + return 0; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - goto prstatus_free; + return 0; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - goto fpu_free; + return 0; #endif return 1; -#ifdef ELF_CORE_COPY_XFPREGS - fpu_free: - kfree(info->fpu); -#endif - prstatus_free: - kfree(info->prstatus); - psinfo_free: - kfree(info->psinfo); - notes_free: - kfree(info->notes); - return 0; } static int fill_note_info(struct elfhdr *elf, int phdrs, diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index b8e8b0acf9bd..4a1b984638a3 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 42b60b04ea06..258c5ca3f534 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -116,10 +116,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; - retval = -ENOEXEC; - if (bprm->recursion_depth > BINPRM_MAX_RECURSION) - goto _ret; - /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -176,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _error; bprm->argc ++; - bprm->interp = iname; /* for binfmt_script */ + /* Update interp in case binfmt_script needs it. */ + retval = bprm_change_interp(iname, bprm); + if (retval < 0) + goto _error; interp_file = open_exec (iname); retval = PTR_ERR (interp_file); @@ -197,8 +196,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; - bprm->recursion_depth++; - retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..65a3c1732ced 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -21,15 +21,13 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) char interp[BINPRM_BUF_SIZE]; int retval; - if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || - (bprm->recursion_depth > BINPRM_MAX_RECURSION)) + if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. 
-TYT */ - bprm->recursion_depth++; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -81,7 +79,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) retval = copy_strings_kernel(1, &i_name, bprm); if (retval) return retval; bprm->argc++; - bprm->interp = interp; + retval = bprm_change_interp(interp, bprm); + if (retval < 0) + return retval; /* * OK, now restart the process with the interpreter's dentry. diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 462859a30141..474d1b82e38c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -212,10 +212,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, work->ordered_func(work); - /* now take the lock again and call the freeing code */ + /* now take the lock again and drop our item from the list */ spin_lock(&workers->order_lock); list_del(&work->order_list); + spin_unlock(&workers->order_lock); + + /* + * we don't want to call the ordered free functions + * with the lock held though + */ work->ordered_free(work); + spin_lock(&workers->order_lock); } spin_unlock(&workers->order_lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c04ebb1e4c98..7f75546941ac 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -565,6 +565,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } + /* + * Wait for rcu kworkers under __btrfs_close_devices + * to finish all blkdev_puts so device is really + * free when umount is done. + */ + rcu_barrier(); return ret; } diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f7..1d1f9b4cbd87 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,11 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. 
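 */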
@@ -97,6 +102,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 78e4d2a3a68b..61338373315e 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -227,6 +227,8 @@ compose_mount_options_out: compose_mount_options_err: kfree(mountdata); mountdata = ERR_PTR(rc); + kfree(*devname); + *devname = NULL; goto compose_mount_options_out; } diff --git a/fs/compat.c b/fs/compat.c index 633e63c32aa7..388555d404bf 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1231,11 +1231,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1288,11 +1291,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 641640dc7ae5..7a00d9b155bf 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -227,6 +227,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, err = get_user(palp, &up->palette); err |= get_user(length, &up->length); + if (err) + return -EFAULT; up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); err = put_user(compat_ptr(palp), &up_native->palette); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index b39e46f020a7..532b97bb1f15 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -32,6 +32,7 @@ #include <linux/crypto.h> #include <linux/fs_stack.h> #include <linux/slab.h> +#include <linux/xattr.h> #include <asm/unaligned.h> #include "ecryptfs_kernel.h" @@ -1054,10 +1055,10 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, rc = -EOPNOTSUPP; goto out; } - mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, - size, flags); - mutex_unlock(&lower_dentry->d_inode->i_mutex); + + rc = vfs_setxattr(lower_dentry, name, value, size, flags); + if (!rc) + fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; } diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index d8c3a373aafa..920d5d9a0cdb 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 34ca5ca9c3e8..f8a6c0876a7a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1033,10 +1033,30 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * otherwise we might miss an event that happens between the * f_op->poll() call and the new
event set registering. */ - epi->event.events = event->events; + epi->event.events = event->events; /* need barrier below */ epi->event.data = event->data; /* protected by mtx */ /* + * The following barrier has two effects: + * + * 1) Flush epi changes above to other CPUs. This ensures + * we do not miss events from ep_poll_callback if an + * event occurs immediately after we call f_op->poll(). + * We need this because we did not take ep->lock while + * changing epi above (but ep_poll_callback does take + * ep->lock). + * + * 2) We also need to ensure we do not miss _past_ events + * when calling f_op->poll(). This barrier also + * pairs with the barrier in wq_has_sleeper (see + * comments for wq_has_sleeper). + * + * This barrier will now guarantee ep_poll_callback or f_op->poll + * (or both) will notice the readiness of an item. + */ + smp_mb(); + + /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ diff --git a/fs/exec.c b/fs/exec.c index 4afb996086d5..aa3d2ec58c97 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1119,9 +1119,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ @@ -1281,6 +1296,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) int try,retval; struct linux_binfmt *fmt; + /* This allows 4 levels of binfmt rewrites before failing hard. */ + if (depth > 5) + return -ELOOP; + retval = security_bprm_check(bprm); if (retval) return retval; @@ -1299,12 +1318,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); + bprm->recursion_depth = depth + 1; retval = fn(bprm, regs); - /* - * Restore the depth counter to its starting value - * in this call, so we don't have to rely on every - * load_binary function to restore it on return. - */ bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 0d0e97ed3ff6..fd16a928574c 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -575,8 +575,12 @@ got: if (IS_DIRSYNC(inode)) handle->h_sync = 1; if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. 
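+ * Returning -EIO instead of -EINVAL flags this as on-disk corruption: + * the inode number we just allocated is already live in the inode hash.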
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ea33bdf0a300..f841730b751e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2959,6 +2959,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; again: /* we can't allow multiple procs in here at once, it's a bit racy */ @@ -2996,7 +2998,11 @@ again: raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { + need_datasync = 1; + raw_inode->i_size = disksize; + } raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); @@ -3012,8 +3018,11 @@ again: if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); + disksize = cpu_to_le32(ei->i_disksize >> 32); + if (disksize != raw_inode->i_size_high) { + raw_inode->i_size_high = disksize; + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, @@ -3066,6 +3075,8 @@ again: ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); + if (need_datasync) + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 8a2a29d35a6f..f14fa786dad0 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -442,8 +442,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, retry: handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto release_and_out; + } error = ext4_set_acl(handle, inode, type, acl); ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 7f6b5826d5a6..4783c5a4adac 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1009,8 +1009,12 @@ got: if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice.
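+ * As in the ext3 case above: the inode number is already in use, so + * report -EIO (corruption) rather than -EINVAL (bad argument).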
+ */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b8965bb679ee..658ca8d92ded 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1089,6 +1089,15 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } + if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + "with only %d reserved metadata blocks\n", __func__, + inode->i_ino, ei->i_allocated_meta_blocks, + ei->i_reserved_meta_blocks); + WARN_ON(1); + ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; + } + /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; @@ -5227,6 +5236,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; /* For fields not tracked in the in-memory inode, * initialise them to zero for new inodes. */ @@ -5275,7 +5285,10 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); + if (ei->i_disksize != ext4_isize(raw_inode)) { + ext4_isize_set(raw_inode, ei->i_disksize); + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -5328,7 +5341,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - ext4_update_inode_fsync_trans(handle, inode, 0); + ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f1c9a84c50a3..ac7889907361 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2068,7 +2068,11 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { - if (group == ngroups) + /* + * Artificially restricted ngroups for non-extent + * files makes group > ngroups possible on first loop.
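+ * (The goal group in ac_g_ex was chosen before ngroups was clamped, + * so the scan can start at or beyond the restricted limit.)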
+ */ + if (group >= ngroups) group = 0; /* This now checks without needing the buddy page */ @@ -4153,7 +4157,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - rcu_read_lock(); + spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4177,12 +4181,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - rcu_read_unlock(); + spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index deff4a5085e8..6764168776bf 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1209,7 +1209,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - + /* TODO: It is a non-obvious task to swap blocks for inodes with full + journaling enabled */ + if (ext4_should_journal_data(orig_inode) || + ext4_should_journal_data(donor_inode)) { + return -EINVAL; + } /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2f31631935ba..f501bdf9d4c1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1451,10 +1451,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, frame->at = entries; frame->bh = bh; bh = bh2; + + ext4_handle_dirty_metadata(handle, dir, frame->bh); + ext4_handle_dirty_metadata(handle, dir, bh); + de = do_split(handle,dir, &bh, frame, &hinfo, &retval); - dx_release (frames); - if (!(de)) + if (!de) { + /* + * Even if the block split failed, we have to properly write + * out all the changes we did so far. Otherwise we can end up + * with a corrupted filesystem. + */ + ext4_mark_inode_dirty(handle, dir); + dx_release(frames); return retval; + } + dx_release(frames); retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); @@ -1813,9 +1825,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4_FS_XATTR inode->i_op = &ext4_special_inode_operations; -#endif err = ext4_add_nondir(handle, dentry, inode); } ext4_journal_stop(handle); @@ -1990,7 +2000,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0, rc; - if (!ext4_handle_valid(handle)) + if (!EXT4_SB(sb)->s_journal) return 0; mutex_lock(&EXT4_SB(sb)->s_orphan_lock); @@ -2071,8 +2081,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; - /* ext4_handle_valid() assumes a valid handle_t pointer */ - if (handle && !ext4_handle_valid(handle)) + if ((!EXT4_SB(inode->i_sb)->s_journal) && + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) return 0; mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); @@ -2091,7 +2101,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory.
*/ - if (sbi->s_journal && !handle) + if (!handle) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6928d5ad2c0d..90906948e242 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2028,7 +2028,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); + mutex_lock(&inode->i_mutex); ext4_truncate(inode); + mutex_unlock(&inode->i_mutex); nr_truncates++; } else { ext4_msg(sb, KERN_DEBUG, diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537f..73c0bd7f7424 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -276,5 +276,5 @@ const struct file_operations fscache_stats_fops = { .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4787ae6c5c1c..b359543c68e5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -855,6 +855,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; } } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f6104a958812..102d58297174 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1664,7 +1664,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) size_t n; u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; - for (n = 0; n < count; n++) { + for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e6d614d10467..829aceeb77ad 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -76,6 +76,9 @@ struct fuse_inode { preserve the original mode */ mode_t orig_i_mode; + /** 64 bit inode number */ + u64 orig_ino; + /** Version of last attribute change */ u64 attr_version; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ec14d19ce501..675aa27d393d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -86,6 +86,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nlookup = 0; fi->attr_version = 0; fi->writectr = 0; + fi->orig_ino = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); @@ -140,6 +141,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. 
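+ * For example, with a 32-bit ino_t an attr->ino of 0x0000000123456789 + * is squashed to 0x23456789 ^ 0x1 == 0x23456788, so the high bits still + * perturb the low word instead of being silently dropped.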
+ */ +static ino_t fuse_squash_ino(u64 ino64) +{ + ino_t ino = (ino_t) ino64; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; + return ino; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -149,7 +162,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; - inode->i_ino = attr->ino; + inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); inode->i_nlink = attr->nlink; inode->i_uid = attr->uid; @@ -175,6 +188,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0022eec63cda..b3d234e5b498 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -447,7 +447,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; int res; res = pagecache_write_begin(NULL, mapping, size, 0, diff --git a/fs/isofs/export.c b/fs/isofs/export.c index ed752cb38474..344aa606eecd 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -131,6 +131,7 @@ isofs_export_encode_fh(struct dentry *dentry, len = 3; fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ + fh16[3] = 0; /* avoid leaking uninitialized data */ fh32[2] = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 1df9270c900b..3fb1656917aa 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -84,7 +84,12 @@ nope: static void release_data_buffer(struct buffer_head *bh) { if (buffer_freed(bh)) { + WARN_ON_ONCE(buffer_dirty(bh)); clear_buffer_freed(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; release_buffer_page(bh); } else put_bh(bh); @@ -863,17 +868,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. 
We are rather thorough in that since the * buffer may still be accessible when blocksize < * pagesize and it is attached to the last partial * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 70713d508a0f..62b66042e104 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1078,6 +1078,14 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (be32_to_cpu(sb->s_first) == 0 || + be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + printk(KERN_WARNING + "JBD: Invalid start block of journal: %u\n", + be32_to_cpu(sb->s_first)); + goto out; + } + return 0; out: diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 5ae71e75a491..590e23885c98 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1838,15 +1838,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1874,10 +1875,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1904,13 +1913,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have * been the one done by * the committing transaction, if it exists.
*/ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1945,10 +1946,26 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + unlock_buffer(bh); + log_wait_commit(journal, tid); + lock_buffer(bh); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1971,6 +1988,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. 
*/ + jh->b_modified = 0; journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2019,7 +2044,8 @@ void journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 7f16feab9917..21bf6cdaedfd 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1194,6 +1194,14 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (be32_to_cpu(sb->s_first) == 0 || + be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + printk(KERN_WARNING + "JBD2: Invalid start block of journal: %u\n", + be32_to_cpu(sb->s_first)); + goto out; + } + return 0; out: diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index bfc70f57900f..ed89123a240f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1836,6 +1836,8 @@ zap_buffer_unlocked: clear_buffer_mapped(bh); clear_buffer_req(bh); clear_buffer_new(bh); + clear_buffer_delay(bh); + clear_buffer_unwritten(bh); bh->b_bdev = NULL; return may_free; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e701002694e5..4958497da25b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -68,7 +68,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8dd330925ede..96e440aba77e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1669,6 +1669,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in goto err_opendata_put; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + nfs_revalidate_inode(server, state->inode); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 28d586a5e77e..3800e7680037 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2607,11 +2607,16 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused++; + pn = resp->rqstp->rq_resused; + if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */ + maxcount -= len; + break; + } resp->rqstp->rq_vec[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); resp->rqstp->rq_vec[v].iov_len = len < PAGE_SIZE ? 
len : PAGE_SIZE; + resp->rqstp->rq_resused++; v++; len -= PAGE_SIZE; } @@ -2659,6 +2664,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); @@ -2708,6 +2715,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; RESERVE_SPACE(8); /* verifier */ savep = p; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 5d3d2a782abc..29da76656b9c 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -150,6 +150,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); + atomic_dec(&group->inotify_data.user->inotify_devs); free_uid(group->inotify_data.user); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 72f882552608..7b1a2c3e36c6 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -290,15 +290,12 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct user_struct *user = group->inotify_data.user; fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); - atomic_dec(&user->inotify_devs); - return 0; } @@ -616,7 +613,7 @@ retry: return ret; } -static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) +static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; unsigned int grp_num; @@ -632,8 +629,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); group->inotify_data.last_wd = 0; - group->inotify_data.user = user; group->inotify_data.fa = NULL; + group->inotify_data.user = get_current_user(); + + if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > + inotify_max_user_instances) { + fsnotify_put_group(group); + return ERR_PTR(-EMFILE); + } return group; } @@ -643,7 +646,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign SYSCALL_DEFINE1(inotify_init1, int, flags) { struct fsnotify_group *group; - struct user_struct *user; int ret; /* Check the IN_* constants for consistency. 
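 * (These must stay in sync: IN_CLOEXEC == O_CLOEXEC and IN_NONBLOCK == * O_NONBLOCK, so the flags can be handed straight to anon_inode_getfd.)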
*/ @@ -653,31 +655,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) return -EINVAL; - user = get_current_user(); - if (unlikely(atomic_read(&user->inotify_devs) >= - inotify_max_user_instances)) { - ret = -EMFILE; - goto out_free_uid; - } - /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ - group = inotify_new_group(user, inotify_max_queued_events); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_free_uid; - } - - atomic_inc(&user->inotify_devs); + group = inotify_new_group(inotify_max_queued_events); + if (IS_ERR(group)) + return PTR_ERR(group); ret = anon_inode_getfd("inotify", &inotify_fops, group, O_RDONLY | flags); - if (ret >= 0) - return ret; + if (ret < 0) + fsnotify_put_group(group); - fsnotify_put_group(group); - atomic_dec(&user->inotify_devs); -out_free_uid: - free_uid(user); return ret; } diff --git a/fs/splice.c b/fs/splice.c index cc617b09e4c2..1c991d6a64b4 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -31,6 +31,7 @@ #include <linux/uio.h> #include <linux/security.h> #include <linux/gfp.h> +#include <linux/socket.h> /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -638,7 +639,11 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, ret = buf->ops->confirm(pipe, buf); if (!ret) { - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; + more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; + + if (sd->len < sd->total_len && pipe->nrbufs > 1) + more |= MSG_SENDPAGE_NOTLAST; + if (file->f_op && file->f_op->sendpage) ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 590717861c7a..37d7153d3f72 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -400,20 +400,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) /** * sysfs_pathname - return full path to sysfs dirent * @sd: sysfs_dirent whose path we want - * @path: caller allocated buffer + * @path: caller allocated buffer of size PATH_MAX * * Gives the name "/" to the sysfs_root entry; any path returned * is relative to wherever sysfs is mounted. - * - * XXX: does no error checking on @path size */ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) { if (sd->s_parent) { sysfs_pathname(sd->s_parent, path); - strcat(path, "/"); + strlcat(path, "/", PATH_MAX); } - strcat(path, sd->s_name); + strlcat(path, sd->s_name, PATH_MAX); return path; } @@ -446,9 +444,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) char *path = kzalloc(PATH_MAX, GFP_KERNEL); WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", (path == NULL) ?
sd->s_name + : (sysfs_pathname(acxt->parent_sd, path), + strlcat(path, "/", PATH_MAX), + strlcat(path, sd->s_name, PATH_MAX), + path)); kfree(path); } diff --git a/fs/udf/file.c b/fs/udf/file.c index 4b6a46ccbf46..af6f30ac228f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -41,20 +41,24 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page *page) +static void __udf_adinicb_readpage(struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - BUG_ON(!PageLocked(page)); - kaddr = kmap(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); +} + +static int udf_adinicb_readpage(struct file *file, struct page *page) +{ + BUG_ON(!PageLocked(page)); + __udf_adinicb_readpage(page); unlock_page(page); return 0; @@ -79,6 +83,25 @@ static int udf_adinicb_writepage(struct page *page, return 0; } +static int udf_adinicb_write_begin(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + struct page *page; + + if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + return -EIO; + page = grab_cache_page_write_begin(mapping, 0, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + __udf_adinicb_readpage(page); + return 0; +} + static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -101,8 +124,8 @@ const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .sync_page = block_sync_page, - .write_begin = simple_write_begin, - .write_end = udf_adinicb_write_end, + .write_begin = udf_adinicb_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8a3fbd177cab..fe64cf54b11e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -654,6 +654,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); *err = -ENOSPC; return NULL; } @@ -684,6 +686,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, udf_update_extents(inode, laarr, startnum, endnum, &prev_epos); brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); newblock = udf_get_pblock(inode->i_sb, newblocknum, iinfo->i_location.partitionReferenceNum, 0); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 75816025f95f..919fa1e5f761 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1366,6 +1366,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, *lenp = 3; fid->udf.block = location.logicalBlockNum; fid->udf.partref = location.partitionReferenceNum; + fid->udf.parent_partref = 0; fid->udf.generation = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 1d36fdd4ae56..325d4d6856b1 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1254,13 +1254,15 @@ static int udf_load_sparable_map(struct super_block *sb, map->s_partition_type = UDF_SPARABLE_MAP15; sdata->s_packet_len = le16_to_cpu(spm->packetLength); if 
(!is_power_of_2(sdata->s_packet_len)) { - udf_error(sb, "error loading logical volume descriptor: " + udf_error(sb, __func__, + "error loading logical volume descriptor: " "Invalid packet length %u\n", (unsigned)sdata->s_packet_len); return -EIO; } if (spm->numSparingTables > 4) { - udf_error(sb, "error loading logical volume descriptor: " + udf_error(sb, __func__, + "error loading logical volume descriptor: " "Too many sparing tables (%d)\n", (int)spm->numSparingTables); return -EIO; @@ -1308,9 +1310,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); if (table_len > sb->s_blocksize - sizeof(*lvd)) { - udf_error(sb, "error loading logical volume descriptor: " + udf_error(sb, __func__, + "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1357,8 +1361,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index d113b72c2768..efa82c91ec18 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -78,7 +78,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extLength; __u32 s_extPosition; - __u16 s_nr_groups; + int s_nr_groups; struct buffer_head **s_block_bitmap; }; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 074b620d5d8b..d0ddba228449 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -71,8 +71,6 @@ extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, #define BINPRM_FLAGS_EXECFD_BIT 1 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT) -#define BINPRM_MAX_RECURSION 4 - /* Function parameter for binfmt->coredump */ struct coredump_params { long signr; @@ -131,6 +129,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); +extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cda62da68108..22713e8385f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -793,6 +793,9 @@ extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); extern void blk_recount_segments(struct request_queue *, struct bio *); +extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); +extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, + unsigned int, void __user *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, diff --git a/include/linux/hid.h b/include/linux/hid.h index b1344ec4b7fc..cd7049a670bc 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -410,10 +410,12 @@ struct hid_report { struct hid_device 
*device; /* associated device */ }; +#define HID_MAX_IDS 256 + struct hid_report_enum { unsigned numbered; struct list_head report_list; - struct hid_report *report_id_hash[256]; + struct hid_report *report_id_hash[HID_MAX_IDS]; }; #define HID_REPORT_TYPES 3 @@ -692,6 +694,10 @@ void hid_output_report(struct hid_report *report, __u8 *data); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +struct hid_report *hid_validate_values(struct hid_device *hid, + unsigned int type, unsigned int id, + unsigned int field_index, + unsigned int report_counts); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); void hid_disconnect(struct hid_device *hid); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 94cb72cfc2c3..5072583996f9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -454,6 +454,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); +void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); int kvm_assign_device(struct kvm *kvm, @@ -468,6 +469,11 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm, return 0; } +static inline void kvm_iommu_unmap_pages(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + static inline int kvm_iommu_map_guest(struct kvm *kvm) { return -ENODEV; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 1cc966cd3e5f..be6db86cc748 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -180,7 +180,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - spinlock_t lock; + struct mutex mutex; }; void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5b59f35dcb8f..040ce385f262 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -362,7 +362,7 @@ static inline int PageCompound(struct page *page) * pages on the LRU and/or pagecache. 
*/ TESTPAGEFLAG(Compound, compound) -__PAGEFLAG(Head, compound) +__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) /* * PG_reclaim is used in combination with PG_compound to mark the @@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound) * PG_compound & PG_reclaim => Tail page * PG_compound & ~PG_reclaim => Head page */ +#define PG_head_mask ((1L << PG_compound)) #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) +static inline int PageHead(struct page *page) +{ + return ((page->flags & PG_head_tail_mask) == PG_head_mask); +} + static inline int PageTail(struct page *page) { return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); diff --git a/include/linux/socket.h b/include/linux/socket.h index 354cc5617f8b..7cfb4f881644 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -256,7 +256,7 @@ struct ucred { #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ #define MSG_MORE 0x8000 /* Sender will send more */ #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ - +#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_EOF MSG_FIN #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 83fd34437cf1..648000dd4b97 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -56,7 +56,15 @@ struct ip_options { unsigned char __data[0]; }; -#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) +struct ip_options_rcu { + struct rcu_head rcu; + struct ip_options opt; +}; + +struct ip_options_data { + struct ip_options_rcu opt; + char data[40]; +}; struct inet_request_sock { struct request_sock req; @@ -77,7 +85,7 @@ struct inet_request_sock { acked : 1, no_srccheck: 1; kmemcheck_bitfield_end(flags); - struct ip_options *opt; + struct ip_options_rcu *opt; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -125,7 +133,7 @@ struct inet_sock { __be16 inet_sport; __u16 inet_id; - struct ip_options *opt; + struct ip_options_rcu *inet_opt; __u8 tos; __u8 min_ttl; __u8 mc_ttl; diff --git a/include/net/ip.h b/include/net/ip.h index 503994a38ed1..ac9506e74c29 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -52,7 +52,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb) struct ipcm_cookie { __be32 addr; int oif; - struct ip_options *opt; + struct ip_options_rcu *opt; union skb_shared_tx shtx; }; @@ -89,7 +89,7 @@ extern int igmp_mc_proc_init(void); extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, __be32 saddr, __be32 daddr, - struct ip_options *opt); + struct ip_options_rcu *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); @@ -376,14 +376,15 @@ extern int ip_forward(struct sk_buff *skb); * Functions provided by ip_options.c */ -extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); +extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, + __be32 daddr, struct rtable *rt, int is_frag); extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); -extern int ip_options_get(struct net *net, struct ip_options **optp, +extern int ip_options_get(struct net *net, 
struct ip_options_rcu **optp, unsigned char *data, int optlen); -extern int ip_options_get_from_user(struct net *net, struct ip_options **optp, +extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen); extern void ip_options_undo(struct ip_options * opt); extern void ip_forward_options(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index b365fc2597c3..133e350c6fa3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1428,6 +1428,8 @@ extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { + int len = skb->len; + /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK */ @@ -1437,7 +1439,7 @@ static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_set_owner_r(skb, sk); skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, len); return 0; } diff --git a/include/net/udp.h b/include/net/udp.h index b02f5d9f4d7e..acc0a1928c33 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -169,6 +169,7 @@ extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); +extern int udp_push_pending_frames(struct sock *sk); extern void udp_flush_pending_frames(struct sock *sk); extern int udp_rcv(struct sk_buff *skb); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 3adca0ca9dbe..2018784c931a 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -250,7 +250,7 @@ TRACE_EVENT(mm_page_alloc, TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", __entry->page, - page_to_pfn(__entry->page), + __entry->page ? page_to_pfn(__entry->page) : 0, __entry->order, __entry->migratetype, show_gfp_flags(__entry->gfp_flags)) @@ -276,7 +276,7 @@ DECLARE_EVENT_CLASS(mm_page, TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", __entry->page, - page_to_pfn(__entry->page), + __entry->page ? 
page_to_pfn(__entry->page) : 0, __entry->order, __entry->migratetype, __entry->order == 0) diff --git a/kernel/async.c b/kernel/async.c index 15319d6c18fe..b640ffc3f5a1 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -94,6 +94,13 @@ static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; + if (!running) { /* just check the entry count */ + if (atomic_read(&entry_count)) + return 0; /* smaller than any cookie */ + else + return next_cookie; + } + if (!list_empty(running)) { entry = list_first_entry(running, struct async_entry, list); @@ -249,9 +256,7 @@ EXPORT_SYMBOL_GPL(async_schedule_domain); */ void async_synchronize_full(void) { - do { - async_synchronize_cookie(next_cookie); - } while (!list_empty(&async_running) || !list_empty(&async_pending)); + async_synchronize_cookie_domain(next_cookie, NULL); } EXPORT_SYMBOL_GPL(async_synchronize_full); @@ -271,7 +276,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); /** * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing * @cookie: async_cookie_t to use as checkpoint - * @running: running list to synchronize on + * @running: running list to synchronize on, NULL indicates all lists * * This function waits until all asynchronous function calls for the * synchronization domain specified by the running list @list submitted diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bf4f78f026e8..477bb7e17779 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2187,9 +2187,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, dentry->d_fsdata = cgrp; inc_nlink(parent->d_inode); rcu_assign_pointer(cgrp->dentry, dentry); - dget(dentry); } - dput(dentry); return error; } diff --git a/kernel/futex.c b/kernel/futex.c index 8b467b4a437f..0e8043833223 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2204,11 +2204,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * @uaddr2: the pi futex we will take prior to returning to user-space * * The caller will wait on uaddr and will be requeued by futex_requeue() to - * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and - * complete the acquisition of the rt_mutex prior to returning to userspace. - * This ensures the rt_mutex maintains an owner when it has waiters; without - * one, the pi logic wouldn't know which task to boost/deboost, if there was a - * need to. + * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake + * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to + * userspace. This ensures the rt_mutex maintains an owner when it has waiters; + * without one, the pi logic would not know which task to boost/deboost, if + * there was a need to. * * We call schedule in futex_wait_queue_me() when we enqueue and return there * via the following: @@ -2245,6 +2245,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, struct futex_q q; int res, ret; + if (uaddr == uaddr2) + return -EINVAL; + if (!bitset) return -EINVAL; @@ -2318,7 +2321,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * signal. futex_unlock_pi() will not destroy the lock_ptr nor * the pi_state. 
*/ - WARN_ON(!&q.pi_state); + WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); debug_rt_mutex_free_waiter(&rt_waiter); @@ -2345,7 +2348,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, * fault, unlock the rt_mutex and return the fault to userspace. */ if (ret == -EFAULT) { - if (rt_mutex_owner(pi_mutex) == current) + if (pi_mutex && rt_mutex_owner(pi_mutex) == current) rt_mutex_unlock(pi_mutex); } else if (ret == -EINTR) { /* diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bc7704b3a443..3d83a1553a41 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1544,8 +1544,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, while (!signal_pending(current)) { if (timer.it.cpu.expires.sched == 0) { /* - * Our timer fired and was reset. + * Our timer fired and was reset, below + * deletion can not fail. */ + posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } @@ -1563,9 +1565,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, it); + error = posix_cpu_timer_set(&timer, 0, &zero_it, it); + if (!error) { + /* + * Timer is now unarmed, deletion can not fail. + */ + posix_cpu_timer_del(&timer); + } spin_unlock_irq(&timer.it_lock); + while (error == TIMER_RETRY) { + /* + * We need to handle case when timer was or is in the + * middle of firing. In other cases we already freed + * resources. + */ + spin_lock_irq(&timer.it_lock); + error = posix_cpu_timer_del(&timer); + spin_unlock_irq(&timer.it_lock); + } + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b7b491e6c25b..9450ec22e5a6 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -507,7 +507,7 @@ static int ptrace_resume(struct task_struct *child, long request, long data) } child->exit_code = data; - wake_up_process(child); + wake_up_state(child, __TASK_TRACED); return 0; } diff --git a/kernel/resource.c b/kernel/resource.c index 9c358e263534..bb76480a566f 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -613,6 +613,7 @@ static void __init __reserve_region_with_split(struct resource *root, struct resource *parent = root; struct resource *conflict; struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC); + struct resource *next_res = NULL; if (!res) return; @@ -622,21 +623,46 @@ static void __init __reserve_region_with_split(struct resource *root, res->end = end; res->flags = IORESOURCE_BUSY; - conflict = __request_resource(parent, res); - if (!conflict) - return; + while (1) { - /* failed, split and try again */ - kfree(res); + conflict = __request_resource(parent, res); + if (!conflict) { + if (!next_res) + break; + res = next_res; + next_res = NULL; + continue; + } - /* conflict covered whole area */ - if (conflict->start <= start && conflict->end >= end) - return; + /* conflict covered whole area */ + if (conflict->start <= res->start && + conflict->end >= res->end) { + kfree(res); + WARN_ON(next_res); + break; + } + + /* failed, split and try again */ + if (conflict->start > res->start) { + end = res->end; + res->end = conflict->start - 1; + if (conflict->end < end) { + next_res = kzalloc(sizeof(*next_res), + GFP_ATOMIC); + if (!next_res) { + kfree(res); + break; + } + next_res->name = name; + next_res->start = conflict->end + 1; + next_res->end = end; + next_res->flags = IORESOURCE_BUSY; + } + } else { + res->start = conflict->end + 1; + } + } - if (conflict->start > start) - __reserve_region_with_split(root, start, conflict->start-1, name); - if (conflict->end < end) - __reserve_region_with_split(root, conflict->end+1, end, name); } void __init reserve_region_with_split(struct resource *root, diff --git a/kernel/sched.c b/kernel/sched.c index e24d139ff694..ddcb8aeb2f59 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2492,7 +2492,8 @@ out: */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + WARN_ON(task_is_stopped_or_traced(p)); + return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); diff --git a/kernel/signal.c b/kernel/signal.c index 137a3333b444..06f13229a709 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2358,7 +2358,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) static int do_tkill(pid_t tgid, pid_t pid, int sig) { - struct siginfo info; + struct siginfo info = {}; info.si_signo = sig; info.si_errno = 0; diff --git a/kernel/softirq.c b/kernel/softirq.c index 7c1a67ef0274..0df9a9406271 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -178,21 +178,21 @@ void local_bh_enable_ip(unsigned long ip) EXPORT_SYMBOL(local_bh_enable_ip); /* - * We restart softirq processing MAX_SOFTIRQ_RESTART times, - * and we fall back to softirqd after that. + * We restart softirq processing for at most 2 ms, + * and if need_resched() is not set. * - * This number has been established via experimentation. + * These limits have been established via experimentation. * The two things to balance is latency against fairness - * we want to handle softirqs as soon as possible, but they * should not be able to lock up the box. 
*/ -#define MAX_SOFTIRQ_RESTART 10 +#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) asmlinkage void __do_softirq(void) { struct softirq_action *h; __u32 pending; - int max_restart = MAX_SOFTIRQ_RESTART; + unsigned long end = jiffies + MAX_SOFTIRQ_TIME; int cpu; pending = local_softirq_pending(); @@ -236,11 +236,12 @@ restart: local_irq_disable(); pending = local_softirq_pending(); - if (pending && --max_restart) - goto restart; + if (pending) { + if (time_before(jiffies, end) && !need_resched()) + goto restart; - if (pending) wakeup_softirqd(); + } lockdep_softirq_exit(); diff --git a/kernel/sys.c b/kernel/sys.c index 0324c1cd8e7b..006883113861 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -308,6 +308,7 @@ void kernel_restart_prepare(char *cmd) void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); + disable_nonboot_cpus(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); else diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 521987f85874..ab16ee785b7f 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -67,7 +67,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) */ int tick_check_broadcast_device(struct clock_event_device *dev) { - if ((tick_broadcast_device.evtdev && + if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || + (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f992762d7f51..25d34e1a7911 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -769,7 +769,7 @@ void tick_cancel_sched_timer(int cpu) hrtimer_cancel(&ts->sched_timer); # endif - ts->nohz_mode = NOHZ_MODE_INACTIVE; + memset(ts, 0, sizeof(*ts)); } #endif diff --git a/kernel/timer.c b/kernel/timer.c index b7e39516bf0b..12aa4bdb150d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1552,12 +1552,12 @@ static int __cpuinit init_timers_cpu(int cpu) boot_done = 1; base = &boot_tvec_bases; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; } else { base = per_cpu(tvec_bases, cpu); } - spin_lock_init(&base->lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7dd746cc86ff..337f928749c7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -480,7 +480,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) free_page(tmp); } - free_page((unsigned long)stat->pages); stat->pages = NULL; stat->start = NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ca9ce49d71e3..c80dd852f3a0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2030,6 +2030,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) kref_get(&reservations->refs); } +static void resv_map_put(struct vm_area_struct *vma) +{ + struct resv_map *reservations = vma_resv_map(vma); + + if (!reservations) + return; + kref_put(&reservations->refs, resv_map_release); +} + static void hugetlb_vm_op_close(struct vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); @@ -2045,7 +2054,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) reserve = (end - start) - region_count(&reservations->regions, start, end); - kref_put(&reservations->refs, resv_map_release); + resv_map_put(vma); if (reserve) { hugetlb_acct_memory(h, -reserve); @@ -2744,12 +2753,16 @@ int hugetlb_reserve_pages(struct inode *inode, set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } - if (chg < 0) - return chg; + if 
(chg < 0) { + ret = chg; + goto out_err; + } /* There must be enough filesystem quota for the mapping */ - if (hugetlb_get_quota(inode->i_mapping, chg)) - return -ENOSPC; + if (hugetlb_get_quota(inode->i_mapping, chg)) { + ret = -ENOSPC; + goto out_err; + } /* * Check enough hugepages are available for the reservation. @@ -2758,7 +2771,7 @@ int hugetlb_reserve_pages(struct inode *inode, ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugetlb_put_quota(inode->i_mapping, chg); - return ret; + goto out_err; } /* @@ -2775,6 +2788,10 @@ int hugetlb_reserve_pages(struct inode *inode, if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; +out_err: + if (vma) + resv_map_put(vma); + return ret; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) diff --git a/mm/madvise.c b/mm/madvise.c index 319528b8db74..83aa92aad8bc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -13,6 +13,7 @@ #include #include #include +#include <linux/file.h> /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -191,14 +192,16 @@ static long madvise_remove(struct vm_area_struct *vma, struct address_space *mapping; loff_t offset, endoff; int error; + struct file *f; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) return -EINVAL; - if (!vma->vm_file || !vma->vm_file->f_mapping - || !vma->vm_file->f_mapping->host) { + f = vma->vm_file; + + if (!f || !f->f_mapping || !f->f_mapping->host) { return -EINVAL; } @@ -212,9 +215,16 @@ static long madvise_remove(struct vm_area_struct *vma, endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + /* + * vmtruncate_range may need to take i_mutex and i_alloc_sem. + * We need to explicitly grab a reference because the vma (and + * hence the vma's reference to the file) can go away as soon as + * we drop mmap_sem. + */ + get_file(f); up_read(&current->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); + fput(f); down_read(&current->mm->mmap_sem); return error; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c7f53b1228b6..1d5c89a7e128 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1835,7 +1835,7 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b) */ /* lookup first element intersecting start-end */ -/* Caller holds sp->lock */ +/* Caller holds sp->mutex */ static struct sp_node * sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) { @@ -1899,13 +1899,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) if (!sp->root.rb_node) return NULL; - spin_lock(&sp->lock); + mutex_lock(&sp->mutex); sn = sp_lookup(sp, idx, idx+1); if (sn) { mpol_get(sn->policy); pol = sn->policy; } - spin_unlock(&sp->lock); + mutex_unlock(&sp->mutex); return pol; } @@ -1936,10 +1936,10 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end, static int shared_policy_replace(struct shared_policy *sp, unsigned long start, unsigned long end, struct sp_node *new) { - struct sp_node *n, *new2 = NULL; + struct sp_node *n; + int ret = 0; -restart: - spin_lock(&sp->lock); + mutex_lock(&sp->mutex); n = sp_lookup(sp, start, end); /* Take care of old policies in the same range. */ while (n && n->start < end) { @@ -1952,16 +1952,14 @@ restart: } else { /* Old policy spanning whole new range. 
*/ if (n->end > end) { + struct sp_node *new2; + new2 = sp_alloc(end, n->end, n->policy); if (!new2) { - spin_unlock(&sp->lock); - new2 = sp_alloc(end, n->end, n->policy); - if (!new2) - return -ENOMEM; - goto restart; + ret = -ENOMEM; + goto out; } n->end = start; sp_insert(sp, new2); - new2 = NULL; break; } else n->end = start; @@ -1972,12 +1970,9 @@ restart: } if (new) sp_insert(sp, new); - spin_unlock(&sp->lock); - if (new2) { - mpol_put(new2->policy); - kmem_cache_free(sn_cache, new2); - } - return 0; +out: + mutex_unlock(&sp->mutex); + return ret; } /** @@ -1995,7 +1990,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) int ret; sp->root = RB_ROOT; /* empty tree == default mempolicy */ - spin_lock_init(&sp->lock); + mutex_init(&sp->mutex); if (mpol) { struct vm_area_struct pvma; @@ -2063,7 +2058,7 @@ void mpol_free_shared_policy(struct shared_policy *p) if (!p->root.rb_node) return; - spin_lock(&p->lock); + mutex_lock(&p->mutex); next = rb_first(&p->root); while (next) { n = rb_entry(next, struct sp_node, nd); @@ -2072,7 +2067,7 @@ void mpol_free_shared_policy(struct shared_policy *p) mpol_put(n->policy); kmem_cache_free(sn_cache, n); } - spin_unlock(&p->lock); + mutex_unlock(&p->mutex); } /* assumes fs == KERNEL_DS */ @@ -2335,7 +2330,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) break; default: - BUG(); + return -EINVAL; } l = strlen(policy_types[mode]); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 438951d366f2..0b54146f986e 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -33,6 +33,24 @@ void __mmu_notifier_release(struct mm_struct *mm) { struct mmu_notifier *mn; + struct hlist_node *n; + + /* + * RCU here will block mmu_notifier_unregister until + * ->release returns. + */ + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) + /* + * if ->release runs before mmu_notifier_unregister it + * must be handled as it's the only way for the driver + * to flush all existing sptes and stop the driver + * from establishing any more sptes before all the + * pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); spin_lock(&mm->mmu_notifier_mm->lock); while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { @@ -46,23 +64,6 @@ void __mmu_notifier_release(struct mm_struct *mm) * mmu_notifier_unregister to return. */ hlist_del_init_rcu(&mn->hlist); - /* - * RCU here will block mmu_notifier_unregister until - * ->release returns. - */ - rcu_read_lock(); - spin_unlock(&mm->mmu_notifier_mm->lock); - /* - * if ->release runs before mmu_notifier_unregister it - * must be handled as it's the only way for the driver - * to flush all existing sptes and stop the driver - * from establishing any more sptes before all the - * pages in the mm are freed. - */ - if (mn->ops->release) - mn->ops->release(mn, mm); - rcu_read_unlock(); - spin_lock(&mm->mmu_notifier_mm->lock); } spin_unlock(&mm->mmu_notifier_mm->lock); @@ -264,16 +265,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_count) <= 0); - spin_lock(&mm->mmu_notifier_mm->lock); if (!hlist_unhashed(&mn->hlist)) { - hlist_del_rcu(&mn->hlist); - /* * RCU here will force exit_mmap to wait ->release to finish * before freeing the pages. 
*/ rcu_read_lock(); - spin_unlock(&mm->mmu_notifier_mm->lock); + /* * exit_mmap will block in mmu_notifier_release to * guarantee ->release is called before freeing the @@ -282,8 +280,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) if (mn->ops->release) mn->ops->release(mn, mm); rcu_read_unlock(); - } else + + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_del_rcu(&mn->hlist); spin_unlock(&mm->mmu_notifier_mm->lock); + } /* * Wait any running method to finish, of course including diff --git a/mm/percpu.c b/mm/percpu.c index 83523d9a351b..558543b33b52 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -111,9 +111,9 @@ static int pcpu_atom_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; -/* cpus with the lowest and highest unit addresses */ -static unsigned int pcpu_low_unit_cpu __read_mostly; -static unsigned int pcpu_high_unit_cpu __read_mostly; +/* cpus with the lowest and highest unit numbers */ +static unsigned int pcpu_first_unit_cpu __read_mostly; +static unsigned int pcpu_last_unit_cpu __read_mostly; /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; @@ -747,8 +747,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_cache_vunmap( - pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), - pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) @@ -810,8 +810,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_tlb_kernel_range( - pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), - pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -888,8 +888,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { flush_cache_vmap( - pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start), - pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end)); + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } /** @@ -1345,19 +1345,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) { void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); bool in_first_chunk = false; - unsigned long first_low, first_high; + unsigned long first_start, first_end; unsigned int cpu; /* - * The following test on unit_low/high isn't strictly + * The following test on first_start/end isn't strictly * necessary but will speed up lookups of addresses which * aren't in the first chunk. 
*/ - first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); - first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, - pcpu_unit_pages); - if ((unsigned long)addr >= first_low && - (unsigned long)addr < first_high) { + first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); + first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, + pcpu_unit_pages); + if ((unsigned long)addr >= first_start && + (unsigned long)addr < first_end) { for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); @@ -1754,9 +1754,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; - - pcpu_low_unit_cpu = NR_CPUS; - pcpu_high_unit_cpu = NR_CPUS; + pcpu_first_unit_cpu = NR_CPUS; for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { const struct pcpu_group_info *gi = &ai->groups[group]; @@ -1776,13 +1774,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, unit_map[cpu] = unit + i; unit_off[cpu] = gi->base_offset + i * ai->unit_size; - /* determine low/high unit_cpu */ - if (pcpu_low_unit_cpu == NR_CPUS || - unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) - pcpu_low_unit_cpu = cpu; - if (pcpu_high_unit_cpu == NR_CPUS || - unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) - pcpu_high_unit_cpu = cpu; + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; + pcpu_last_unit_cpu = cpu; } } pcpu_nr_units = unit; diff --git a/mm/shmem.c b/mm/shmem.c index 0203cda3297a..f24ce93efc15 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2254,6 +2254,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) unsigned long inodes; int error = -EINVAL; + config.mpol = NULL; if (shmem_parse_options(data, &config, true)) return error; @@ -2281,8 +2282,13 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) sbinfo->max_inodes = config.max_inodes; sbinfo->free_inodes = config.max_inodes - inodes; - mpol_put(sbinfo->mpol); - sbinfo->mpol = config.mpol; /* transfers initial ref */ + /* + * Preserve previous mempolicy unless mpol remount option was specified. 
+ */ + if (config.mpol) { + mpol_put(sbinfo->mpol); + sbinfo->mpol = config.mpol; /* transfers initial ref */ + } out: spin_unlock(&sbinfo->stat_lock); return error; diff --git a/mm/truncate.c b/mm/truncate.c index f42675a3615d..d0698a15c61f 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -381,11 +381,12 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; + clear_page_mlock(page); + spin_lock_irq(&mapping->tree_lock); if (PageDirty(page)) goto failed; - clear_page_mlock(page); BUG_ON(page_has_private(page)); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5c4620600333..365a899bcae0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2341,6 +2341,8 @@ static int kswapd(void *p) if (!ret) balance_pgdat(pgdat, order); } + + current->reclaim_state = NULL; return 0; } diff --git a/net/atm/common.c b/net/atm/common.c index 97ed94aa0cbc..4ccc872209f0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -475,6 +475,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; if (flags & ~MSG_DONTWAIT) /* only handle MSG_DONTWAIT */ @@ -759,6 +761,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 437ee70c5e62..db0dd47de61b 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -94,6 +94,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 12c350c63717..d082b6db6b81 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1655,6 +1655,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 404a8500fd03..0891857b7ca2 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -240,14 +240,14 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 38f08f6b86f6..e5d788faf03b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -583,6 +583,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); diff 
--git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 280529ad9274..a01808691565 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -790,7 +790,7 @@ static int hidp_setup_hid(struct hidp_session *session, hid->version = req->version; hid->country = req->country; - strncpy(hid->name, req->name, 128); + strncpy(hid->name, req->name, sizeof(req->name) - 1); strncpy(hid->phys, batostr(&src), 64); strncpy(hid->uniq, batostr(&dst), 64); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 0b6cf87d5eb0..64ccd83d52a2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1191,6 +1191,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index b045bbbc2353..557122ee3e24 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -547,6 +547,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) @@ -655,6 +656,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eaa0e1bae49b..ea4452f3dacb 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -532,7 +532,8 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - hlist_add_head(&mp->mglist, &br->mglist); + if (hlist_unhashed(&mp->mglist)) + hlist_add_head(&mp->mglist, &br->mglist); mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index edc7111b3db8..9fd76244adf6 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -16,6 +16,7 @@ #include #include #include +#include <linux/pkt_sched.h> #include #include #include @@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p, skb->dev = p->dev; skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); memcpy(__skb_put(skb, length), data, length); diff --git a/net/core/dev.c b/net/core/dev.c index 2aaf2e610f92..3dbbba137c93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1034,6 +1034,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1047,9 +1049,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; @@ -3045,7 +3048,7 @@ static void net_rx_action(struct softirq_action *h) * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. 
*/ - if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) + if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; local_irq_enable(); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index cf208d8042b1..cd3f2b942991 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -33,22 +33,19 @@ #define TRACE_ON 1 #define TRACE_OFF 0 -static void send_dm_alert(struct work_struct *unused); - - /* * Globals, our netlink socket pointer * and the work handle that will send up * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_SPINLOCK(trace_state_lock); +static DEFINE_MUTEX(trace_state_mutex); struct per_cpu_dm_data { - struct work_struct dm_alert_work; - struct sk_buff *skb; - atomic_t dm_hit_count; - struct timer_list send_timer; + spinlock_t lock; + struct sk_buff *skb; + struct work_struct dm_alert_work; + struct timer_list send_timer; }; struct dm_hw_stat_delta { @@ -74,56 +71,59 @@ static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); -static void reset_per_cpu_data(struct per_cpu_dm_data *data) +static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; struct net_dm_alert_msg *msg; struct nlattr *nla; + struct sk_buff *skb; + unsigned long flags; al = sizeof(struct net_dm_alert_msg); al += dm_hit_limit * sizeof(struct net_dm_drop_point); al += sizeof(struct nlattr); - data->skb = genlmsg_new(al, GFP_KERNEL); - genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, - 0, NET_DM_CMD_ALERT); - nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg)); - msg = nla_data(nla); - memset(msg, 0, al); - atomic_set(&data->dm_hit_count, dm_hit_limit); + skb = genlmsg_new(al, GFP_KERNEL); + + if (skb) { + genlmsg_put(skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + nla = nla_reserve(skb, NLA_UNSPEC, + sizeof(struct net_dm_alert_msg)); + msg = nla_data(nla); + memset(msg, 0, al); + } else { + mod_timer(&data->send_timer, jiffies + HZ / 10); + } + + spin_lock_irqsave(&data->lock, flags); + swap(data->skb, skb); + spin_unlock_irqrestore(&data->lock, flags); + + return skb; } -static void send_dm_alert(struct work_struct *unused) +static void send_dm_alert(struct work_struct *work) { struct sk_buff *skb; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data; - /* - * Grab the skb we're about to send - */ - skb = data->skb; + data = container_of(work, struct per_cpu_dm_data, dm_alert_work); - /* - * Replace it with a new one - */ - reset_per_cpu_data(data); - - /* - * Ship it! - */ - genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + skb = reset_per_cpu_data(data); + if (skb) + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); } /* * This is the timer function to delay the sending of an alert * in the event that more drops will arrive during the - * hysteresis period. Note that it operates under the timer interrupt - * so we don't need to disable preemption here + * hysteresis period. 
*/ -static void sched_send_work(unsigned long unused) +static void sched_send_work(unsigned long _data) { - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; schedule_work(&data->dm_alert_work); } @@ -134,17 +134,19 @@ static void trace_drop_common(struct sk_buff *skb, void *location) struct nlmsghdr *nlh; struct nlattr *nla; int i; - struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + struct sk_buff *dskb; + struct per_cpu_dm_data *data; + unsigned long flags; + local_irq_save(flags); + data = &__get_cpu_var(dm_cpu_data); + spin_lock(&data->lock); + dskb = data->skb; - if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { - /* - * we're already at zero, discard this hit - */ + if (!dskb) goto out; - } - nlh = (struct nlmsghdr *)data->skb->data; + nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); for (i = 0; i < msg->entries; i++) { @@ -153,11 +155,12 @@ static void trace_drop_common(struct sk_buff *skb, void *location) goto out; } } - + if (msg->entries == dm_hit_limit) + goto out; /* * We need to create a new entry */ - __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); msg->points[msg->entries].count = 1; @@ -165,11 +168,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location) if (!timer_pending(&data->send_timer)) { data->send_timer.expires = jiffies + dm_delay * HZ; - add_timer_on(&data->send_timer, smp_processor_id()); + add_timer(&data->send_timer); } out: - return; + spin_unlock_irqrestore(&data->lock, flags); } static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) @@ -221,7 +224,7 @@ static int set_all_monitor_traces(int state) struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *temp; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); switch (state) { case TRACE_ON: @@ -252,7 +255,7 @@ static int set_all_monitor_traces(int state) if (!rc) trace_state = state; - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); if (rc) return -EINPROGRESS; @@ -297,12 +300,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; case NETDEV_UNREGISTER: - spin_lock(&trace_state_lock); + mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -313,7 +316,7 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - spin_unlock(&trace_state_lock); + mutex_unlock(&trace_state_mutex); break; } out: @@ -378,13 +381,15 @@ static int __init init_net_drop_monitor(void) for_each_present_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); - reset_per_cpu_data(data); INIT_WORK(&data->dm_alert_work, send_dm_alert); init_timer(&data->send_timer); - data->send_timer.data = cpu; + data->send_timer.data = (unsigned long)data; data->send_timer.function = sched_send_work; + spin_lock_init(&data->lock); + reset_per_cpu_data(data); } + goto out; out_unreg: diff --git a/net/core/sock.c b/net/core/sock.c index 4b45ad8f6b9e..8ad61ba17a39 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -582,7 +582,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6df6f8ac9636..4f78abbf1045 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -218,7 +218,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -229,7 +229,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e072e018b068..d73f17ff95f6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -48,6 +48,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) __be32 daddr, nexthop; int tmp; int err; + struct ip_options_rcu *inet_opt; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -58,10 +59,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt != NULL && inet->opt->srr) { + + inet_opt = inet->inet_opt; + if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; - nexthop = inet->opt->faddr; + nexthop = inet_opt->opt.faddr; } tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, @@ -76,7 +79,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -ENETUNREACH; } - if (inet->opt == NULL || !inet->opt->srr) + if (inet_opt == NULL || !inet_opt->opt.srr) daddr = rt->rt_dst; if (inet->inet_saddr == 0) @@ -87,8 +90,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt != NULL) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket @@ -402,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->inet_daddr = ireq->rmt_addr; newinet->inet_rcv_saddr = ireq->loc_addr; newinet->inet_saddr = ireq->loc_addr; - newinet->opt = ireq->opt; + newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fec7de6cfe6e..90f65e1dd4cf 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -600,7 +600,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, First: no IPv4 options. 
*/ - newinet->opt = NULL; + newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8897b3c7d05a..4dd4aad58d93 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); - kfree(inet->opt); + kfree(inet->inet_opt); dst_release(sk->sk_dst_cache); sk_refcnt_debug_dec(sk); } @@ -1069,9 +1069,11 @@ static int inet_sk_reselect_saddr(struct sock *sk) __be32 old_saddr = inet->inet_saddr; __be32 new_saddr; __be32 daddr = inet->inet_daddr; + struct ip_options_rcu *inet_opt; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; + inet_opt = inet->inet_opt; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; /* Query new route. */ err = ip_route_connect(&rt, daddr, 0, @@ -1113,6 +1115,7 @@ int inet_sk_rebuild_header(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); __be32 daddr; + struct ip_options_rcu *inet_opt; int err; /* Route is OK, nothing to do. */ @@ -1120,9 +1123,12 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); daddr = inet->inet_daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rcu_read_unlock(); { struct flowi fl = { .oif = sk->sk_bound_dev_if, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c97cd9ff697e..f8f338874719 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1727,8 +1727,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) case CIPSO_V4_TAG_LOCAL: /* This is a non-standard tag that we only allow for * local connections, so if the incoming interface is - * not the loopback device drop the packet. */ - if (!(skb->dev->flags & IFF_LOOPBACK)) { + * not the loopback device drop the packet. Further, + * there is no legitimate reason for setting this from + * userspace so reject it if skb is NULL. 
*/ + if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) { err_offset = opt_iter; goto validate_return_locked; } @@ -1859,6 +1861,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, return CIPSO_V4_HDR_LEN + ret_val; } +static void opt_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_options_rcu, rcu)); +} + /** * cipso_v4_sock_setattr - Add a CIPSO option to a socket * @sk: the socket @@ -1881,7 +1888,7 @@ int cipso_v4_sock_setattr(struct sock *sk, unsigned char *buf = NULL; u32 buf_len; u32 opt_len; - struct ip_options *opt = NULL; + struct ip_options_rcu *old, *opt = NULL; struct inet_sock *sk_inet; struct inet_connection_sock *sk_conn; @@ -1917,22 +1924,25 @@ int cipso_v4_sock_setattr(struct sock *sk, ret_val = -ENOMEM; goto socket_setattr_failure; } - memcpy(opt->__data, buf, buf_len); - opt->optlen = opt_len; - opt->cipso = sizeof(struct iphdr); + memcpy(opt->opt.__data, buf, buf_len); + opt->opt.optlen = opt_len; + opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; sk_inet = inet_sk(sk); + + old = sk_inet->inet_opt; if (sk_inet->is_icsk) { sk_conn = inet_csk(sk); - if (sk_inet->opt) - sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; - sk_conn->icsk_ext_hdr_len += opt->optlen; + if (old) + sk_conn->icsk_ext_hdr_len -= old->opt.optlen; + sk_conn->icsk_ext_hdr_len += opt->opt.optlen; sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); } - opt = xchg(&sk_inet->opt, opt); - kfree(opt); + rcu_assign_pointer(sk_inet->inet_opt, opt); + if (old) + call_rcu(&old->rcu, opt_kfree_rcu); return 0; @@ -1962,7 +1972,7 @@ int cipso_v4_req_setattr(struct request_sock *req, unsigned char *buf = NULL; u32 buf_len; u32 opt_len; - struct ip_options *opt = NULL; + struct ip_options_rcu *opt = NULL; struct inet_request_sock *req_inet; /* We allocate the maximum CIPSO option size here so we are probably @@ -1990,15 +2000,16 @@ int cipso_v4_req_setattr(struct request_sock *req, ret_val = -ENOMEM; goto req_setattr_failure; } - memcpy(opt->__data, buf, buf_len); - opt->optlen = opt_len; - opt->cipso = sizeof(struct iphdr); + memcpy(opt->opt.__data, buf, buf_len); + opt->opt.optlen = opt_len; + opt->opt.cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; req_inet = inet_rsk(req); opt = xchg(&req_inet->opt, opt); - kfree(opt); + if (opt) + call_rcu(&opt->rcu, opt_kfree_rcu); return 0; @@ -2018,34 +2029,34 @@ req_setattr_failure: * values on failure. 
* */ -static int cipso_v4_delopt(struct ip_options **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) { int hdr_delta = 0; - struct ip_options *opt = *opt_ptr; + struct ip_options_rcu *opt = *opt_ptr; - if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; unsigned char *cipso_ptr; int iter; int optlen_new; - cipso_off = opt->cipso - sizeof(struct iphdr); - cipso_ptr = &opt->__data[cipso_off]; + cipso_off = opt->opt.cipso - sizeof(struct iphdr); + cipso_ptr = &opt->opt.__data[cipso_off]; cipso_len = cipso_ptr[1]; - if (opt->srr > opt->cipso) - opt->srr -= cipso_len; - if (opt->rr > opt->cipso) - opt->rr -= cipso_len; - if (opt->ts > opt->cipso) - opt->ts -= cipso_len; - if (opt->router_alert > opt->cipso) - opt->router_alert -= cipso_len; - opt->cipso = 0; + if (opt->opt.srr > opt->opt.cipso) + opt->opt.srr -= cipso_len; + if (opt->opt.rr > opt->opt.cipso) + opt->opt.rr -= cipso_len; + if (opt->opt.ts > opt->opt.cipso) + opt->opt.ts -= cipso_len; + if (opt->opt.router_alert > opt->opt.cipso) + opt->opt.router_alert -= cipso_len; + opt->opt.cipso = 0; memmove(cipso_ptr, cipso_ptr + cipso_len, - opt->optlen - cipso_off - cipso_len); + opt->opt.optlen - cipso_off - cipso_len); /* determining the new total option length is tricky because of * the padding necessary, the only thing i can think to do at @@ -2054,21 +2065,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) * from there we can determine the new total option length */ iter = 0; optlen_new = 0; - while (iter < opt->optlen) - if (opt->__data[iter] != IPOPT_NOP) { - iter += opt->__data[iter + 1]; + while (iter < opt->opt.optlen) + if (opt->opt.__data[iter] != IPOPT_NOP) { + iter += opt->opt.__data[iter + 1]; optlen_new = iter; } else iter++; - hdr_delta = opt->optlen; - opt->optlen = (optlen_new + 3) & ~3; - hdr_delta -= opt->optlen; + hdr_delta = opt->opt.optlen; + opt->opt.optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->opt.optlen; } else { /* only the cipso option was present on the socket so we can * remove the entire option struct */ *opt_ptr = NULL; - hdr_delta = opt->optlen; - kfree(opt); + hdr_delta = opt->opt.optlen; + call_rcu(&opt->rcu, opt_kfree_rcu); } return hdr_delta; @@ -2085,15 +2096,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr) void cipso_v4_sock_delattr(struct sock *sk) { int hdr_delta; - struct ip_options *opt; + struct ip_options_rcu *opt; struct inet_sock *sk_inet; sk_inet = inet_sk(sk); - opt = sk_inet->opt; - if (opt == NULL || opt->cipso == 0) + opt = sk_inet->inet_opt; + if (opt == NULL || opt->opt.cipso == 0) return; - hdr_delta = cipso_v4_delopt(&sk_inet->opt); + hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { struct inet_connection_sock *sk_conn = inet_csk(sk); sk_conn->icsk_ext_hdr_len -= hdr_delta; @@ -2111,12 +2122,12 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options *opt; + struct ip_options_rcu *opt; struct inet_request_sock *req_inet; req_inet = inet_rsk(req); opt = req_inet->opt; - if (opt == NULL || opt->cipso == 0) + if (opt == NULL || opt->opt.cipso == 0) return; cipso_v4_delopt(&req_inet->opt); @@ -2186,14 +2197,18 @@ getattr_return: */ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { - struct ip_options *opt; + struct ip_options_rcu *opt; + int res = -ENOMSG; - opt = inet_sk(sk)->opt; - if 
(opt == NULL || opt->cipso == 0) - return -ENOMSG; - - return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr), - secattr); + rcu_read_lock(); + opt = rcu_dereference(inet_sk(sk)->inet_opt); + if (opt && opt->opt.cipso) + res = cipso_v4_getattr(opt->opt.__data + + opt->opt.cipso - + sizeof(struct iphdr), + secattr); + rcu_read_unlock(); + return res; } /** diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ac4dec132735..4a5137a9e24c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -108,8 +108,7 @@ struct icmp_bxm { __be32 times[3]; } data; int head_len; - struct ip_options replyopts; - unsigned char optbuf[40]; + struct ip_options_data replyopts; }; /* An array of errno for error messages from dest unreach. */ @@ -363,7 +362,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct inet_sock *inet; __be32 daddr; - if (ip_options_echo(&icmp_param->replyopts, skb)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; sk = icmp_xmit_lock(net); @@ -377,10 +376,10 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; ipc.shtx.flags = 0; - if (icmp_param->replyopts.optlen) { - ipc.opt = &icmp_param->replyopts; - if (ipc.opt->srr) - daddr = icmp_param->replyopts.faddr; + if (icmp_param->replyopts.opt.opt.optlen) { + ipc.opt = &icmp_param->replyopts.opt; + if (ipc.opt->opt.srr) + daddr = icmp_param->replyopts.opt.opt.faddr; } { struct flowi fl = { .nl_u = { .ip4_u = @@ -518,7 +517,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts, skb_in)) + if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -534,15 +533,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts; + ipc.opt = &icmp_param.replyopts.opt; ipc.shtx.flags = 0; { struct flowi fl = { .nl_u = { .ip4_u = { - .daddr = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : + .daddr = icmp_param.replyopts.opt.opt.srr ? + icmp_param.replyopts.opt.opt.faddr : iph->saddr, .saddr = saddr, .tos = RT_TOS(tos) @@ -631,7 +630,7 @@ route_done: room = dst_mtu(&rt->u.dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; + room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); icmp_param.data_len = skb_in->len - icmp_param.offset; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269dd..9f57d0f75631 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,12 +356,12 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options *opt = inet_rsk(req)->opt; + struct ip_options_rcu *opt = inet_rsk(req)->opt; struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : + { .daddr = ((opt && opt->opt.srr) ? 
+ opt->opt.faddr : ireq->rmt_addr), .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, @@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, security_req_classify_flow(req, &fl); if (ip_route_output_flow(net, &rt, &fl, sk, 0)) goto no_route; - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; return &rt->u.dst; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4c09a31fd140..f4281aad1df0 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -36,7 +36,7 @@ * saddr is address of outgoing interface. */ -void ip_options_build(struct sk_buff * skb, struct ip_options * opt, +void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, * NOTE: dopt cannot point to skb. */ -int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) +int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) { - struct ip_options *sopt; + const struct ip_options *sopt; unsigned char *sptr, *dptr; int soffset, doffset; int optlen; @@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sopt = &(IPCB(skb)->opt); - if (sopt->optlen == 0) { - dopt->optlen = 0; + if (sopt->optlen == 0) return 0; - } sptr = skb_network_header(skb); dptr = dopt->__data; @@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->optlen += optlen; } if (sopt->srr) { - unsigned char * start = sptr+sopt->srr; + unsigned char *start = sptr+sopt->srr; __be32 faddr; optlen = start[1]; @@ -500,19 +498,19 @@ void ip_options_undo(struct ip_options * opt) } } -static struct ip_options *ip_options_get_alloc(const int optlen) +static struct ip_options_rcu *ip_options_get_alloc(const int optlen) { - return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), GFP_KERNEL); } -static int ip_options_get_finish(struct net *net, struct ip_options **optp, - struct ip_options *opt, int optlen) +static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, + struct ip_options_rcu *opt, int optlen) { while (optlen & 3) - opt->__data[optlen++] = IPOPT_END; - opt->optlen = optlen; - if (optlen && ip_options_compile(net, opt, NULL)) { + opt->opt.__data[optlen++] = IPOPT_END; + opt->opt.optlen = optlen; + if (optlen && ip_options_compile(net, &opt->opt, NULL)) { kfree(opt); return -EINVAL; } @@ -521,29 +519,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, return 0; } -int ip_options_get_from_user(struct net *net, struct ip_options **optp, +int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; - if (optlen && copy_from_user(opt->__data, data, optlen)) { + if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { kfree(opt); return -EFAULT; } return ip_options_get_finish(net, optp, opt, optlen); } -int ip_options_get(struct net *net, struct ip_options **optp, +int ip_options_get(struct net *net, struct ip_options_rcu **optp, unsigned char *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct 
ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; if (optlen) - memcpy(opt->__data, data, optlen); + memcpy(opt->opt.__data, data, optlen); return ip_options_get_finish(net, optp, opt, optlen); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d52fe4bd573f..e669da63be31 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -138,14 +138,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * */ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - __be32 saddr, __be32 daddr, struct ip_options *opt) + __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); struct iphdr *iph; /* Build the IP header. */ - skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; @@ -161,9 +161,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->protocol = sk->sk_protocol; ip_select_ident(iph, &rt->u.dst, sk); - if (opt && opt->optlen) { - iph->ihl += opt->optlen>>2; - ip_options_build(skb, opt, daddr, rt, 0); + if (opt && opt->opt.optlen) { + iph->ihl += opt->opt.optlen>>2; + ip_options_build(skb, &opt->opt, daddr, rt, 0); } skb->priority = sk->sk_priority; @@ -315,9 +315,10 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - struct ip_options *opt = inet->opt; + struct ip_options_rcu *inet_opt = NULL; struct rtable *rt; struct iphdr *iph; + int res; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. @@ -328,13 +329,15 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); if (rt == NULL) { __be32 daddr; /* Use correct destination address if we have options. */ daddr = inet->inet_daddr; - if(opt && opt->srr) - daddr = opt->faddr; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; { struct flowi fl = { .oif = sk->sk_bound_dev_if, @@ -362,11 +365,11 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) skb_dst_set(skb, dst_clone(&rt->u.dst)); packet_routed: - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_dst != rt->rt_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ - skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); + skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); @@ -380,9 +383,9 @@ packet_routed: iph->daddr = rt->rt_dst; /* Transport layer set skb->h.foo itself. 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1e64dabbd232..3ca6fa7d1365 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -435,6 +435,11 @@ out:
 }

+static void opt_kfree_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_options_rcu, rcu));
+}
+
 /*
  *	Socket option code for IP. This is the end of the line after any
  *	TCP,UDP etc options on an IP socket.
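Annotation: do_ip_setsockopt() below is the matching publisher side of the RCU scheme. Its core, restated for reference (kernel context, not standalone code): rcu_assign_pointer() orders initialization of the new block before the pointer swap, and container_of() recovers the wrapper from the embedded rcu_head inside the callback.

static void opt_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct ip_options_rcu, rcu));
}

/* publish the new block, then retire the old one after a grace period */
old = inet->inet_opt;
rcu_assign_pointer(inet->inet_opt, opt);
if (old)
	call_rcu(&old->rcu, opt_kfree_rcu);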
@@ -481,13 +486,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	switch (optname) {
 	case IP_OPTIONS:
 	{
-		struct ip_options *opt = NULL;
+		struct ip_options_rcu *old, *opt = NULL;
+
 		if (optlen > 40)
 			goto e_inval;
 		err = ip_options_get_from_user(sock_net(sk), &opt,
 					       optval, optlen);
 		if (err)
 			break;
+		old = inet->inet_opt;
 		if (inet->is_icsk) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -496,17 +503,18 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (TCPF_LISTEN | TCPF_CLOSE)) &&
 			    inet->inet_daddr != LOOPBACK4_IPV6)) {
#endif
-				if (inet->opt)
-					icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+				if (old)
+					icsk->icsk_ext_hdr_len -= old->opt.optlen;
 				if (opt)
-					icsk->icsk_ext_hdr_len += opt->optlen;
+					icsk->icsk_ext_hdr_len += opt->opt.optlen;
 				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 			}
#endif
 		}
-		opt = xchg(&inet->opt, opt);
-		kfree(opt);
+		rcu_assign_pointer(inet->inet_opt, opt);
+		if (old)
+			call_rcu(&old->rcu, opt_kfree_rcu);
 		break;
 	}
 	case IP_PKTINFO:
@@ -565,7 +573,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	case IP_TTL:
 		if (optlen < 1)
 			goto e_inval;
-		if (val != -1 && (val < 0 || val > 255))
+		if (val != -1 && (val < 1 || val > 255))
 			goto e_inval;
 		inet->uc_ttl = val;
 		break;
@@ -1042,12 +1050,15 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_OPTIONS:
 	{
 		unsigned char optbuf[sizeof(struct ip_options)+40];
-		struct ip_options * opt = (struct ip_options *)optbuf;
+		struct ip_options *opt = (struct ip_options *)optbuf;
+		struct ip_options_rcu *inet_opt;
+
+		inet_opt = inet->inet_opt;
 		opt->optlen = 0;
-		if (inet->opt)
-			memcpy(optbuf, inet->opt,
-			       sizeof(struct ip_options)+
-			       inet->opt->optlen);
+		if (inet_opt)
+			memcpy(optbuf, &inet_opt->opt,
+			       sizeof(struct ip_options) +
+			       inet_opt->opt.optlen);
 		release_sock(sk);

 		if (opt->optlen == 0)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2bb1f87051c4..a0af7a2d6117 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -84,6 +84,14 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	*dataoff = nhoff + (iph->ihl << 2);
 	*protonum = iph->protocol;

+	/* Check bogus IP headers */
+	if (*dataoff > skb->len) {
+		pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+			 "nhoff %u, ihl %u, skblen %u\n",
+			 nhoff, iph->ihl << 2, skb->len);
+		return -NF_ACCEPT;
+	}
+
 	return NF_ACCEPT;
 }

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cc6f097fbd5f..d5f57acd6f4b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -457,6 +457,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	__be32 saddr;
 	u8  tos;
 	int err;
+	struct ip_options_data opt_copy;

 	err = -EMSGSIZE;
 	if (len > 0xFFFF)
@@ -517,8 +518,18 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	saddr = ipc.addr;
 	ipc.addr = daddr;

-	if (!ipc.opt)
-		ipc.opt = inet->opt;
+	if (!ipc.opt) {
+		struct ip_options_rcu *inet_opt;
+
+		rcu_read_lock();
+		inet_opt = rcu_dereference(inet->inet_opt);
+		if (inet_opt) {
+			memcpy(&opt_copy, inet_opt,
+			       sizeof(*inet_opt) + inet_opt->opt.optlen);
+			ipc.opt = &opt_copy.opt;
+		}
+		rcu_read_unlock();
+	}

 	if (ipc.opt) {
 		err = -EINVAL;
@@ -527,10 +538,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 */
 		if (inet->hdrincl)
 			goto done;
-		if (ipc.opt->srr) {
+		if (ipc.opt->opt.srr) {
 			if (!daddr)
 				goto done;
-			daddr = ipc.opt->faddr;
+			daddr = ipc.opt->opt.faddr;
 		}
 	}
 	tos = RT_CONN_FLAGS(sk);
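Annotation: the conntrack hunk above rejects packets whose claimed header length (ihl) pushes the computed L4 offset past the end of the buffer. A small standalone model of the arithmetic:

#include <stdio.h>

/* Minimal model of the ipv4_get_l4proto() check: a forged ihl can
 * place the claimed L4 offset beyond the actual packet data. */
static int l4_offset(unsigned nhoff, unsigned ihl, unsigned skblen,
		     unsigned *dataoff)
{
	*dataoff = nhoff + (ihl << 2);
	return (*dataoff > skblen) ? -1 : 0;	/* -1 models -NF_ACCEPT */
}

int main(void)
{
	unsigned off;
	printf("%d\n", l4_offset(0, 5, 40, &off));	/* ok: 20 <= 40 */
	printf("%d\n", l4_offset(0, 15, 40, &off));	/* bogus: 60 > 40 */
	return 0;
}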
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9f6b22206c52..95ac6d7e0f42 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -310,10 +310,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
 	if (opt && opt->optlen) {
-		int opt_size = sizeof(struct ip_options) + opt->optlen;
+		int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;

 		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
-		if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
+		if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
 			kfree(ireq->opt);
 			ireq->opt = NULL;
 		}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3a8cbf72b06e..392d594521c1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -839,8 +839,7 @@ new_segment:
wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
-		if (copied)
-			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+		tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -849,7 +848,7 @@ wait_for_memory:
 	}

out:
-	if (copied)
+	if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
 		tcp_push(sk, flags, mss_now, tp->nonagle);
 	return copied;
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 1eba160b72dc..c35d91f0fb11 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 			.tcpv_rttcnt = ca->cnt_rtt,
 			.tcpv_minrtt = ca->base_rtt,
 		};
-		u64 t = ca->sum_rtt;

-		do_div(t, ca->cnt_rtt);
-		info.tcpv_rtt = t;
+		if (info.tcpv_rttcnt > 0) {
+			u64 t = ca->sum_rtt;

+			do_div(t, info.tcpv_rttcnt);
+			info.tcpv_rtt = t;
+		}
 		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index add69a11754d..20af12ae55a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5315,7 +5315,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			if (tp->copied_seq == tp->rcv_nxt &&
 			    len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
-				if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+				if (tp->ucopy.task == current &&
+				    sock_owned_by_user(sk) &&
+				    tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
 					copied_early = 1;
 					eaten = 1;
 				}
@@ -5735,6 +5737,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			goto discard;

 		if (th->syn) {
+			if (th->fin)
+				goto discard;
 			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
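Annotation: the tcp_illinois hunk above guards a do_div() whose divisor, cnt_rtt, is a sample counter that can legitimately be zero when the diag hook fires before any RTT sample has arrived. A standalone model of the guarded average:

#include <stdio.h>
#include <stdint.h>

/* Model of the tcp_illinois_info() fix: averaging RTT samples must
 * skip the division entirely when the interval collected none. */
static uint64_t avg_rtt(uint64_t sum_rtt, uint32_t cnt_rtt,
			uint64_t fallback)
{
	return cnt_rtt > 0 ? sum_rtt / cnt_rtt : fallback;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)avg_rtt(3000, 4, 0));
	printf("%llu\n", (unsigned long long)avg_rtt(0, 0, 0)); /* no div0 */
	return 0;
}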
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ab7165565d23..8a0bff623731 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -153,6 +153,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	__be32 daddr, nexthop;
 	int tmp;
 	int err;
+	struct ip_options_rcu *inet_opt;

 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
@@ -161,10 +162,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		return -EAFNOSUPPORT;

 	nexthop = daddr = usin->sin_addr.s_addr;
-	if (inet->opt && inet->opt->srr) {
+	inet_opt = inet->inet_opt;
+	if (inet_opt && inet_opt->opt.srr) {
 		if (!daddr)
 			return -EINVAL;
-		nexthop = inet->opt->faddr;
+		nexthop = inet_opt->opt.faddr;
 	}

 	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
@@ -182,7 +184,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		return -ENETUNREACH;
 	}

-	if (!inet->opt || !inet->opt->srr)
+	if (!inet_opt || !inet_opt->opt.srr)
 		daddr = rt->rt_dst;

 	if (!inet->inet_saddr)
@@ -216,8 +218,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->inet_daddr = daddr;

 	inet_csk(sk)->icsk_ext_hdr_len = 0;
-	if (inet->opt)
-		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+	if (inet_opt)
+		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

@@ -812,17 +814,18 @@ static void syn_flood_warning(struct sk_buff *skb)
 /*
  * Save and compile IPv4 options into the request_sock if needed.
  */
-static struct ip_options *tcp_v4_save_options(struct sock *sk,
-					      struct sk_buff *skb)
+static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
+						  struct sk_buff *skb)
 {
-	struct ip_options *opt = &(IPCB(skb)->opt);
-	struct ip_options *dopt = NULL;
+	const struct ip_options *opt = &(IPCB(skb)->opt);
+	struct ip_options_rcu *dopt = NULL;

 	if (opt && opt->optlen) {
-		int opt_size = optlength(opt);
+		int opt_size = sizeof(*dopt) + opt->optlen;
+
 		dopt = kmalloc(opt_size, GFP_ATOMIC);
 		if (dopt) {
-			if (ip_options_echo(dopt, skb)) {
+			if (ip_options_echo(&dopt->opt, skb)) {
 				kfree(dopt);
 				dopt = NULL;
 			}
@@ -1412,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
#endif
+	struct ip_options_rcu *inet_opt;

 	if (sk_acceptq_is_full(sk))
 		goto exit_overflow;
@@ -1432,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->inet_daddr   = ireq->rmt_addr;
 	newinet->inet_rcv_saddr = ireq->loc_addr;
 	newinet->inet_saddr	      = ireq->loc_addr;
-	newinet->opt	      = ireq->opt;
+	inet_opt	      = ireq->opt;
+	rcu_assign_pointer(newinet->inet_opt, inet_opt);
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newinet->opt)
-		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
+	if (inet_opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 	newinet->inet_id = newtp->write_seq ^ jiffies;

 	tcp_mtup_init(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6fdff3045b62..407a6ff44415 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1566,8 +1566,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 		goto send_now;
 	}

-	/* Ok, it looks like it is advisable to defer.  */
-	tp->tso_deferred = 1 | (jiffies << 1);
+	/* Ok, it looks like it is advisable to defer.
+	 * Do not rearm the timer if already set to not break TCP ACK clocking.
+	 */
+	if (!tp->tso_deferred)
+		tp->tso_deferred = 1 | (jiffies << 1);

 	return 1;
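Annotation: tp->tso_deferred above packs a "deferral pending" flag into bit 0 and the shifted jiffies stamp into the remaining bits; re-arming it on every pass would keep pushing the stamp forward and defeat the deferral timeout. A standalone model of the encoding (field and function names here are illustrative):

#include <stdio.h>

/* Bit 0 flags "a deferral is pending"; bits 1..N hold the truncated
 * jiffies value of the *first* deferral. */
static unsigned long arm_once(unsigned long tso_deferred,
			      unsigned long jiffies)
{
	if (!tso_deferred)	/* only stamp the first deferral */
		tso_deferred = 1UL | (jiffies << 1);
	return tso_deferred;
}

int main(void)
{
	unsigned long t = 0;
	t = arm_once(t, 1000);	/* armed, stamped at 1000 */
	t = arm_once(t, 1005);	/* unchanged: stamp still 1000 */
	printf("pending=%lu stamp=%lu\n", t & 1, t >> 1);
	return 0;
}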
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7932dc68c669..5346abf0e095 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -705,7 +705,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 /*
  * Push out all pending data as one UDP datagram. Socket is locked.
 */
-static int udp_push_pending_frames(struct sock *sk)
+int udp_push_pending_frames(struct sock *sk)
 {
 	struct udp_sock  *up = udp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
@@ -767,6 +767,7 @@ out:
 	up->pending = 0;
 	return err;
 }
+EXPORT_SYMBOL(udp_push_pending_frames);

 int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t len)
@@ -784,6 +785,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int err, is_udplite = IS_UDPLITE(sk);
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+	struct ip_options_data opt_copy;

 	if (len > 0xFFFF)
 		return -EMSGSIZE;
@@ -855,22 +857,32 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		free = 1;
 		connected = 0;
 	}
-	if (!ipc.opt)
-		ipc.opt = inet->opt;
+	if (!ipc.opt) {
+		struct ip_options_rcu *inet_opt;
+
+		rcu_read_lock();
+		inet_opt = rcu_dereference(inet->inet_opt);
+		if (inet_opt) {
+			memcpy(&opt_copy, inet_opt,
+			       sizeof(*inet_opt) + inet_opt->opt.optlen);
+			ipc.opt = &opt_copy.opt;
+		}
+		rcu_read_unlock();
+	}

 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;

-	if (ipc.opt && ipc.opt->srr) {
+	if (ipc.opt && ipc.opt->opt.srr) {
 		if (!daddr)
 			return -EINVAL;
-		faddr = ipc.opt->faddr;
+		faddr = ipc.opt->opt.faddr;
 		connected = 0;
 	}
 	tos = RT_TOS(inet->tos);
 	if (sock_flag(sk, SOCK_LOCALROUTE) ||
 	    (msg->msg_flags & MSG_DONTROUTE) ||
-	    (ipc.opt && ipc.opt->is_strictroute)) {
+	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
 		tos |= RTO_ONLINK;
 		connected = 0;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d854453b4daa..138a2db58bf8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1446,7 +1446,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,

 		   First: no IPv4 options.
 		 */
-		newinet->opt = NULL;
+		newinet->inet_opt = NULL;
 		newnp->ipv6_fl_list = NULL;

 		/* Clone RX bits */
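Annotation: sendmsg paths cannot stay inside rcu_read_lock() for the rest of the transmit path (it may sleep), so the raw.c and udp.c hunks above duplicate the option block onto the stack while the lock is held. The pattern, condensed from those hunks (kernel context, not standalone code):

struct ip_options_data opt_copy;
struct ip_options_rcu *inet_opt;

rcu_read_lock();
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt) {
	/* copy wrapper + option bytes; the copy outlives the lock */
	memcpy(&opt_copy, inet_opt,
	       sizeof(*inet_opt) + inet_opt->opt.optlen);
	ipc.opt = &opt_copy.opt;
}
rcu_read_unlock();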
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a1d3d32237dd..bf7702a5f0b6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -872,11 +872,16 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	struct udphdr *uh;
 	struct udp_sock  *up = udp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	struct flowi *fl = &inet->cork.fl;
+	struct flowi *fl;
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
 	__wsum csum = 0;

+	if (up->pending == AF_INET)
+		return udp_push_pending_frames(sk);
+
+	fl = &inet->cork.fl;
+
 	/* Grab the skbuff where UDP header space exists. */
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 		goto out;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c18286a2167b..1e3b2ec74622 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1160,6 +1160,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *skb, *rskb, *cskb;
 	int err = 0;

+	msg->msg_namelen = 0;
+
 	if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
 	    skb_queue_empty(&iucv->backlog_skb_q) &&
 	    skb_queue_empty(&sk->sk_receive_queue) &&
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index ad4296c852eb..121c92e3c128 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -719,6 +719,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int target;	/* Read at least this many bytes */
 	long timeo;

+	msg->msg_namelen = 0;
+
 	lock_sock(sk);
 	copied = -ENOTCONN;
 	if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
@@ -959,14 +961,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_llc sllc;
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	int rc = 0;
+	int rc = -EBADF;

 	memset(&sllc, 0, sizeof(sllc));
 	lock_sock(sk);
 	if (sock_flag(sk, SOCK_ZAPPED))
 		goto out;
 	*uaddrlen = sizeof(sllc);
-	memset(uaddr, 0, *uaddrlen);
 	if (peer) {
 		rc = -ENOTCONN;
 		if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d88c2fa..bd9d805a85a6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2469,6 +2469,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_timeout_user t;

+		memset(&t, 0, sizeof(t));
 		__ip_vs_get_timeouts(&t);
 		if (copy_to_user(user, &t, sizeof(t)) != 0)
 			ret = -EFAULT;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fe6313140563..1c889685d575 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -830,12 +830,19 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 	return 0;
 }

-int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
+static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 {
 	int len = skb->len;

 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, len);
+	return len;
+}
+
+int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
+{
+	int len = __netlink_sendskb(sk, skb);
+
 	sock_put(sk);
 	return len;
 }
@@ -960,9 +967,8 @@ static inline int netlink_broadcast_deliver(struct sock *sk,
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 	    !test_bit(0, &nlk->state)) {
 		skb_set_owner_r(skb, sk);
-		skb_queue_tail(&sk->sk_receive_queue, skb);
-		sk->sk_data_ready(sk, skb->len);
-		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
+		__netlink_sendskb(sk, skb);
+		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
 	}
 	return -1;
 }
@@ -1685,10 +1691,8 @@ static int netlink_dump(struct sock *sk)

 		if (sk_filter(sk, skb))
 			kfree_skb(skb);
-		else {
-			skb_queue_tail(&sk->sk_receive_queue, skb);
-			sk->sk_data_ready(sk, skb->len);
-		}
+		else
+			__netlink_sendskb(sk, skb);
 		return 0;
 	}
@@ -1700,10 +1704,8 @@ static int netlink_dump(struct sock *sk)

 	if (sk_filter(sk, skb))
 		kfree_skb(skb);
-	else {
-		skb_queue_tail(&sk->sk_receive_queue, skb);
-		sk->sk_data_ready(sk, skb->len);
-	}
+	else
+		__netlink_sendskb(sk, skb);

 	if (cb->done)
 		cb->done(cb);
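Annotation: several recvmsg implementations patched here (iucv and llc above; rds and rose just below) left msg_namelen untouched on paths that fill in no source address, so the caller copied stale kernel stack back to userspace as an "address". A standalone model of the leak and the fix:

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct msghdr (illustrative). */
struct msghdr_model { char name[16]; int namelen; };

static void recvmsg_model(struct msghdr_model *msg, int have_addr)
{
	msg->namelen = 0;	/* the fix: default to "no address" */
	if (have_addr) {
		memcpy(msg->name, "10.0.0.1", 9);
		msg->namelen = 9;
	}
}

int main(void)
{
	struct msghdr_model m = { "leftover-stack!", 15 };	/* dirty */
	recvmsg_model(&m, 0);
	printf("namelen=%d\n", m.namelen);	/* 0: nothing is copied out */
	return 0;
}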
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4096a66f6379..dbe4dd160631 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -812,7 +812,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)

 	if (likely(po->tx_ring.pg_vec)) {
 		ph = skb_shinfo(skb)->destructor_arg;
-		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index dc1e8ae81781..ca21189392fe 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -862,6 +862,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int flags = msg->msg_flags;
 	int err, done;

+	if (len > USHORT_MAX)
+		return -EMSGSIZE;
+
 	if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|
 				MSG_CMSG_COMPAT)) ||
 	    !(msg->msg_flags & MSG_EOR))
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 93aadc0173cb..de4d79c56662 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -411,6 +411,8 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,

 	rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);

+	msg->msg_namelen = 0;
+
 	if (msg_flags & MSG_OOB)
 		goto out;

@@ -486,6 +488,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			sin->sin_port = inc->i_hdr.h_sport;
 			sin->sin_addr.s_addr = inc->i_saddr;
 			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+			msg->msg_namelen = sizeof(*sin);
 		}
 		break;
 	}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 547e5cd6d375..11072546d7e9 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1277,6 +1277,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

 	if (srose != NULL) {
+		memset(srose, 0, msg->msg_namelen);
 		srose->srose_family = AF_ROSE;
 		srose->srose_addr   = rose->dest_addr;
 		srose->srose_call   = rose->dest_call;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f9fc6ec1fef6..faebd8a6da57 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	struct tcf_common *pc;
 	int ret = 0;
 	int err;
+#ifdef CONFIG_GACT_PROB
+	struct tc_gact_p *p_parm = NULL;
+#endif

 	if (nla == NULL)
 		return -EINVAL;
@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
#ifndef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB] != NULL)
 		return -EOPNOTSUPP;
+#else
+	if (tb[TCA_GACT_PROB]) {
+		p_parm = nla_data(tb[TCA_GACT_PROB]);
+		if (p_parm->ptype >= MAX_RAND)
+			return -EINVAL;
+	}
#endif

 	pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	spin_lock_bh(&gact->tcf_lock);
 	gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
-	if (tb[TCA_GACT_PROB] != NULL) {
-		struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+	if (p_parm) {
 		gact->tcfg_paction = p_parm->paction;
 		gact->tcfg_pval    = p_parm->pval;
 		gact->tcfg_ptype   = p_parm->ptype;
@@ -132,7 +140,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result

 	spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
-	if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+	if (gact->tcfg_ptype)
 		action = gact_rand[gact->tcfg_ptype](gact);
 	else
 		action = gact->tcf_action;
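Annotation: the act_gact hunks above validate a netlink-supplied index before it can ever select an entry of the gact_rand[] function-pointer table. A standalone model of the bounds check (the MAX_RAND value and handler bodies are illustrative):

#include <stdio.h>

#define MAX_RAND 2

typedef int (*gact_fn)(void);
static int gact_determ(void)   { return 0; }
static int gact_netrand(void)  { return 1; }
static gact_fn gact_rand[MAX_RAND] = { gact_determ, gact_netrand };

/* Model of the tcf_gact_init() fix: an index parsed out of a netlink
 * attribute must be range-checked before it is stored or used. */
static int set_ptype(unsigned int ptype)
{
	if (ptype >= MAX_RAND)
		return -1;	/* -EINVAL in the kernel */
	return gact_rand[ptype]();
}

int main(void)
{
	printf("%d\n", set_ptype(1));	/* ok */
	printf("%d\n", set_ptype(7));	/* rejected: no wild indirect call */
	return 0;
}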
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c92..7d97904c7dae 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -545,11 +545,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.packets	= q->packetsin;
 		opt.bytesin	= q->bytesin;

-		if (gred_wred_mode(table)) {
-			q->parms.qidlestart =
-				table->tab[table->def]->parms.qidlestart;
-			q->parms.qavg = table->tab[table->def]->parms.qavg;
-		}
+		if (gred_wred_mode(table))
+			gred_load_wred_set(table, q);

 		opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0b52b8de562c..efabd30425ec 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -866,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	q->now = psched_get_time();
 	start_at = jiffies;

-	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
+	next_event = q->now + 5LLU * PSCHED_TICKS_PER_SEC;

 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 		/* common case optimization - skip event handler quickly */
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4714ff162bbd..e105245dac93 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -202,10 +202,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
 		    (skb->ip_summed == CHECKSUM_PARTIAL &&
-		     skb_checksum_help(skb))) {
-			sch->qstats.drops++;
-			return NET_XMIT_DROP;
-		}
+		     skb_checksum_help(skb)))
+			return qdisc_drop(skb, sch);

 		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 	}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ddbbf7c81fa1..ce9ef56708ac 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -71,7 +71,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key)
 		return;

 	if (atomic_dec_and_test(&key->refcnt)) {
-		kfree(key);
+		kzfree(key);
 		SCTP_DBG_OBJCNT_DEC(keys);
 	}
 }
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db59a37..df773fcaaa03 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -282,7 +282,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 			goto errout;
 		err = sctp_user_addto_chunk(chunk, offset, len, msgh->msg_iov);
 		if (err < 0)
-			goto errout;
+			goto errout_chunk_free;

 		offset += len;

@@ -322,7 +322,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr
 			   - (__u8 *)chunk->skb->data);
 		if (err < 0)
-			goto errout;
+			goto errout_chunk_free;

 		sctp_datamsg_assign(msg, chunk);
 		list_add_tail(&chunk->frag_list, &msg->chunks);
@@ -330,6 +330,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,

 	return msg;

+errout_chunk_free:
+	sctp_chunk_free(chunk);
+
errout:
 	list_for_each_safe(pos, temp, &msg->chunks) {
 		list_del_init(pos);
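Annotation: the sch_htb one-liner above is a classic integer-promotion fix; the multiply happens in 32-bit arithmetic before the result is widened into the 64-bit psched time. A standalone demonstration (the tick constant here is an assumed illustrative value, not the kernel's PSCHED_TICKS_PER_SEC; any 32-bit constant large enough to overflow shows the same failure):

#include <stdio.h>
#include <stdint.h>

#define TICKS_PER_SEC 1000000000U	/* illustrative 32-bit constant */

int main(void)
{
	uint64_t now = 0;
	/* product computed in 32 bits, wraps before the widening ... */
	uint64_t bad  = now + 5 * TICKS_PER_SEC;
	/* ... forcing a 64-bit operand widens the multiply itself */
	uint64_t good = now + 5LLU * TICKS_PER_SEC;

	printf("bad=%llu good=%llu\n",
	       (unsigned long long)bad, (unsigned long long)good);
	return 0;
}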
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7ec09ba03a1c..e80ba5def747 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -250,6 +250,8 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
 /* Final destructor for endpoint.  */
 static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 {
+	int i;
+
 	SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);

 	/* Free up the HMAC transform. */
@@ -272,6 +274,9 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	sctp_inq_free(&ep->base.inqueue);
 	sctp_bind_addr_free(&ep->base.bind_addr);

+	for (i = 0; i < SCTP_HOW_MANY_SECRETS; ++i)
+		memset(&ep->secret_key[i], 0, SCTP_SECRET_SIZE);
+
 	/* Remove and free the port */
 	if (sctp_sk(ep->base.sk)->bind_hash)
 		sctp_put_port(ep->base.sk);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 03daceb2d9a0..38c19d38f438 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3276,7 +3276,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 	ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);

out:
-	kfree(authkey);
+	kzfree(authkey);
 	return ret;
 }

diff --git a/net/socket.c b/net/socket.c
index c63ebf4e31b5..b0d3b6a025ea 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -746,9 +746,9 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,

 	sock = file->private_data;

-	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
-	if (more)
-		flags |= MSG_MORE;
+	flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+	/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
+	flags |= more;

 	return kernel_sendpage(sock, page, offset, size, flags);
 }
@@ -2508,6 +2508,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 	if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
 		return -EFAULT;

+	memset(&ifc, 0, sizeof(ifc));
 	if (ifc32.ifcbuf == 0) {
 		ifc32.ifc_len = 0;
 		ifc.ifc_len = 0;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index dbf50f9f7ed2..f4da18e14c0b 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -459,7 +459,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
 {
 	struct inode *inode;

-	BUG_ON(!d_unhashed(dentry));
+	d_drop(dentry);
 	inode = rpc_get_inode(dir->i_sb, mode);
 	if (!inode)
 		goto out_err;
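Annotation: the sctp kzfree()/memset changes above wipe HMAC key material before its storage is reused, so freed memory cannot later be scraped for old secrets. A standalone model of the idea (the in-kernel kzfree() derived the size itself via ksize(); the explicit length parameter here is a simplification, and a production version would also need a barrier such as memset_s/explicit_bzero to stop the compiler eliding the wipe):

#include <stdlib.h>
#include <string.h>

/* Model of kzfree(): zero the buffer, then release it. */
static void kzfree_model(void *p, size_t len)
{
	if (!p)
		return;
	memset(p, 0, len);	/* wipe secrets before the heap reuses this */
	free(p);
}

int main(void)
{
	char *key = malloc(32);
	if (!key)
		return 1;
	memcpy(key, "super-secret-hmac-key", 22);
	kzfree_model(key, 32);
	return 0;
}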
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 33df29bd8c61..afa0bceb67ad 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -310,7 +310,6 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 */
 void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
@@ -384,8 +383,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 			rqstp, rqstp->rq_xprt);
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 		pool->sp_stats.threads_woken++;
 		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
@@ -663,8 +660,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	} else {
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
@@ -754,6 +749,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	} else
 		len = xprt->xpt_ops->xpo_recvfrom(rqstp);
 	dprintk("svc: got len=%d\n", len);
+	rqstp->rq_reserved = serv->sv_max_mesg;
+	atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	}

 	/* No data, incomplete (TCP) read, or accept() */
@@ -807,7 +804,8 @@ int svc_send(struct svc_rqst *rqstp)

 	/* Grab mutex to serialize outgoing data. */
 	mutex_lock(&xprt->xpt_mutex);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+			|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b7148bed6b3..072835bc61fa 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -370,7 +370,7 @@ static void unix_sock_destructor(struct sock *sk)
#endif
 }

-static int unix_release_sock(struct sock *sk, int embrion)
+static void unix_release_sock(struct sock *sk, int embrion)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct dentry *dentry;
@@ -445,8 +445,6 @@ static int unix_release_sock(struct sock *sk, int embrion)

 	if (unix_tot_inflight)
 		unix_gc();		/* Garbage collect fds */
-
-	return 0;
 }

 static int unix_listen(struct socket *sock, int backlog)
@@ -661,9 +659,10 @@ static int unix_release(struct socket *sock)
 	if (!sk)
 		return 0;

+	unix_release_sock(sk, 0);
 	sock->sk = NULL;

-	return unix_release_sock(sk, 0);
+	return 0;
 }

 static int unix_autobind(struct socket *sock)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6106b72826d3..88c6c8d59d79 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,6 +598,7 @@ out:

 static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
+	memset(p, 0, sizeof(*p));
 	memcpy(&p->id, &x->id, sizeof(p->id));
 	memcpy(&p->sel, &x->sel, sizeof(p->sel));
 	memcpy(&p->lft, &x->lft, sizeof(p->lft));
@@ -764,6 +765,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
 {
 	struct xfrm_dump_info info;
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 	if (!skb)
@@ -774,9 +776,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
 	info.nlmsg_seq = seq;
 	info.nlmsg_flags = 0;

-	if (dump_one_state(x, 0, &info)) {
+	err = dump_one_state(x, 0, &info);
+	if (err) {
 		kfree_skb(skb);
-		return NULL;
+		return ERR_PTR(err);
 	}

 	return skb;
@@ -1199,6 +1202,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy

 static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
 {
+	memset(p, 0, sizeof(*p));
 	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
 	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
 	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
@@ -1302,6 +1306,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
 		struct xfrm_user_tmpl *up = &vec[i];
 		struct xfrm_tmpl *kp = &xp->xfrm_vec[i];

+		memset(up, 0, sizeof(*up));
 		memcpy(&up->id, &kp->id, sizeof(up->id));
 		up->family = kp->encap_family;
 		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
@@ -1430,6 +1435,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
 {
 	struct xfrm_dump_info info;
 	struct sk_buff *skb;
+	int err;

 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
@@ -1440,9 +1446,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
 	info.nlmsg_seq = seq;
 	info.nlmsg_flags = 0;

-	if (dump_one_policy(xp, dir, 0, &info) < 0) {
+	err = dump_one_policy(xp, dir, 0, &info);
+	if (err) {
 		kfree_skb(skb);
-		return NULL;
+		return ERR_PTR(err);
 	}

 	return skb;
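Annotation: the xfrm memset-before-copy hunks above close padding leaks; building a struct field-by-field leaves compiler-inserted padding bytes uninitialized, and copying the whole object to userspace ships those bytes out. A standalone demonstration:

#include <stdio.h>
#include <string.h>

/* On most ABIs this struct has 3 padding bytes after 'a'. */
struct wire { char a; int b; };

static void fill(struct wire *p, char a, int b)
{
	memset(p, 0, sizeof(*p));	/* the fix: no stale padding escapes */
	p->a = a;
	p->b = b;
}

int main(void)
{
	struct wire w;
	memset(&w, 0xAA, sizeof(w));	/* simulate dirty stack contents */
	fill(&w, 'x', 42);
	const unsigned char *b = (const unsigned char *)&w;
	for (size_t i = 0; i < sizeof(w); i++)
		printf("%02x ", b[i]);	/* padding reads back as 00, not aa */
	printf("\n");
	return 0;
}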
[exist]");
 		return 0;
 	}
@@ -207,7 +207,7 @@ static int install_process_keyring(void)
 	ret = install_process_keyring_to_cred(new);
 	if (ret < 0) {
 		abort_creds(new);
-		return ret != -EEXIST ?: 0;
+		return ret != -EEXIST ? ret : 0;
 	}

 	return commit_creds(new);
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 160b1bd0cd62..24d44b2f61ac 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -290,10 +290,10 @@ int snd_seq_timer_open(struct snd_seq_queue *q)
 			tid.device = SNDRV_TIMER_GLOBAL_SYSTEM;
 			err = snd_timer_open(&t, str, &tid, q->queue);
 		}
-		if (err < 0) {
-			snd_printk(KERN_ERR "seq fatal error: cannot create timer (%i)\n", err);
-			return err;
-		}
+	}
+	if (err < 0) {
+		snd_printk(KERN_ERR "seq fatal error: cannot create timer (%i)\n", err);
+		return err;
 	}
 	t->callback = snd_seq_timer_interrupt;
 	t->callback_data = q;
diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c
index b2b3c2d1cf8b..0478657c15bc 100644
--- a/usr/gen_init_cpio.c
+++ b/usr/gen_init_cpio.c
@@ -299,7 +299,7 @@ static int cpio_mkfile(const char *name, const char *location,
 	int retval;
 	int rc = -1;
 	int namesize;
-	int i;
+	unsigned int i;

 	mode |= S_IFREG;

@@ -375,25 +375,28 @@ error:

 static char *cpio_replace_env(char *new_location)
 {
-       char expanded[PATH_MAX + 1];
-       char env_var[PATH_MAX + 1];
-       char *start;
-       char *end;
-
-       for (start = NULL; (start = strstr(new_location, "${")); ) {
-               end = strchr(start, '}');
-               if (start < end) {
-                       *env_var = *expanded = '\0';
-                       strncat(env_var, start + 2, end - start - 2);
-                       strncat(expanded, new_location, start - new_location);
-                       strncat(expanded, getenv(env_var), PATH_MAX);
-                       strncat(expanded, end + 1, PATH_MAX);
-                       strncpy(new_location, expanded, PATH_MAX);
-               } else
-                       break;
-       }
-
-       return new_location;
+	char expanded[PATH_MAX + 1];
+	char env_var[PATH_MAX + 1];
+	char *start;
+	char *end;
+
+	for (start = NULL; (start = strstr(new_location, "${")); ) {
+		end = strchr(start, '}');
+		if (start < end) {
+			*env_var = *expanded = '\0';
+			strncat(env_var, start + 2, end - start - 2);
+			strncat(expanded, new_location, start - new_location);
+			strncat(expanded, getenv(env_var),
+				PATH_MAX - strlen(expanded));
+			strncat(expanded, end + 1,
+				PATH_MAX - strlen(expanded));
+			strncpy(new_location, expanded, PATH_MAX);
+			new_location[PATH_MAX] = 0;
+		} else
+			break;
+	}
+
+	return new_location;
 }
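Annotation: the process_keys.c one-liner above fixes a misuse of GCC's "a ?: b" (Elvis) extension, which returns the value of the *comparison*, so any error other than -EEXIST collapsed to 1 instead of being propagated. A standalone demonstration (GCC or Clang required for the ?: form):

#include <stdio.h>

#define EEXIST 17

/* Old form: "ret != -EEXIST" evaluates to 0 or 1, and ?: returns that
 * truth value itself when it is non-zero, losing the real errno. */
static int old_ret(int ret) { return ret != -EEXIST ?: 0; }
static int new_ret(int ret) { return ret != -EEXIST ? ret : 0; }

int main(void)
{
	printf("old(-ENOMEM)=%d new(-ENOMEM)=%d\n",
	       old_ret(-12), new_ret(-12));		/* 1 vs -12 */
	printf("old(-EEXIST)=%d new(-EEXIST)=%d\n",
	       old_ret(-EEXIST), new_ret(-EEXIST));	/* 0 vs 0 */
	return 0;
}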
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 3500dee9cf2b..57afcdaa2863 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -72,9 +72,12 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 		u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
 		u64 redir_content;

-		ASSERT(redir_index < IOAPIC_NUM_PINS);
+		if (redir_index < IOAPIC_NUM_PINS)
+			redir_content =
+				ioapic->redirtbl[redir_index].bits;
+		else
+			redir_content = ~0ULL;

-		redir_content = ioapic->redirtbl[redir_index].bits;
 		result = (ioapic->ioregsel & 0x1) ?
 		    (redir_content >> 32) & 0xffffffff :
 		    redir_content & 0xffffffff;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2de..8c510edca9c9 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -174,18 +174,20 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 		return -ENODEV;
 	}

+	mutex_lock(&kvm->slots_lock);
+
 	kvm->arch.iommu_domain = iommu_domain_alloc();
-	if (!kvm->arch.iommu_domain)
-		return -ENOMEM;
+	if (!kvm->arch.iommu_domain) {
+		r = -ENOMEM;
+		goto out_unlock;
+	}

 	r = kvm_iommu_map_memslots(kvm);
 	if (r)
-		goto out_unmap;
-
-	return 0;
+		kvm_iommu_unmap_memslots(kvm);

-out_unmap:
-	kvm_iommu_unmap_memslots(kvm);
+out_unlock:
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }

@@ -212,6 +214,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 	iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
 }

+void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
+}
+
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
 	int i;
@@ -220,8 +227,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 	slots = rcu_dereference(kvm->memslots);

 	for (i = 0; i < slots->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
-				    slots->memslots[i].npages);
+		kvm_iommu_unmap_pages(kvm, &slots->memslots[i]);
 	}

 	return 0;
@@ -235,7 +241,11 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
 	if (!domain)
 		return 0;

+	mutex_lock(&kvm->slots_lock);
 	kvm_iommu_unmap_memslots(kvm);
+	kvm->arch.iommu_domain = NULL;
+	mutex_unlock(&kvm->slots_lock);
+
 	iommu_domain_free(domain);
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b624139aea6e..3d2974fab62e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -697,12 +697,13 @@ skip_lpage:
 		goto out_free;

#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
+	/* map/unmap the pages in iommu page table */
 	if (npages) {
 		r = kvm_iommu_map_pages(kvm, &new);
 		if (r)
 			goto out_free;
-	}
+	} else
+		kvm_iommu_unmap_pages(kvm, &old);
#endif

 	r = -ENOMEM;
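Annotation: the ioapic hunk above turns a debug-only assertion into a real bounds check that fails safe; a guest-controlled redirection-table index out of range now reads back as all-ones instead of indexing past the array. A standalone model:

#include <stdio.h>
#include <stdint.h>

#define IOAPIC_NUM_PINS 24

/* Model of ioapic_read_indirect(): out-of-range MMIO indexes must not
 * reach the table; returning ~0 mimics open-bus reads of bad registers. */
static uint64_t read_redir(const uint64_t *tbl, uint32_t index)
{
	return index < IOAPIC_NUM_PINS ? tbl[index] : ~0ULL;
}

int main(void)
{
	uint64_t tbl[IOAPIC_NUM_PINS] = { [3] = 0x10000 };
	printf("%llx\n", (unsigned long long)read_redir(tbl, 3));
	printf("%llx\n", (unsigned long long)read_redir(tbl, 200));
	return 0;
}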