diff --git a/Makefile b/Makefile index c45298b8b2d5..a01f2573731d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 12 -SUBLEVEL = 47 +SUBLEVEL = 48 EXTRAVERSION = NAME = One Giant Leap for Frogkind diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 560227b817fe..01c7270b5e84 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2810,34 +2810,51 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf) static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { + struct driver_data *dd = (struct driver_data *)f->private_data; int size = *offset; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; + int rv = 0; if (!len || *offset) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: status buffer\n"); + return -ENOMEM; + } + size += show_device_status(NULL, buf); *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { struct driver_data *dd = (struct driver_data *)f->private_data; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; u32 group_allocated; int size = *offset; - int n; + int n, rv = 0; if (!len || size) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: register buffer\n"); + return -ENOMEM; + } + size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) @@ -2892,21 +2909,30 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { struct driver_data *dd = (struct driver_data *)f->private_data; - char buf[MTIP_DFS_MAX_BUF_SIZE]; + char *buf; int size = *offset; + int rv = 0; if (!len || size) return 0; + buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(&dd->pdev->dev, + "Memory allocation: flag buffer\n"); + return -ENOMEM; + } + size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", dd->port->flags); size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", @@ -2915,9 +2941,10 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, *offset = size <= len ? size : len; size = copy_to_user(ubuf, buf, *offset); if (size) - return -EFAULT; + rv = -EFAULT; - return *offset; + kfree(buf); + return rv ? rv : *offset; } static const struct file_operations mtip_device_status_fops = { diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 4a2d91536a8d..c843cf0aa623 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -73,6 +73,11 @@ static void radeon_hotplug_work_func(struct work_struct *work) struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + /* we can race here at startup, some boards seem to trigger + * hotplug irqs when they shouldn't. 
*/ + if (!rdev->mode_info.mode_config_initialized) + return; + mutex_lock(&mode_config->mutex); if (mode_config->num_connector) { list_for_each_entry(connector, &mode_config->connector_list, head) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8c91fd5eb6fd..3ac9c4194814 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -524,9 +524,18 @@ gigaset_tty_open(struct tty_struct *tty) cs->hw.ser->tty = tty; atomic_set(&cs->hw.ser->refcnt, 1); init_completion(&cs->hw.ser->dead_cmp); - tty->disc_data = cs; + /* Set the amount of data we're willing to receive per call + * from the hardware driver to half of the input buffer size + * to leave some reserve. + * Note: We don't do flow control towards the hardware driver. + * If more data is received than will fit into the input buffer, + * it will be dropped and an error will be logged. This should + * never happen as the device is slow and the buffer size ample. + */ + tty->receive_room = RBUFSIZE/2; + /* OK.. Initialization of the datastructures and the HW is done.. Now * startup system and notify the LL that we are ready to run */ diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 4296155090b2..afcd18428945 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -855,7 +855,7 @@ static void mq_destroy(struct dm_cache_policy *p) struct mq_policy *mq = to_mq_policy(p); free_bitset(mq->allocation_bitset); - kfree(mq->table); + vfree(mq->table); free_entries(mq); kfree(mq); } @@ -1106,7 +1106,7 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); mq->hash_bits = ffs(mq->nr_buckets) - 1; - mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); + mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets); if (!mq->table) goto bad_alloc_table; diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index e775bfbc5e6e..5f187294c85a 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -872,8 +872,8 @@ gpe0_done: lpc_ich_enable_gpio_space(dev); lpc_ich_finalize_cell(dev, &lpc_ich_cells[LPC_GPIO]); - ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_GPIO], - 1, NULL, 0, NULL); + ret = mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, + &lpc_ich_cells[LPC_GPIO], 1, NULL, 0, NULL); gpio_done: if (acpi_conflict) @@ -932,8 +932,8 @@ static int lpc_ich_init_wdt(struct pci_dev *dev) } lpc_ich_finalize_cell(dev, &lpc_ich_cells[LPC_WDT]); - ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_WDT], - 1, NULL, 0, NULL); + ret = mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, + &lpc_ich_cells[LPC_WDT], 1, NULL, 0, NULL); wdt_done: return ret; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5f95537d4896..b3892b0d2e61 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -671,6 +671,22 @@ static void bond_set_dev_addr(struct net_device *bond_dev, call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); } +static struct slave *bond_get_old_active(struct bonding *bond, + struct slave *new_active) +{ + struct slave *slave; + + bond_for_each_slave(bond, slave) { + if (slave == new_active) + continue; + + if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) + return slave; + } + + return NULL; +} + /* * bond_do_fail_over_mac * @@ -712,6 +728,9 @@ static void bond_do_fail_over_mac(struct bonding *bond, write_unlock_bh(&bond->curr_slave_lock); 
read_unlock(&bond->lock); + if (!old_active) + old_active = bond_get_old_active(bond, new_active); + if (old_active) { memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); memcpy(saddr.sa_data, old_active->dev->dev_addr, @@ -1917,6 +1936,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; pr_info("%s: destroying bond %s.\n", bond_dev->name, bond_dev->name); + bond_remove_proc_entry(bond); unregister_netdevice(bond_dev); } return ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 0416c5b3b35c..3990b435a081 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -558,7 +558,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) mlx4_slave_event(dev, i, eqe); } else { /* IB port */ @@ -584,7 +584,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; - s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + s_info = &priv->mfunc.master.vf_oper[i].vport[port].state; if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) mlx4_slave_event(dev, i, eqe); } diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 0857ca981fae..6bc9b12ba42a 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -359,6 +359,56 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { .release = pci_vpd_pci22_release, }; +static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, + void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_read_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, + const void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_write_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static const struct pci_vpd_ops pci_vpd_f0_ops = { + .read = pci_vpd_f0_read, + .write = pci_vpd_f0_write, + .release = pci_vpd_pci22_release, +}; + +static int pci_vpd_f0_dev_check(struct pci_dev *dev) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + int ret = 0; + + if (!tdev) + return -ENODEV; + if (!tdev->vpd || !tdev->multifunction || + dev->class != tdev->class || dev->vendor != tdev->vendor || + dev->device != tdev->device) + ret = -ENODEV; + + pci_dev_put(tdev); + return ret; +} + int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -367,12 +417,21 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) { + int ret = pci_vpd_f0_dev_check(dev); + + if (ret) + return ret; + } vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; vpd->base.len = PCI_VPD_PCI22_SIZE; - vpd->base.ops = &pci_vpd_pci22_ops; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) + vpd->base.ops = &pci_vpd_f0_ops; + else + vpd->base.ops = &pci_vpd_pci22_ops; mutex_init(&vpd->lock); 
vpd->cap = cap; vpd->busy = false; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a7b7eeaf35e8..877bfbb4e55c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1849,6 +1849,15 @@ static void quirk_netmos(struct pci_dev *dev) DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_NETMOS, PCI_ANY_ID, PCI_CLASS_COMMUNICATION_SERIAL, 8, quirk_netmos); +static void quirk_f0_vpd_link(struct pci_dev *dev) +{ + if (!dev->multifunction || !PCI_FUNC(dev->devfn)) + return; + dev->dev_flags |= PCI_DEV_FLAGS_VPD_REF_F0; +} +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + PCI_CLASS_NETWORK_ETHERNET, 8, quirk_f0_vpd_link); + static void quirk_e100_interrupt(struct pci_dev *dev) { u16 command, pmcsr; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c913e8cc3b26..ed7759980c47 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3423,7 +3423,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) */ nseg = scsi_dma_map(scsi_cmnd); - if (unlikely(!nseg)) + if (unlikely(nseg <= 0)) return 1; sgl += 1; /* clear the last flag in the fcp_rsp map entry */ diff --git a/fs/aio.c b/fs/aio.c index 329e6c1f3a43..31a5cb74ae1f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -146,6 +146,7 @@ struct kioctx { struct { unsigned tail; + unsigned completed_events; spinlock_t completion_lock; } ____cacheline_aligned_in_smp; @@ -899,6 +900,68 @@ out: return ret; } +/* refill_reqs_available + * Updates the reqs_available reference counts used for tracking the + * number of free slots in the completion ring. This can be called + * from aio_complete() (to optimistically update reqs_available) or + * from aio_get_req() (the we're out of events case). It must be + * called holding ctx->completion_lock. + */ +static void refill_reqs_available(struct kioctx *ctx, unsigned head, + unsigned tail) +{ + unsigned events_in_ring, completed; + + /* Clamp head since userland can write to it. */ + head %= ctx->nr_events; + if (head <= tail) + events_in_ring = tail - head; + else + events_in_ring = ctx->nr_events - (head - tail); + + completed = ctx->completed_events; + if (events_in_ring < completed) + completed -= events_in_ring; + else + completed = 0; + + if (!completed) + return; + + ctx->completed_events -= completed; + put_reqs_available(ctx, completed); +} + +/* user_refill_reqs_available + * Called to refill reqs_available when aio_get_req() encounters an + * out of space in the completion ring. + */ +static void user_refill_reqs_available(struct kioctx *ctx) +{ + spin_lock_irq(&ctx->completion_lock); + if (ctx->completed_events) { + struct aio_ring *ring; + unsigned head; + + /* Access of ring->head may race with aio_read_events_ring() + * here, but that's okay since whether we read the old version + * or the new version, and either will be valid. The important + * part is that head cannot pass tail since we prevent + * aio_complete() from updating tail by holding + * ctx->completion_lock. Even if head is invalid, the check + * against ctx->completed_events below will make sure we do the + * safe/right thing. + */ + ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; + kunmap_atomic(ring); + + refill_reqs_available(ctx, head, ctx->tail); + } + + spin_unlock_irq(&ctx->completion_lock); +} + /* aio_get_req * Allocate a slot for an aio request. * Returns NULL if no requests are free. 
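/*
 * [Editorial illustration -- not part of the patch above or of the kernel
 * sources.]  A minimal sketch, in plain C, of the ring-occupancy arithmetic
 * that the new refill_reqs_available() helper in fs/aio.c relies on: for a
 * circular completion ring indexed by head (consumer) and tail (producer),
 * the number of events still in the ring is computed modulo nr_events, with
 * head clamped first because userspace can write to it.  The function name
 * below (ring_events_in_use) is hypothetical and exists only for this
 * example.
 */
unsigned ring_events_in_use(unsigned head, unsigned tail, unsigned nr_events)
{
	/* Clamp head since userspace may have scribbled on it. */
	head %= nr_events;

	if (head <= tail)
		return tail - head;			/* no wrap-around */

	return nr_events - (head - tail);		/* tail has wrapped */
}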
@@ -907,8 +970,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) { struct kiocb *req; - if (!get_reqs_available(ctx)) - return NULL; + if (!get_reqs_available(ctx)) { + user_refill_reqs_available(ctx); + if (!get_reqs_available(ctx)) + return NULL; + } req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); if (unlikely(!req)) @@ -967,8 +1033,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2) struct kioctx *ctx = iocb->ki_ctx; struct aio_ring *ring; struct io_event *ev_page, *event; + unsigned tail, pos, head; unsigned long flags; - unsigned tail, pos; /* * Special case handling for sync iocbs: @@ -1029,10 +1095,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2) ctx->tail = tail; ring = kmap_atomic(ctx->ring_pages[0]); + head = ring->head; ring->tail = tail; kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + ctx->completed_events++; + if (ctx->completed_events > 1) + refill_reqs_available(ctx, head, tail); spin_unlock_irqrestore(&ctx->completion_lock, flags); pr_debug("added to ring %p at [%u]\n", iocb, tail); @@ -1047,7 +1117,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* everything turned out well, dispose of the aiocb. */ kiocb_free(iocb); - put_reqs_available(ctx, 1); /* * We have to order our ring_info tail store above and test diff --git a/fs/bio.c b/fs/bio.c index ea5035da4d9a..e7fb3f82f5f5 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, - unsigned short max_sectors) + unsigned int max_sectors) { int retried_segments = 0; struct bio_vec *bvec; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 89b5519085c2..ebad721656f3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2245,6 +2245,8 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->status == CifsExiting) + continue; if (!match_session(ses, vol)) continue; ++ses->ses_count; @@ -2258,24 +2260,37 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) static void cifs_put_smb_ses(struct cifs_ses *ses) { - unsigned int xid; + unsigned int rc, xid; struct TCP_Server_Info *server = ses->server; cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); + spin_lock(&cifs_tcp_ses_lock); + if (ses->status == CifsExiting) { + spin_unlock(&cifs_tcp_ses_lock); + return; + } if (--ses->ses_count > 0) { spin_unlock(&cifs_tcp_ses_lock); return; } - - list_del_init(&ses->smb_ses_list); + if (ses->status == CifsGood) + ses->status = CifsExiting; spin_unlock(&cifs_tcp_ses_lock); - if (ses->status == CifsGood && server->ops->logoff) { + if (ses->status == CifsExiting && server->ops->logoff) { xid = get_xid(); - server->ops->logoff(xid, ses); + rc = server->ops->logoff(xid, ses); + if (rc) + cifs_dbg(VFS, "%s: Session Logoff failure rc=%d\n", + __func__, rc); _free_xid(xid); } + + spin_lock(&cifs_tcp_ses_lock); + list_del_init(&ses->smb_ses_list); + spin_unlock(&cifs_tcp_ses_lock); + sesInfoFree(ses); cifs_put_tcp_session(server); } diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 340abca3aa52..ee1963b2e5a7 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -516,13 +516,19 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf, return -EAGAIN; } - if (ses->status != CifsGood) { - /* check if SMB2 
session is bad because we are setting it up */ + if (ses->status == CifsNew) { if ((buf->Command != SMB2_SESSION_SETUP) && (buf->Command != SMB2_NEGOTIATE)) return -EAGAIN; /* else ok - we are setting up session */ } + + if (ses->status == CifsExiting) { + if (buf->Command != SMB2_LOGOFF) + return -EAGAIN; + /* else ok - we are shutting down the session */ + } + *mid = smb2_mid_entry_alloc(buf, ses->server); if (*mid == NULL) return -ENOMEM; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 800b938e4061..ebb46e311e0b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -431,13 +431,20 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, return -EAGAIN; } - if (ses->status != CifsGood) { - /* check if SMB session is bad because we are setting it up */ + if (ses->status == CifsNew) { if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) return -EAGAIN; /* else ok - we are setting up session */ } + + if (ses->status == CifsExiting) { + /* check if SMB session is bad because we are setting it up */ + if (in_buf->Command != SMB_COM_LOGOFF_ANDX) + return -EAGAIN; + /* else ok - we are shutting down session */ + } + *ppmidQ = AllocMidQEntry(in_buf, ses->server); if (*ppmidQ == NULL) return -ENOMEM; diff --git a/include/linux/pci.h b/include/linux/pci.h index 573c04929bd1..b11e6e280f15 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -170,6 +170,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Get VPD from function 0 VPD */ + PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), }; enum pci_irq_reroute_variant { diff --git a/include/net/ip.h b/include/net/ip.h index 1b1269e13596..553c07514a05 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -141,6 +141,7 @@ static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) } /* datagram.c */ +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); extern int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0c1288a50e8b..a68a061882f4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -293,6 +293,8 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); +void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index b7b1914dfa25..27cf128ebc15 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -347,7 +347,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -ENOMEM; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); return 0; } @@ -370,6 +369,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) ip.u.ip4 = entry->addr.u.ip4; @@ -416,6 +416,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if 
(!netif_running(br->dev) || br->multicast_disabled) return -EINVAL; + memset(&ip, 0, sizeof(ip)); ip.proto = entry->addr.proto; if (ip.proto == htons(ETH_P_IP)) { if (timer_pending(&br->ip4_querier.timer)) diff --git a/net/core/datagram.c b/net/core/datagram.c index af814e764206..98e3d61e7476 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -130,6 +130,35 @@ out_noerr: goto out; } +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) +{ + struct sk_buff *nskb; + + if (skb->peeked) + return skb; + + /* We have to unshare an skb before modifying it. */ + if (!skb_shared(skb)) + goto done; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return ERR_PTR(-ENOMEM); + + skb->prev->next = nskb; + skb->next->prev = nskb; + nskb->prev = skb->prev; + nskb->next = skb->next; + + consume_skb(skb); + skb = nskb; + +done: + skb->peeked = 1; + + return skb; +} + /** * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket @@ -164,7 +193,9 @@ out_noerr: struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, int *peeked, int *off, int *err) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct sk_buff *skb, *last; + unsigned long cpu_flags; long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -183,8 +214,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ - unsigned long cpu_flags; - struct sk_buff_head *queue = &sk->sk_receive_queue; int _off = *off; last = (struct sk_buff *)queue; @@ -198,7 +227,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, _off -= skb->len; continue; } - skb->peeked = 1; + + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) + goto unlock_err; + atomic_inc(&skb->users); } else __skb_unlink(skb, queue); @@ -222,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, return NULL; +unlock_err: + spin_unlock_irqrestore(&queue->lock, cpu_flags); no_packet: *err = error; return NULL; @@ -742,7 +778,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!skb_shared(skb)) + skb->ip_summed = CHECKSUM_UNNECESSARY; } return sum; } diff --git a/net/core/dev.c b/net/core/dev.c index 3ca487e14080..f991f5d3371d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3193,6 +3193,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, local_irq_save(flags); rps_lock(sd); + if (!netif_running(skb->dev)) + goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { @@ -3214,6 +3216,7 @@ enqueue: goto enqueue; } +drop: sd->dropped++; rps_unlock(sd); @@ -3518,8 +3521,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) pt_prev = NULL; - rcu_read_lock(); - another_round: skb->skb_iif = skb->dev->ifindex; @@ -3529,7 +3530,7 @@ another_round: skb->protocol == cpu_to_be16(ETH_P_8021AD)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) - goto unlock; + goto out; } #ifdef CONFIG_NET_CLS_ACT @@ -3554,7 +3555,7 @@ skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto unlock; + goto out; ncls: #endif @@ -3569,7 +3570,7 @@ ncls: if (vlan_do_receive(&skb)) goto another_round; else 
if (unlikely(!skb)) - goto unlock; + goto out; } rx_handler = rcu_dereference(skb->dev->rx_handler); @@ -3581,7 +3582,7 @@ ncls: switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: ret = NET_RX_SUCCESS; - goto unlock; + goto out; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3633,8 +3634,6 @@ drop: ret = NET_RX_DROP; } -unlock: - rcu_read_unlock(); out: return ret; } @@ -3681,29 +3680,30 @@ static int __netif_receive_skb(struct sk_buff *skb) */ int netif_receive_skb(struct sk_buff *skb) { + int ret; + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; + rcu_read_lock(); + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; - int cpu, ret; - - rcu_read_lock(); - - cpu = get_rps_cpu(skb->dev, skb, &rflow); + int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); return ret; } - rcu_read_unlock(); } #endif - return __netif_receive_skb(skb); + ret = __netif_receive_skb(skb); + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(netif_receive_skb); @@ -4113,8 +4113,10 @@ static int process_backlog(struct napi_struct *napi, int quota) unsigned int qlen; while ((skb = __skb_dequeue(&sd->process_queue))) { + rcu_read_lock(); local_irq_enable(); __netif_receive_skb(skb); + rcu_read_unlock(); local_irq_disable(); input_queue_head_incr(sd); if (++work >= quota) { @@ -5302,6 +5304,7 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; + on_each_cpu(flush_backlog, dev, 1); } synchronize_net(); @@ -5559,7 +5562,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev) struct netdev_queue *tx; size_t sz = count * sizeof(*tx); - BUG_ON(count < 1 || count > 0xffff); + if (count < 1 || count > 0xffff) + return -EINVAL; tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!tx) { @@ -5917,8 +5921,6 @@ void netdev_run_todo(void) dev->reg_state = NETREG_UNREGISTERED; - on_each_cpu(flush_backlog, dev, 1); - netdev_wait_allrefs(dev); /* paranoia */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a104ba3c5768..cea47344d535 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3423,8 +3423,10 @@ static int pktgen_thread_worker(void *arg) pktgen_rem_thread(t); /* Wait for kthread_stop */ - while (!kthread_should_stop()) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; schedule(); } __set_current_state(TASK_RUNNING); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 76cc27f3f991..fd3a16e45dd9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1197,10 +1197,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; -static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { - [IFLA_VF_INFO] = { .type = NLA_NESTED }, -}; - static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, @@ -1274,67 +1270,66 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } -static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) { - int rem, err = -EINVAL; - struct nlattr *vf; const struct net_device_ops *ops = dev->netdev_ops; + int err = -EINVAL; - 
nla_for_each_nested(vf, attr, rem) { - switch (nla_type(vf)) { - case IFLA_VF_MAC: { - struct ifla_vf_mac *ivm; - ivm = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, - ivm->mac); - break; - } - case IFLA_VF_VLAN: { - struct ifla_vf_vlan *ivv; - ivv = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - break; - } - case IFLA_VF_TX_RATE: { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, - ivt->rate); - break; - } - case IFLA_VF_SPOOFCHK: { - struct ifla_vf_spoofchk *ivs; - ivs = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_spoofchk) - err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, - ivs->setting); - break; - } - case IFLA_VF_LINK_STATE: { - struct ifla_vf_link_state *ivl; - ivl = nla_data(vf); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_link_state) - err = ops->ndo_set_vf_link_state(dev, ivl->vf, - ivl->link_state); - break; - } - default: - err = -EINVAL; - break; - } - if (err) - break; + if (tb[IFLA_VF_MAC]) { + struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN]) { + struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, + ivv->qos); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_TX_RATE]) { + struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + if (err < 0) + return err; } + + if (tb[IFLA_VF_SPOOFCHK]) { + struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_LINK_STATE]) { + struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + if (err < 0) + return err; + } + return err; } @@ -1517,14 +1512,21 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *vfinfo[IFLA_VF_MAX + 1]; struct nlattr *attr; int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) { + if (nla_type(attr) != IFLA_VF_INFO || + nla_len(attr) < NLA_HDRLEN) { err = -EINVAL; goto errout; } - err = do_setvfinfo(dev, attr); + err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, + ifla_vf_policy); + if (err < 0) + goto errout; + err = do_setvfinfo(dev, vfinfo); if (err < 0) goto errout; modified = 1; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 5f3dc1df04bf..291b0821d1ac 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -20,7 +20,7 @@ #include #include -int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; @@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); - lock_sock(sk); - oif = 
sk->sk_bound_dev_if; saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { @@ -81,9 +79,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_set(sk, &rt->dst); err = 0; out: - release_sock(sk); return err; } +EXPORT_SYMBOL(__ip4_datagram_connect); + +int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip4_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL(ip4_datagram_connect); /* Because UDP xmit path can manipulate sk_dst_cache without holding diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4c1884fed548..4d98a6b80b04 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -356,7 +356,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) ihl = ip_hdrlen(skb); /* Determine the position of this fragment. */ - end = offset + skb->len - ihl; + end = offset + skb->len - skb_network_offset(skb) - ihl; err = -EINVAL; /* Is this the final fragment? */ @@ -386,7 +386,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) goto err; err = -ENOMEM; - if (pskb_pull(skb, ihl) == NULL) + if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) goto err; err = pskb_trim_rcsum(skb, end - offset); @@ -627,6 +627,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; iph->tot_len = htons(len); iph->tos |= ecn; + + ip_send_check(iph); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; qp->q.fragments_tail = NULL; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index edd5a8171357..6913e2fdc12c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -476,7 +476,8 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -493,7 +494,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; @@ -611,7 +613,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9f9ad99fcfdd..da44cb4f51d1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = 
__ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -99,9 +99,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 51d54dc376f3..05c94d9c3776 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -329,10 +329,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5d892febd64c..cf9bfc5ddb34 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -318,6 +318,21 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); } +static inline bool +nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, + const struct nf_conntrack_tuple *tuple, + u16 zone) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + /* A conntrack can be recreated with the equal tuple, + * so we need to check that the conntrack is confirmed + */ + return nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone && + nf_ct_is_confirmed(ct); +} + /* * Warning : * - Caller must take a reference on returned object @@ -339,8 +354,7 @@ ____nf_conntrack_find(struct net *net, u16 zone, local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { + if (nf_ct_key_equal(h, tuple, zone)) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -387,8 +401,7 @@ begin: !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || - nf_ct_zone(ct) != zone)) { + if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { nf_ct_put(ct); goto begin; } @@ -450,7 +463,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto out; add_timer(&ct->timeout); - nf_conntrack_get(&ct->ct_general); + smp_wmb(); + /* The caller holds a reference to this object */ + atomic_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); @@ -464,6 +479,21 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); +/* deletion from this larval template list happens via nf_ct_put() */ +void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) +{ + __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + nf_conntrack_get(&tmpl->ct_general); + + spin_lock_bh(&nf_conntrack_lock); + /* Overload tuple linked list to put us in template list. 
*/ + hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &net->ct.tmpl); + spin_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -735,11 +765,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone, nf_ct_zone->id = zone; } #endif - /* - * changes to lookup keys must be done before setting refcnt to 1 + /* Because we use RCU lookups, we set ct_general.use to zero before + * this is inserted in any list. */ - smp_wmb(); - atomic_set(&ct->ct_general.use, 1); + atomic_set(&ct->ct_general.use, 0); return ct; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -763,6 +792,11 @@ void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + /* A freed object has refcnt == 0, that's + * the golden rule for SLAB_DESTROY_BY_RCU + */ + NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); + nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); @@ -857,6 +891,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, NF_CT_STAT_INC(net, new); } + /* Now it is inserted into the unconfirmed list, bump refcount */ + nf_conntrack_get(&ct->ct_general); + /* Overload tuple linked list to put us in unconfirmed list. */ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &net->ct.unconfirmed); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index cdf4567ba9b3..bf6e9a144dac 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -362,9 +362,8 @@ static int __net_init synproxy_net_init(struct net *net) goto err2; if (!nfct_synproxy_ext_add(ct)) goto err2; - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_tmpl_insert(net, ct); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); @@ -389,7 +388,7 @@ static void __net_exit synproxy_net_exit(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - nf_conntrack_free(snet->tmpl); + nf_ct_put(snet->tmpl); synproxy_proc_exit(net); free_percpu(snet->stats); } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index da35ac06a975..889960193544 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -226,12 +226,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err3; } - __set_bit(IPS_TEMPLATE_BIT, &ct->status); - __set_bit(IPS_CONFIRMED_BIT, &ct->status); - - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &par->net->ct.tmpl); + nf_conntrack_tmpl_insert(par->net, ct); out: info->ct = ct; return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5a75a1eb3ae7..22e0f478a2a3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -342,25 +342,52 @@ err1: return NULL; } + +static void +__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, + unsigned int order) +{ + struct netlink_sock *nlk = nlk_sk(sk); + struct sk_buff_head *queue; + struct netlink_ring *ring; + + queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; + ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; + + spin_lock_bh(&queue->lock); + + ring->frame_max = req->nm_frame_nr - 1; + ring->head = 0; + ring->frame_size = req->nm_frame_size; + ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; + + swap(ring->pg_vec_len, req->nm_block_nr); + swap(ring->pg_vec_order, order); + swap(ring->pg_vec, pg_vec); + + __skb_queue_purge(queue); + spin_unlock_bh(&queue->lock); + + WARN_ON(atomic_read(&nlk->mapped)); + + if (pg_vec) + free_pg_vec(pg_vec, order, req->nm_block_nr); +} + static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool closing, bool tx_ring) + bool tx_ring) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; - struct sk_buff_head *queue; void **pg_vec = NULL; unsigned int order = 0; - int err; ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - if (!closing) { - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - } + if (atomic_read(&nlk->mapped)) + return -EBUSY; + if (atomic_read(&ring->pending)) + return -EBUSY; if (req->nm_block_nr) { if (ring->pg_vec != NULL) @@ -392,31 +419,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, return -EINVAL; } - err = -EBUSY; mutex_lock(&nlk->pg_vec_lock); - if (closing || atomic_read(&nlk->mapped) == 0) { - err = 0; - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); + if (atomic_read(&nlk->mapped) == 0) { + __netlink_set_ring(sk, req, tx_ring, pg_vec, order); + mutex_unlock(&nlk->pg_vec_lock); + return 0; } + mutex_unlock(&nlk->pg_vec_lock); if (pg_vec) free_pg_vec(pg_vec, order, req->nm_block_nr); - return err; + + return -EBUSY; } static void netlink_mm_open(struct vm_area_struct *vma) @@ -885,10 +900,10 @@ static void netlink_sock_destruct(struct sock *sk) memset(&req, 0, sizeof(req)); if (nlk->rx_ring.pg_vec) - netlink_set_ring(sk, &req, true, false); + __netlink_set_ring(sk, &req, false, NULL, 0); memset(&req, 0, sizeof(req)); if (nlk->tx_ring.pg_vec) - netlink_set_ring(sk, &req, true, true); + __netlink_set_ring(sk, &req, true, NULL, 0); } #endif /* CONFIG_NETLINK_MMAP */ @@ -2182,7 +2197,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; - err = netlink_set_ring(sk, &req, false, + err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING); break; } diff --git a/net/rds/info.c b/net/rds/info.c index 9a6b4f66187c..140a44a5f7b7 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index dffdbeac18ca..d1233088f953 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1607,6 +1607,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) res = tipc_sk_create(sock_net(sock->sk), 
new_sock, 0, 1); if (res) goto exit; + security_sk_clone(sock->sk, new_sock->sk); new_sk = new_sock->sk; new_tsock = tipc_sk(new_sk);