From: Christoph Lameter NUMA awareness for the e1000 driver. Allocate tx and rx descriptors on the node of the device. It is safe to replace vmalloc by kmalloc_node since only the descriptors are allocated in a NUMA-aware way. These will not be so large that the use of vmalloc becomes necessary. The patch includes a modification to slab.h to revert from inline functions for kmalloc_node/kmem_cache_alloc_node to a macro so that an undefined variable may be specified. Is that ok? If so then I probably need to spin a separate patch just for slab.h. V1-V2: - Patch against 2.6.12-rc5-mm1 - Do not define netdev->node for the non-NUMA case - Change kmem_cache_alloc_node and kmalloc_node to fall back to macro definitions for the non-NUMA case so that an undefined variable can be specified. References to earlier discussions: http://marc.theaimsgroup.com/?t=111638151000001&r=1&w=2 Note that i386 pci_alloc_coherent also needs to be made NUMA-aware. Signed-off-by: Christoph Lameter Signed-off-by: Justin M. 
Forbes Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton --- drivers/net/e1000/e1000_main.c | 27 ++++++++++++++++----------- include/linux/netdevice.h | 4 +++- include/linux/slab.h | 16 ++++++++-------- 3 files changed, 27 insertions(+), 20 deletions(-) diff -puN drivers/net/e1000/e1000_main.c~e1000-numa-aware-allocation-of-descriptors-v2 drivers/net/e1000/e1000_main.c --- devel/drivers/net/e1000/e1000_main.c~e1000-numa-aware-allocation-of-descriptors-v2 2005-07-30 00:44:06.000000000 -0700 +++ devel-akpm/drivers/net/e1000/e1000_main.c 2005-07-30 00:44:06.000000000 -0700 @@ -559,7 +559,9 @@ e1000_probe(struct pci_dev *pdev, netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len; netdev->base_addr = adapter->hw.io_base; - +#ifdef CONFIG_NUMA + netdev->node = pcibus_to_node(pdev->bus); +#endif adapter->bd_number = cards_found; /* setup the private structure */ @@ -963,7 +965,9 @@ e1000_setup_tx_resources(struct e1000_ad int size; size = sizeof(struct e1000_buffer) * txdr->count; - txdr->buffer_info = vmalloc(size); + + txdr->buffer_info = kmalloc_node(size, GFP_KERNEL, adapter->netdev->node ); + if(!txdr->buffer_info) { DPRINTK(PROBE, ERR, "Unable to allocate memory for the transmit descriptor ring\n"); @@ -979,7 +983,7 @@ e1000_setup_tx_resources(struct e1000_ad txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); if(!txdr->desc) { setup_tx_desc_die: - vfree(txdr->buffer_info); + kfree(txdr->buffer_info); DPRINTK(PROBE, ERR, "Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; @@ -1007,7 +1011,7 @@ setup_tx_desc_die: DPRINTK(PROBE, ERR, "Unable to allocate aligned memory " "for the transmit descriptor ring\n"); - vfree(txdr->buffer_info); + kfree(txdr->buffer_info); return -ENOMEM; } else { /* Free old allocation, new allocation was successful */ @@ -1115,7 +1119,8 @@ e1000_setup_rx_resources(struct e1000_ad int size, desc_len; size = sizeof(struct e1000_buffer) * rxdr->count; - rxdr->buffer_info = 
vmalloc(size); + rxdr->buffer_info = kmalloc_node(size, GFP_KERNEL, adapter->netdev->node); + if(!rxdr->buffer_info) { DPRINTK(PROBE, ERR, "Unable to allocate memory for the receive descriptor ring\n"); @@ -1126,7 +1131,7 @@ e1000_setup_rx_resources(struct e1000_ad size = sizeof(struct e1000_ps_page) * rxdr->count; rxdr->ps_page = kmalloc(size, GFP_KERNEL); if(!rxdr->ps_page) { - vfree(rxdr->buffer_info); + kfree(rxdr->buffer_info); DPRINTK(PROBE, ERR, "Unable to allocate memory for the receive descriptor ring\n"); return -ENOMEM; @@ -1136,7 +1141,7 @@ e1000_setup_rx_resources(struct e1000_ad size = sizeof(struct e1000_ps_page_dma) * rxdr->count; rxdr->ps_page_dma = kmalloc(size, GFP_KERNEL); if(!rxdr->ps_page_dma) { - vfree(rxdr->buffer_info); + kfree(rxdr->buffer_info); kfree(rxdr->ps_page); DPRINTK(PROBE, ERR, "Unable to allocate memory for the receive descriptor ring\n"); @@ -1158,7 +1163,7 @@ e1000_setup_rx_resources(struct e1000_ad if(!rxdr->desc) { setup_rx_desc_die: - vfree(rxdr->buffer_info); + kfree(rxdr->buffer_info); kfree(rxdr->ps_page); kfree(rxdr->ps_page_dma); DPRINTK(PROBE, ERR, @@ -1188,7 +1193,7 @@ setup_rx_desc_die: DPRINTK(PROBE, ERR, "Unable to allocate aligned memory " "for the receive descriptor ring\n"); - vfree(rxdr->buffer_info); + kfree(rxdr->buffer_info); kfree(rxdr->ps_page); kfree(rxdr->ps_page_dma); return -ENOMEM; @@ -1385,7 +1390,7 @@ e1000_free_tx_resources(struct e1000_ada e1000_clean_tx_ring(adapter); - vfree(adapter->tx_ring.buffer_info); + kfree(adapter->tx_ring.buffer_info); adapter->tx_ring.buffer_info = NULL; pci_free_consistent(pdev, adapter->tx_ring.size, @@ -1465,7 +1470,7 @@ e1000_free_rx_resources(struct e1000_ada e1000_clean_rx_ring(adapter); - vfree(rx_ring->buffer_info); + kfree(rx_ring->buffer_info); rx_ring->buffer_info = NULL; kfree(rx_ring->ps_page); rx_ring->ps_page = NULL; diff -puN include/linux/netdevice.h~e1000-numa-aware-allocation-of-descriptors-v2 include/linux/netdevice.h --- 
devel/include/linux/netdevice.h~e1000-numa-aware-allocation-of-descriptors-v2 2005-07-30 00:44:06.000000000 -0700 +++ devel-akpm/include/linux/netdevice.h 2005-07-30 00:44:06.000000000 -0700 @@ -273,7 +273,9 @@ struct net_device unsigned long mem_start; /* shared mem start */ unsigned long base_addr; /* device I/O address */ unsigned int irq; /* device IRQ number */ - +#ifdef CONFIG_NUMA + unsigned int node; /* device node number */ +#endif /* * Some hardware also needs these fields, but they are not * part of the usual set specified in Space.c. diff -puN include/linux/slab.h~e1000-numa-aware-allocation-of-descriptors-v2 include/linux/slab.h --- devel/include/linux/slab.h~e1000-numa-aware-allocation-of-descriptors-v2 2005-07-30 00:44:06.000000000 -0700 +++ devel-akpm/include/linux/slab.h 2005-07-30 00:44:06.000000000 -0700 @@ -107,14 +107,14 @@ extern unsigned int ksize(const void *); extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node); extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); #else -static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) -{ - return kmem_cache_alloc(cachep, flags); -} -static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) -{ - return kmalloc(size, flags); -} +/* + * The definitions are macros here to allow the use of an undefined variable + * for the node. The variable may only be defined if CONFIG_NUMA is set. + */ +#define kmem_cache_alloc_node(__cachep, __flags, __node) \ + kmem_cache_alloc(__cachep, __flags) +#define kmalloc_node(__size, __flags, __node) \ + kmalloc(__size, __flags) #endif extern int FASTCALL(kmem_cache_reap(int)); _