From 900e0143a575406146ac531fcb91790f166ce52f Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Tue, 11 Oct 2005 08:22:33 +0100 Subject: [PATCH 01/14] [DECNET]: Remove some redundant ifdeffed code Signed-off-by: Patrick Caulfield Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/decnet/af_decnet.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 1186dc44cdff..3f25cadccddd 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -719,22 +719,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (saddr->sdn_flags & ~SDF_WILD) return -EINVAL; -#if 1 if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD))) return -EACCES; -#else - /* - * Maybe put the default actions in the default security ops for - * dn_prot_sock ? Would be nice if the capable call would go there - * too. - */ - if (security_dn_prot_sock(saddr) && - !capable(CAP_NET_BIND_SERVICE) || - saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD)) - return -EACCES; -#endif - if (!(saddr->sdn_flags & SDF_WILD)) { if (dn_ntohs(saddr->sdn_nodeaddrl)) { From 670c02c2bfd2c8a305a90f5285409a7b0a8fd630 Mon Sep 17 00:00:00 2001 From: John Hawkes Date: Thu, 13 Oct 2005 09:30:31 -0700 Subject: [PATCH 02/14] [NET]: Wider use of for_each_*cpu() In 'net' change the explicit use of for-loops and NR_CPUS into the general for_each_cpu() or for_each_online_cpu() constructs, as appropriate. This widens the scope of potential future optimizations of the general constructs, as well as takes advantage of the existing optimizations of first_cpu() and next_cpu(), which is advantageous when the true CPU count is much smaller than NR_CPUS. Signed-off-by: John Hawkes Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/core/neighbour.c | 5 +---- net/core/pktgen.c | 5 +---- net/ipv4/icmp.c | 5 +---- net/ipv4/proc.c | 4 +--- net/ipv6/icmp.c | 9 ++------- net/ipv6/proc.c | 4 +--- net/sctp/proc.c | 4 +--- 7 files changed, 8 insertions(+), 28 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1dcf7fa1f0fe..e68700f950a5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1625,12 +1625,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, memset(&ndst, 0, sizeof(ndst)); - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { struct neigh_statistics *st; - if (!cpu_possible(cpu)) - continue; - st = per_cpu_ptr(tbl->stats, cpu); ndst.ndts_allocs += st->allocs; ndst.ndts_destroys += st->destroys; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5f043d346694..00116d8b3b28 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3065,12 +3065,9 @@ static int __init pg_init(void) /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); - for (cpu = 0; cpu < NR_CPUS ; cpu++) { + for_each_online_cpu(cpu) { char buf[30]; - if (!cpu_online(cpu)) - continue; - sprintf(buf, "kpktgend_%i", cpu); pktgen_create_thread(buf, cpu); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 90dca711ac9f..175e093ec564 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops) struct inet_sock *inet; int i; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { int err; - if (!cpu_possible(i)) - continue; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &per_cpu(__icmp_socket, i)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f7943ba1f43c..a65e508fbd40 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -90,9 +90,7 @@ fold_field(void *mib[], int offt) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b7185fb3377c..23e540365a14 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -700,10 +700,7 @@ int __init icmpv6_init(struct net_proto_family *ops) struct sock *sk; int err, i, j; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - + for_each_cpu(i) { err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &per_cpu(__icmpv6_socket, i)); if (err < 0) { @@ -749,9 +746,7 @@ void icmpv6_cleanup(void) { int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { sock_release(per_cpu(__icmpv6_socket, i)); } inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 334a5967831e..50a13e75d70e 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -140,9 +140,7 @@ fold_field(void *mib[], int offt) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt); res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index b74f7772b576..6e4dc28874d7 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -69,9 +69,7 @@ fold_field(void *mib[], int nr) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *((unsigned long *) (((void *) per_cpu_ptr(mib[0], i)) + sizeof (unsigned long) * nr)); From b7c8921bf1a8a9c1907b1eeb029d3f167be226f3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Oct 2005 15:26:34 -0700 Subject: [PATCH 03/14] [PKTGEN]: Sleeping function called under lock pktgen is calling kmalloc GFP_KERNEL and vmalloc with lock held. The simplest fix is to turn the lock into a semaphore, since the thread lock is only used for admin control from user context. Signed-off-by: Stephen Hemminger Signed-off-by: Robert Olsson Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/core/pktgen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 00116d8b3b28..8a90bf79261a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -177,8 +177,8 @@ #define T_REMDEV (1<<3) /* Remove all devs */ /* Locks */ -#define thread_lock() spin_lock(&_thread_lock) -#define thread_unlock() spin_unlock(&_thread_lock) +#define thread_lock() down(&pktgen_sem) +#define thread_unlock() up(&pktgen_sem) /* If lock -- can be removed after some work */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -503,7 +503,7 @@ static int pg_delay_d = 0; static int pg_clone_skb_d = 0; static int debug = 0; -static DEFINE_SPINLOCK(_thread_lock); +static DECLARE_MUTEX(pktgen_sem); static struct pktgen_thread *pktgen_threads = NULL; static char module_fname[128]; From 2845b63b504b051a9cb4d78bed8b3594451a1f6f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Oct 2005 15:29:48 -0700 Subject: [PATCH 04/14] [PKTGEN]: Use kzalloc These are cleanup patches for pktgen that can go in 2.6.15 Can use kzalloc in a couple of places. Signed-off-by: Stephen Hemminger Signed-off-by: Robert Olsson Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/core/pktgen.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8a90bf79261a..b597ef1771f4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2869,12 +2869,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { - pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); if (!pkt_dev) return -ENOMEM; - memset(pkt_dev, 0, sizeof(struct pktgen_dev)); - pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); if (pkt_dev->flows == NULL) { kfree(pkt_dev); @@ -2958,13 +2956,12 @@ static int pktgen_create_thread(const char* name, int cpu) return -EINVAL; } - t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); + t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { printk("pktgen: ERROR: out of memory, can't create new thread.\n"); return -ENOMEM; } - memset(t, 0, sizeof(struct pktgen_thread)); strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; From b4099fab75d5e3eae8d207c0d7159e2f3348686e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Oct 2005 15:32:22 -0700 Subject: [PATCH 05/14] [PKTGEN]: Spelling and white space Fix some cosmetic issues. Indentation, spelling errors, and some whitespace. Signed-off-by: Stephen Hemminger Signed-off-by: Robert Olsson Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/core/pktgen.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b597ef1771f4..ad053c8cb7fc 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -75,7 +75,7 @@ * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" * to /proc gives result code thats should be read be the "writer". - * For pratical use this should be no problem. + * For practical use this should be no problem. * * Note when adding devices to a specific CPU there good idea to also assign * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. @@ -96,7 +96,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * 040923 * - * Rany Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler waring * * Remove FCS from BW calculation. Lennert Buytenhek * New time handling. Lennert Buytenhek 041213 @@ -244,7 +244,7 @@ struct pktgen_dev { __u32 seq_num; int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many coppies of the same + * is greater than 1, then that many copies of the same * packet will be sent before a new packet is allocated. * For instance, if you want to send 1024 identical packets * before creating a new packet, set clone_skb to 1024. @@ -396,7 +396,7 @@ static inline s64 divremdi3(s64 x, s64 y, int type) /* End of hacks to deal with 64-bit math on x86 */ -/** Convert to miliseconds */ +/** Convert to milliseconds */ static inline __u64 tv_to_ms(const struct timeval* tv) { __u64 ms = tv->tv_usec / 1000; @@ -425,7 +425,7 @@ static inline __u64 pg_div64(__u64 n, __u64 base) { __u64 tmp = n; /* - * How do we know if the architectrure we are running on + * How do we know if the architecture we are running on * supports division with 64 bit base? * */ @@ -544,12 +544,12 @@ static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, len = strlen(data); if(len > count) { - len =-EFAULT; + len = -EFAULT; goto out; } if (copy_to_user(buf, data, len)) { - len =-EFAULT; + len = -EFAULT; goto out; } @@ -578,7 +578,7 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, goto out; } if (copy_from_user(data, buf, count)) { - err =-EFAULT; + err = -EFAULT; goto out_free; } data[count-1] = 0; /* Make string */ @@ -1702,7 +1702,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) start = now = getCurUs(); printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); while (now < spin_until_us) { - /* TODO: optimise sleeping behavior */ + /* TODO: optimize sleeping behavior */ if (spin_until_us - now > jiffies_to_usecs(1)+1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { @@ -2361,7 +2361,7 @@ static void pktgen_stop_all_threads_ifs(void) pktgen_stop(t); t = t->next; } - thread_unlock(); + thread_unlock(); } static int thread_is_running(struct pktgen_thread *t ) @@ -2555,7 +2555,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) if (strlen(t->fname)) remove_proc_entry(t->fname, NULL); - thread_lock(); + thread_lock(); if (tmp == t) pktgen_threads = tmp->next; @@ -2929,7 +2929,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) { struct pktgen_thread *t = NULL; - thread_lock(); + thread_lock(); t = pktgen_threads; while (t) { From d50a6b56f0f239cf061630c85add121dc3555339 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 14 Oct 2005 15:42:33 -0700 Subject: [PATCH 06/14] [PKTGEN]: proc interface revision The code to handle the /proc interface can be cleaned up in several places: * use seq_file for read * don't need to remember all the filenames separately * use for_online_cpu's * don't vmalloc a buffer for small command from user. Committer note: This patch clashed with John Hawkes's "[NET]: Wider use of for_each_*cpu()", so I fixed it up manually. Signed-off-by: Stephen Hemminger Signed-off-by: Robert Olsson Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/core/pktgen.c | 472 +++++++++++++++++++++------------------------- 1 file changed, 215 insertions(+), 257 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ad053c8cb7fc..7fc3e9e28c34 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -137,6 +137,7 @@ #include #include #include +#include #include #include #include @@ -151,7 +152,7 @@ #include -#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.63: Packet Generator for packet performance testing.\n" /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) @@ -186,7 +187,9 @@ /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "net/pktgen" +#define PG_PROC_DIR "pktgen" +#define PGCTRL "pgctrl" +static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 @@ -202,11 +205,8 @@ struct pktgen_dev { * Try to keep frequent/infrequent used vars. separated. */ - char ifname[32]; - struct proc_dir_entry *proc_ent; + char ifname[IFNAMSIZ]; char result[512]; - /* proc file names */ - char fname[80]; struct pktgen_thread* pg_thread; /* the owner */ struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ @@ -330,8 +330,6 @@ struct pktgen_thread { struct pktgen_dev *if_list; /* All device here */ struct pktgen_thread* next; char name[32]; - char fname[128]; /* name of proc file */ - struct proc_dir_entry *proc_ent; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -473,16 +471,6 @@ static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) static char version[] __initdata = VERSION; -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos); -static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos); -static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); - -static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); -static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int create_proc_dir(void); -static int remove_proc_dir(void); - static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); static struct pktgen_thread* pktgen_find_thread(const char* name); @@ -506,80 +494,38 @@ static int debug = 0; static DECLARE_MUTEX(pktgen_sem); static struct pktgen_thread *pktgen_threads = NULL; -static char module_fname[128]; -static struct proc_dir_entry *module_proc_ent = NULL; - static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, }; -static struct file_operations pktgen_fops = { - .read = proc_pgctrl_read, - .write = proc_pgctrl_write, - /* .ioctl = pktgen_ioctl, later maybe */ -}; - /* * /proc handling functions * */ -static struct proc_dir_entry *pg_proc_dir = NULL; -static int proc_pgctrl_read_eof=0; - -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, - size_t count, loff_t *ppos) +static int pgctrl_show(struct seq_file *seq, void *v) { - char data[200]; - int len = 0; - - if(proc_pgctrl_read_eof) { - proc_pgctrl_read_eof=0; - len = 0; - goto out; - } - - sprintf(data, "%s", VERSION); - - len = strlen(data); - - if(len > count) { - len = -EFAULT; - goto out; - } - - if (copy_to_user(buf, data, len)) { - len = -EFAULT; - goto out; - } - - *ppos += len; - proc_pgctrl_read_eof=1; /* EOF next call */ - - out: - return len; + seq_puts(seq, VERSION); + return 0; } -static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t pgctrl_write(struct file* file,const char __user * buf, + size_t count, loff_t *ppos) { - char *data = NULL; int err = 0; + char data[128]; if (!capable(CAP_NET_ADMIN)){ err = -EPERM; goto out; } - data = (void*)vmalloc ((unsigned int)count); + if (count > sizeof(data)) + count = sizeof(data); - if(!data) { - err = -ENOMEM; - goto out; - } if (copy_from_user(data, buf, count)) { err = -EFAULT; - goto out_free; + goto out; } data[count-1] = 0; /* Make string */ @@ -594,31 +540,40 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, err = count; - out_free: - vfree (data); out: return err; } -static int proc_if_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pgctrl_open(struct inode *inode, struct file *file) +{ + return single_open(file, pgctrl_show, PDE(inode)->data); +} + +static struct file_operations pktgen_fops = { + .owner = THIS_MODULE, + .open = pgctrl_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pgctrl_write, + .release = single_release, +}; + +static int pktgen_if_show(struct seq_file *seq, void *v) { - char *p; int i; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); + struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); - p = buf; - p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", - (unsigned long long) pkt_dev->count, - pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); + seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", + (unsigned long long) pkt_dev->count, + pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); - p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); + seq_printf(seq, " frags: %d delay: %u clone_skb: %d ifname: %s\n", + pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); - p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); if(pkt_dev->flags & F_IPV6) { @@ -626,19 +581,19 @@ static int proc_if_read(char *buf , char **start, off_t offset, fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); - p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); + seq_printf(seq, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); + seq_printf(seq, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); } else - p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); + seq_printf(seq," dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", + pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); - p += sprintf(p, " src_mac: "); + seq_puts(seq, " src_mac: "); if ((pkt_dev->src_mac[0] == 0) && (pkt_dev->src_mac[1] == 0) && @@ -648,89 +603,89 @@ static int proc_if_read(char *buf , char **start, off_t offset, (pkt_dev->src_mac[5] == 0)) for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); else for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); - p += sprintf(p, "dst_mac: "); + seq_printf(seq, "dst_mac: "); for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); + seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); - p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", - pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, - pkt_dev->udp_dst_max); + seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", + pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, + pkt_dev->udp_dst_max); - p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", - pkt_dev->src_mac_count, pkt_dev->dst_mac_count); + seq_printf(seq, " src_mac_count: %d dst_mac_count: %d \n Flags: ", + pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->flags & F_IPV6) - p += sprintf(p, "IPV6 "); + seq_printf(seq, "IPV6 "); if (pkt_dev->flags & F_IPSRC_RND) - p += sprintf(p, "IPSRC_RND "); + seq_printf(seq, "IPSRC_RND "); if (pkt_dev->flags & F_IPDST_RND) - p += sprintf(p, "IPDST_RND "); + seq_printf(seq, "IPDST_RND "); if (pkt_dev->flags & F_TXSIZE_RND) - p += sprintf(p, "TXSIZE_RND "); + seq_printf(seq, "TXSIZE_RND "); if (pkt_dev->flags & F_UDPSRC_RND) - p += sprintf(p, "UDPSRC_RND "); + seq_printf(seq, "UDPSRC_RND "); if (pkt_dev->flags & F_UDPDST_RND) - p += sprintf(p, "UDPDST_RND "); + seq_printf(seq, "UDPDST_RND "); if (pkt_dev->flags & F_MACSRC_RND) - p += sprintf(p, "MACSRC_RND "); + seq_printf(seq, "MACSRC_RND "); if (pkt_dev->flags & F_MACDST_RND) - p += sprintf(p, "MACDST_RND "); + seq_printf(seq, "MACDST_RND "); - p += sprintf(p, "\n"); + seq_puts(seq, "\n"); sa = pkt_dev->started_at; stopped = pkt_dev->stopped_at; if (pkt_dev->running) stopped = now; /* not really stopped, more like last-running-at */ - p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", - (unsigned long long) pkt_dev->sofar, - (unsigned long long) pkt_dev->errors, - (unsigned long long) sa, - (unsigned long long) stopped, - (unsigned long long) pkt_dev->idle_acc); + seq_printf(seq, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + (unsigned long long) pkt_dev->sofar, + (unsigned long long) pkt_dev->errors, + (unsigned long long) sa, + (unsigned long long) stopped, + (unsigned long long) pkt_dev->idle_acc); - p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", - pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); + seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", + pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, + pkt_dev->cur_src_mac_offset); if(pkt_dev->flags & F_IPV6) { char b1[128], b2[128]; fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); + seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); } else - p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); + seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", + pkt_dev->cur_saddr, pkt_dev->cur_daddr); - p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", - pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", + pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); - p += sprintf(p, " flows: %u\n", pkt_dev->nflows); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) - p += sprintf(p, "Result: %s\n", pkt_dev->result); + seq_printf(seq, "Result: %s\n", pkt_dev->result); else - p += sprintf(p, "Result: Idle\n"); - *eof = 1; + seq_printf(seq, "Result: Idle\n"); - return p - buf; + return 0; } @@ -802,13 +757,14 @@ static int strn_len(const char __user *user_buffer, unsigned int maxlen) return i; } -static int proc_if_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_dev *pkt_dev = seq->private; int i = 0, max, len; char name[16], valstr[32]; unsigned long value = 0; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); char* pg_result = NULL; int tmp = 0; char buf[128]; @@ -849,7 +805,8 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); + printk("pktgen: %s,%lu buffer -:%s:-\n", name, + (unsigned long) count, tb); } if (!strcmp(name, "min_pkt_size")) { @@ -1335,92 +1292,98 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, return -EINVAL; } -static int proc_thread_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pktgen_if_open(struct inode *inode, struct file *file) { - char *p; - struct pktgen_thread *t = (struct pktgen_thread*)(data); + return single_open(file, pktgen_if_show, PDE(inode)->data); +} + +static struct file_operations pktgen_if_fops = { + .owner = THIS_MODULE, + .open = pktgen_if_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_if_write, + .release = single_release, +}; + +static int pktgen_thread_show(struct seq_file *seq, void *v) +{ + struct pktgen_thread *t = seq->private; struct pktgen_dev *pkt_dev = NULL; + BUG_ON(!t); - if (!t) { - printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); - return -EINVAL; - } - - p = buf; - p += sprintf(p, "Name: %s max_before_softirq: %d\n", + seq_printf(seq, "Name: %s max_before_softirq: %d\n", t->name, t->max_before_softirq); - p += sprintf(p, "Running: "); + seq_printf(seq, "Running: "); if_lock(t); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); - p += sprintf(p, "\nStopped: "); + seq_printf(seq, "\nStopped: "); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(!pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); if (t->result[0]) - p += sprintf(p, "\nResult: %s\n", t->result); + seq_printf(seq, "\nResult: %s\n", t->result); else - p += sprintf(p, "\nResult: NA\n"); - - *eof = 1; + seq_printf(seq, "\nResult: NA\n"); if_unlock(t); - return p - buf; + return 0; } -static int proc_thread_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_thread_write(struct file *file, + const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_thread *t = seq->private; int i = 0, max, len, ret; char name[40]; - struct pktgen_thread *t; char *pg_result; unsigned long value = 0; - + if (count < 1) { // sprintf(pg_result, "Wrong command format"); return -EINVAL; } - + max = count - i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - + /* Read variable name */ len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) - return len; + if (len < 0) + return len; memset(name, 0, sizeof(name)); if (copy_from_user(name, &user_buffer[i], len)) return -EFAULT; i += len; - + max = count -i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - if (debug) - printk("pktgen: t=%s, count=%lu\n", name, count); - + if (debug) + printk("pktgen: t=%s, count=%lu\n", name, + (unsigned long) count); - t = (struct pktgen_thread*)(data); if(!t) { printk("pktgen: ERROR: No thread\n"); ret = -EINVAL; @@ -1474,21 +1437,19 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, return ret; } -static int create_proc_dir(void) +static int pktgen_thread_open(struct inode *inode, struct file *file) { - pg_proc_dir = proc_mkdir(PG_PROC_DIR, NULL); - - if (!pg_proc_dir) - return -ENODEV; - - return 0; + return single_open(file, pktgen_thread_show, PDE(inode)->data); } -static int remove_proc_dir(void) -{ - remove_proc_entry(PG_PROC_DIR, NULL); - return 0; -} +static struct file_operations pktgen_thread_fops = { + .owner = THIS_MODULE, + .open = pktgen_thread_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_thread_write, + .release = single_release, +}; /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) @@ -2552,8 +2513,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) struct pktgen_thread *tmp = pktgen_threads; - if (strlen(t->fname)) - remove_proc_entry(t->fname, NULL); + remove_proc_entry(t->name, pg_proc_dir); thread_lock(); @@ -2825,7 +2785,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* i if_lock(t); for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { - if (strcmp(pkt_dev->ifname, ifname) == 0) { + if (strncmp(pkt_dev->ifname, ifname, IFNAMSIZ) == 0) { break; } } @@ -2864,65 +2824,63 @@ static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) { struct pktgen_dev *pkt_dev; + struct proc_dir_entry *pe; /* We don't allow a device to be on several threads */ - if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { - - pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); - if (!pkt_dev) - return -ENOMEM; - - pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); - if (pkt_dev->flows == NULL) { - kfree(pkt_dev); - return -ENOMEM; - } - memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - - pkt_dev->min_pkt_size = ETH_ZLEN; - pkt_dev->max_pkt_size = ETH_ZLEN; - pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; - pkt_dev->count = pg_count_d; - pkt_dev->sofar = 0; - pkt_dev->udp_src_min = 9; /* sink port */ - pkt_dev->udp_src_max = 9; - pkt_dev->udp_dst_min = 9; - pkt_dev->udp_dst_max = 9; - - strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "%s/%s", PG_PROC_DIR, ifname); - - if (! pktgen_setup_dev(pkt_dev)) { - printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -ENODEV; - } - - pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); - if (!pkt_dev->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -EINVAL; - } - pkt_dev->proc_ent->read_proc = proc_if_read; - pkt_dev->proc_ent->write_proc = proc_if_write; - pkt_dev->proc_ent->data = (void*)(pkt_dev); - pkt_dev->proc_ent->owner = THIS_MODULE; - - return add_dev_to_thread(t, pkt_dev); - } - else { + pkt_dev = __pktgen_NN_threads(ifname, FIND); + if (pkt_dev) { printk("pktgen: ERROR: interface already used.\n"); return -EBUSY; } + + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + if (!pkt_dev) + return -ENOMEM; + + pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); + if (pkt_dev->flows == NULL) { + kfree(pkt_dev); + return -ENOMEM; + } + memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); + + pkt_dev->min_pkt_size = ETH_ZLEN; + pkt_dev->max_pkt_size = ETH_ZLEN; + pkt_dev->nfrags = 0; + pkt_dev->clone_skb = pg_clone_skb_d; + pkt_dev->delay_us = pg_delay_d / 1000; + pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->count = pg_count_d; + pkt_dev->sofar = 0; + pkt_dev->udp_src_min = 9; /* sink port */ + pkt_dev->udp_src_max = 9; + pkt_dev->udp_dst_min = 9; + pkt_dev->udp_dst_max = 9; + + strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); + + if (! pktgen_setup_dev(pkt_dev)) { + printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -ENODEV; + } + + pe = create_proc_entry(ifname, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, ifname); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -EINVAL; + } + pe->proc_fops = &pktgen_if_fops; + pe->data = pkt_dev; + + return add_dev_to_thread(t, pkt_dev); } static struct pktgen_thread *pktgen_find_thread(const char* name) @@ -2945,6 +2903,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) static int pktgen_create_thread(const char* name, int cpu) { struct pktgen_thread *t = NULL; + struct proc_dir_entry *pe; if (strlen(name) > 31) { printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); @@ -2966,17 +2925,16 @@ static int pktgen_create_thread(const char* name, int cpu) spin_lock_init(&t->if_lock); t->cpu = cpu; - sprintf(t->fname, "%s/%s", PG_PROC_DIR, t->name); - t->proc_ent = create_proc_entry(t->fname, 0600, NULL); - if (!t->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", t->fname); + pe = create_proc_entry(t->name, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, t->name); kfree(t); return -EINVAL; } - t->proc_ent->read_proc = proc_thread_read; - t->proc_ent->write_proc = proc_thread_write; - t->proc_ent->data = (void*)(t); - t->proc_ent->owner = THIS_MODULE; + + pe->proc_fops = &pktgen_thread_fops; + pe->data = t; t->next = pktgen_threads; pktgen_threads = t; @@ -3031,8 +2989,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ /* Clean up proc file system */ - if (strlen(pkt_dev->fname)) - remove_proc_entry(pkt_dev->fname, NULL); + remove_proc_entry(pkt_dev->ifname, pg_proc_dir); if (pkt_dev->flows) vfree(pkt_dev->flows); @@ -3043,21 +3000,24 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ static int __init pg_init(void) { int cpu; + struct proc_dir_entry *pe; + printk(version); - module_fname[0] = 0; + pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + if (!pg_proc_dir) + return -ENODEV; + pg_proc_dir->owner = THIS_MODULE; - create_proc_dir(); - - sprintf(module_fname, "%s/pgctrl", PG_PROC_DIR); - module_proc_ent = create_proc_entry(module_fname, 0600, NULL); - if (!module_proc_ent) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); + pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); + if (pe == NULL) { + printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); + proc_net_remove(PG_PROC_DIR); return -EINVAL; } - module_proc_ent->proc_fops = &pktgen_fops; - module_proc_ent->data = NULL; + pe->proc_fops = &pktgen_fops; + pe->data = NULL; /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); @@ -3089,10 +3049,8 @@ static void __exit pg_cleanup(void) unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ - - remove_proc_entry(module_fname, NULL); - - remove_proc_dir(); + remove_proc_entry(PGCTRL, pg_proc_dir); + proc_net_remove(PG_PROC_DIR); } From eed75f191d8318a2b144da8aae9774e1cfcae492 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sun, 16 Oct 2005 14:22:59 +0200 Subject: [PATCH 07/14] [NETFILTER] ip_conntrack: Make "hashsize" conntrack parameter writable It's fairly simple to resize the hash table, but currently you need to remove and reinsert the module. That's bad (we lose connection state). Harald has even offered to write a daemon which sets this based on load. Signed-off-by: Rusty Russell Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv4/netfilter/ip_conntrack_core.c | 132 ++++++++++++++++++------- 1 file changed, 95 insertions(+), 37 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 07a80b56e8dc..422ab68ee7fb 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -50,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.3" +#define IP_CONNTRACK_VERSION "2.4" #if 0 #define DEBUGP printk @@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; -static u_int32_t -hash_conntrack(const struct ip_conntrack_tuple *tuple) +static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, + unsigned int size, unsigned int rnd) { -#if 0 - dump_tuple(tuple); -#endif return (jhash_3words(tuple->src.ip, (tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), - ip_conntrack_hash_rnd) % ip_conntrack_htable_size); + rnd) % size); +} + +static u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ + return __hash_conntrack(tuple, ip_conntrack_htable_size, + ip_conntrack_hash_rnd); } int @@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data) return 1; } -static void free_conntrack_hash(void) +static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) { - if (ip_conntrack_vmalloc) - vfree(ip_conntrack_hash); + if (vmalloced) + vfree(hash); else - free_pages((unsigned long)ip_conntrack_hash, - get_order(sizeof(struct list_head) - * ip_conntrack_htable_size)); + free_pages((unsigned long)hash, + get_order(sizeof(struct list_head) * size)); } void ip_conntrack_flush() @@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void) ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); nf_unregister_sockopt(&so_getorigdst); } -static int hashsize; -module_param(hashsize, int, 0400); +static struct list_head *alloc_hashtable(int size, int *vmalloced) +{ + struct list_head *hash; + unsigned int i; + + *vmalloced = 0; + hash = (void*)__get_free_pages(GFP_KERNEL, + get_order(sizeof(struct list_head) + * size)); + if (!hash) { + *vmalloced = 1; + printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); + hash = vmalloc(sizeof(struct list_head) * size); + } + + if (hash) + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&hash[i]); + + return hash; +} + +int set_hashsize(const char *val, struct kernel_param *kp) +{ + int i, bucket, hashsize, vmalloced; + int old_vmalloced, old_size; + int rnd; + struct list_head *hash, *old_hash; + struct ip_conntrack_tuple_hash *h; + + /* On boot, we can set this without any fancy locking. */ + if (!ip_conntrack_htable_size) + return param_set_int(val, kp); + + hashsize = simple_strtol(val, NULL, 0); + if (!hashsize) + return -EINVAL; + + hash = alloc_hashtable(hashsize, &vmalloced); + if (!hash) + return -ENOMEM; + + /* We have to rehash for the new table anyway, so we also can + * use a new random seed */ + get_random_bytes(&rnd, 4); + + write_lock_bh(&ip_conntrack_lock); + for (i = 0; i < ip_conntrack_htable_size; i++) { + while (!list_empty(&ip_conntrack_hash[i])) { + h = list_entry(ip_conntrack_hash[i].next, + struct ip_conntrack_tuple_hash, list); + list_del(&h->list); + bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + list_add_tail(&h->list, &hash[bucket]); + } + } + old_size = ip_conntrack_htable_size; + old_vmalloced = ip_conntrack_vmalloc; + old_hash = ip_conntrack_hash; + + ip_conntrack_htable_size = hashsize; + ip_conntrack_vmalloc = vmalloced; + ip_conntrack_hash = hash; + ip_conntrack_hash_rnd = rnd; + write_unlock_bh(&ip_conntrack_lock); + + free_conntrack_hash(old_hash, old_vmalloced, old_size); + return 0; +} + +module_param_call(hashsize, set_hashsize, param_get_uint, + &ip_conntrack_htable_size, 0600); int __init ip_conntrack_init(void) { @@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void) /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ - if (hashsize) { - ip_conntrack_htable_size = hashsize; - } else { + if (!ip_conntrack_htable_size) { ip_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) / sizeof(struct list_head)); @@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void) return ret; } - /* AK: the hash table is twice as big than needed because it - uses list_head. it would be much nicer to caches to use a - single pointer list head here. */ - ip_conntrack_vmalloc = 0; - ip_conntrack_hash - =(void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) - *ip_conntrack_htable_size)); - if (!ip_conntrack_hash) { - ip_conntrack_vmalloc = 1; - printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n"); - ip_conntrack_hash = vmalloc(sizeof(struct list_head) - * ip_conntrack_htable_size); - } + ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, + &ip_conntrack_vmalloc); if (!ip_conntrack_hash) { printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); goto err_unreg_sockopt; @@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void) ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; write_unlock_bh(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) - INIT_LIST_HEAD(&ip_conntrack_hash[i]); - /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; @@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void) err_free_conntrack_slab: kmem_cache_destroy(ip_conntrack_cachep); err_free_hash: - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); err_unreg_sockopt: nf_unregister_sockopt(&so_getorigdst); From 1371e37da299d4df6267ad0ddf010435782c28e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 15 Oct 2005 09:42:39 +1000 Subject: [PATCH 08/14] [IPV4]: Kill redundant rcu_dereference on fa_info This patch kills a redundant rcu_dereference on fa->fa_info in fib_trie.c. As this dereference directly follows a list_for_each_entry_rcu line, we have already taken a read barrier with respect to getting an entry from the list. This read barrier guarantees that all values read out of fa are valid. In particular, the contents of structure pointed to by fa->fa_info is initialised before fa->fa_info is actually set (see fn_trie_insert); the setting of fa->fa_info itself is further separated with a write barrier from the insertion of fa into the list. Therefore by taking a read barrier after obtaining fa from the list (which is given by list_for_each_entry_rcu), we can be sure that fa->fa_info contains a valid pointer, as well as the fact that the data pointed to by fa->fa_info is itself valid. Signed-off-by: Herbert Xu Acked-by: Paul E. McKenney Signed-off-by: David S. Miller Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0093ea08c7f5..66247f38b371 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) prefix = htonl(l->key); list_for_each_entry_rcu(fa, &li->falh, fa_list) { - const struct fib_info *fi = rcu_dereference(fa->fa_info); + const struct fib_info *fi = fa->fa_info; unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); if (fa->fa_type == RTN_BROADCAST From 80b30c1023dbd795faf948dee0cfb3b270b56d47 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 15 Oct 2005 10:58:30 +1000 Subject: [PATCH 09/14] [IPSEC]: Kill obsolete get_mss function Now that we've switched over to storing MTUs in the xfrm_dst entries, we no longer need the dst's get_mss methods. This patch gets rid of them. It also documents the fact that our MTU calculation is not optimal for ESP. Signed-off-by: Herbert Xu Signed-off-by: Arnaldo Carvalho de Melo --- include/net/dst.h | 1 - net/xfrm/xfrm_policy.c | 43 ------------------------------------------ net/xfrm/xfrm_state.c | 6 ++++++ 3 files changed, 6 insertions(+), 44 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 4a056a682435..6c196a5baf24 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -94,7 +94,6 @@ struct dst_ops struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - int (*get_mss)(struct dst_entry *dst, u32 mtu); int entry_size; atomic_t entries; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cbb0ba34a600..0db9e57013fd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1192,46 +1192,6 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) EXPORT_SYMBOL(xfrm_bundle_ok); -/* Well... that's _TASK_. We need to scan through transformation - * list and figure out what mss tcp should generate in order to - * final datagram fit to mtu. Mama mia... :-) - * - * Apparently, some easy way exists, but we used to choose the most - * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta. - * - * Consider this function as something like dark humour. :-) - */ -static int xfrm_get_mss(struct dst_entry *dst, u32 mtu) -{ - int res = mtu - dst->header_len; - - for (;;) { - struct dst_entry *d = dst; - int m = res; - - do { - struct xfrm_state *x = d->xfrm; - if (x) { - spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_VALID && - x->type && x->type->get_max_size) - m = x->type->get_max_size(d->xfrm, m); - else - m += x->props.header_len; - spin_unlock_bh(&x->lock); - } - } while ((d = d->child) != NULL); - - if (m <= mtu) - break; - res -= (m - mtu); - if (res < 88) - return mtu; - } - - return res + dst->header_len; -} - int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -1252,8 +1212,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; - if (likely(dst_ops->get_mss == NULL)) - dst_ops->get_mss = xfrm_get_mss; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = __xfrm_garbage_collect; xfrm_policy_afinfo[afinfo->family] = afinfo; @@ -1281,7 +1239,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = NULL; dst_ops->negative_advice = NULL; dst_ops->link_failure = NULL; - dst_ops->get_mss = NULL; afinfo->garbage_collect = NULL; } } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9d206c282cf1..8b9a4747417d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,6 +1026,12 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); +/* + * This function is NOT optimal. For example, with ESP it will give an + * MTU that's usually two bytes short of being optimal. However, it will + * usually give an answer that's a multiple of 4 provided the input is + * also a multiple of 4. + */ int xfrm_state_mtu(struct xfrm_state *x, int mtu) { int res = mtu; From ea7ce406490cb248f44f510f7c0bcc357184640a Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Thu, 13 Oct 2005 11:43:05 -0700 Subject: [PATCH 10/14] [NETLINK]: Remove dead code in af_netlink.c Remove the variable nlk & call to nlk_sk as it does not have any side effect. Signed-off-by: Jayachandran C. Acked-by: James Morris Signed-off-by: Arnaldo Carvalho de Melo --- net/netlink/af_netlink.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 678c3f2c0d0b..eab4942ac97d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -740,11 +740,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) { - struct netlink_sock *nlk; int len = skb->len; - nlk = nlk_sk(sk); - skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); sock_put(sk); From 0d0d2bba97cb091218ea0bcb3d8debcc7bf48397 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Thu, 13 Oct 2005 11:43:02 -0700 Subject: [PATCH 11/14] [IPV4]: Remove dead code from ip_output.c skb_prev is assigned from skb, which cannot be NULL. This patch removes the unnecessary NULL check. Signed-off-by: Jayachandran C. Acked-by: James Morris Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv4/ip_output.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1ad5202e556b..87e350069abb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1023,10 +1023,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int alloclen; skb_prev = skb; - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; - else - fraggap = 0; + fraggap = skb_prev->len - maxfraglen; alloclen = fragheaderlen + hh_len + fraggap + 15; skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); From c83c24861882758b9731e8550225cd1e52a4cd1c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 Oct 2005 22:07:41 -0700 Subject: [PATCH 12/14] [SK_BUFF] kernel-doc: fix skbuff warnings Add kernel-doc to skbuff.h, skbuff.c to eliminate kernel-doc warnings. Signed-off-by: Randy Dunlap Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/skbuff.h | 3 ++- net/core/skbuff.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8f5d9e7f8734..b756935da9c8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,7 +171,6 @@ enum { * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @list: List we are on * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by @@ -190,6 +189,7 @@ enum { * @cloned: Head may be cloned (check refcnt to be sure) * @nohdr: Payload reference only, must not modify header * @pkt_type: Packet class + * @fclone: skbuff clone status * @ip_summed: Driver fed us an IP checksum * @priority: Packet queueing priority * @users: User count - see {datagram,tcp}.c @@ -202,6 +202,7 @@ enum { * @destructor: Destruct function * @nfmark: Can be used for communication between hooks * @nfct: Associated connection, if any + * @ipvs_property: skbuff is owned by ipvs * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02cd4cde2112..ef9d46b91eb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -122,6 +122,8 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask + * @fclone: allocate from fclone cache instead of head cache + * and allocate a cloned (child) skb * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. From 95df1c04ab3f7ca617774930df62c0893a188c2c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 18 Oct 2005 21:39:33 +0100 Subject: [PATCH 13/14] [AX.25]: Use constant instead of magic number Signed-off-by: Ralf Baechle DL5RB Signed-off-by: Arnaldo Carvalho de Melo --- net/rose/rose_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index e556d92c0bc4..b18fe5043019 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -727,7 +727,7 @@ int rose_rt_ioctl(unsigned int cmd, void __user *arg) } if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; - if (rose_route.ndigis > 8) /* No more than 8 digipeats */ + if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); dev_put(dev); From dcab5e1eeccf5e226c771ecc013631cde157435f Mon Sep 17 00:00:00 2001 From: David Engel Date: Fri, 21 Oct 2005 22:09:16 -0500 Subject: [PATCH 14/14] [IPV4]: Fix setting broadcast for SIOCSIFNETMASK Fix setting of the broadcast address when the netmask is set via SIOCSIFNETMASK in Linux 2.6. The code wanted the old value of ifa->ifa_mask but used it after it had already been overwritten with the new value. Signed-off-by: David Engel Signed-off-by: Arnaldo Carvalho de Melo --- net/ipv4/devinet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 74f2207e131a..4ec4b2ca6ab1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { + u32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if ((dev->flags & IFF_BROADCAST) && (ifa->ifa_prefixlen < 31) && (ifa->ifa_broadcast == - (ifa->ifa_local|~ifa->ifa_mask))) { + (ifa->ifa_local|~old_mask))) { ifa->ifa_broadcast = (ifa->ifa_local | ~sin->sin_addr.s_addr); }