patch-2.1.120 linux/net/ipv4/route.c
Next file: linux/net/ipv4/tcp.c
Previous file: linux/net/ipv4/raw.c
Back to the patch index
Back to the overall index
- Lines: 301
- Date:
Thu Aug 27 19:33:08 1998
- Orig file:
v2.1.119/linux/net/ipv4/route.c
- Orig date:
Tue Jul 28 14:21:10 1998
diff -u --recursive --new-file v2.1.119/linux/net/ipv4/route.c linux/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.54 1998/07/15 05:05:22 davem Exp $
+ * Version: $Id: route.c,v 1.57 1998/08/26 12:04:09 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -48,6 +48,7 @@
* route.c and rewritten from scratch.
* Andi Kleen : Load-limit warning messages.
* Vitaly E. Lavrov : Transparent proxy revived after year coma.
+ * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -90,6 +91,8 @@
#include <linux/sysctl.h>
#endif
+#define IP_MAX_MTU 0xFFF0
+
#define RT_GC_TIMEOUT (300*HZ)
int ip_rt_min_delay = 2*HZ;
@@ -166,7 +169,7 @@
* Route cache.
*/
-static struct rtable *rt_hash_table[RT_HASH_DIVISOR];
+struct rtable *rt_hash_table[RT_HASH_DIVISOR];
static struct rtable * rt_intern_hash(unsigned hash, struct rtable * rth);
@@ -246,6 +249,13 @@
dst_free(&rt->u.dst);
}
+static __inline__ int rt_fast_clean(struct rtable *rth)
+{
+ /* Kill broadcast/multicast entries very aggressively, if they
+ collide in hash table with more useful entries */
+ return ((rth->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
+ && rth->key.iif && rth->u.rt_next);
+}
static void rt_check_expire(unsigned long dummy)
{
@@ -255,43 +265,30 @@
unsigned long now = jiffies;
for (i=0; i<RT_HASH_DIVISOR/5; i++) {
+ unsigned tmo = ip_rt_gc_timeout;
+
rover = (rover + 1) & (RT_HASH_DIVISOR-1);
rthp = &rt_hash_table[rover];
while ((rth = *rthp) != NULL) {
- struct rtable * rth_next = rth->u.rt_next;
-
/*
* Cleanup aged off entries.
*/
if (!atomic_read(&rth->u.dst.use) &&
- (now - rth->u.dst.lastuse > ip_rt_gc_timeout)) {
- *rthp = rth_next;
-#if RT_CACHE_DEBUG >= 2
- printk("rt_check_expire clean %02x@%08x\n", rover, rth->rt_dst);
-#endif
+ (now - rth->u.dst.lastuse > tmo
+ || rt_fast_clean(rth))) {
+ *rthp = rth->u.rt_next;
rt_free(rth);
continue;
}
- if (!rth_next)
- break;
-
- if ( (long)(rth_next->u.dst.lastuse - rth->u.dst.lastuse) > RT_CACHE_BUBBLE_THRESHOLD ||
- ((long)(rth->u.dst.lastuse - rth_next->u.dst.lastuse) < 0 &&
- atomic_read(&rth->u.dst.refcnt) < atomic_read(&rth_next->u.dst.refcnt))) {
-#if RT_CACHE_DEBUG >= 2
- printk("rt_check_expire bubbled %02x@%08x<->%08x\n", rover, rth->rt_dst, rth_next->rt_dst);
-#endif
- *rthp = rth_next;
- rth->u.rt_next = rth_next->u.rt_next;
- rth_next->u.rt_next = rth;
- rthp = &rth_next->u.rt_next;
- continue;
- }
+ tmo >>= 1;
rthp = &rth->u.rt_next;
}
+
+ if ((jiffies - now) > 0)
+ break;
}
rt_periodic_timer.expires = now + ip_rt_gc_interval;
add_timer(&rt_periodic_timer);
@@ -305,21 +302,14 @@
rt_deadline = 0;
for (i=0; i<RT_HASH_DIVISOR; i++) {
- int nr=0;
-
if ((rth = xchg(&rt_hash_table[i], NULL)) == NULL)
continue;
for (; rth; rth=next) {
next = rth->u.rt_next;
- nr++;
rth->u.rt_next = NULL;
rt_free(rth);
}
-#if RT_CACHE_DEBUG >= 2
- if (nr > 0)
- printk("rt_cache_flush: %d@%02x\n", nr, i);
-#endif
}
}
@@ -384,17 +374,23 @@
expire++;
for (i=0; i<RT_HASH_DIVISOR; i++) {
+ unsigned tmo;
if (!rt_hash_table[i])
continue;
+ tmo = expire;
for (rthp=&rt_hash_table[i]; (rth=*rthp); rthp=&rth->u.rt_next) {
if (atomic_read(&rth->u.dst.use) ||
- now - rth->u.dst.lastuse < expire)
+ (now - rth->u.dst.lastuse < tmo && !rt_fast_clean(rth))) {
+ tmo >>= 1;
continue;
+ }
*rthp = rth->u.rt_next;
rth->u.rt_next = NULL;
rt_free(rth);
break;
}
+ if ((jiffies-now)>0)
+ break;
}
last_gc = now;
@@ -412,8 +408,6 @@
struct rtable *rth, **rthp;
unsigned long now = jiffies;
- rt->u.dst.priority = rt_tos2priority(rt->key.tos);
-
start_bh_atomic();
rthp = &rt_hash_table[hash];
@@ -793,19 +787,17 @@
if (fi) {
if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
-#ifndef CONFIG_RTNL_OLD_IFINFO
rt->u.dst.mxlock = fi->fib_metrics[RTAX_LOCK-1];
rt->u.dst.pmtu = fi->fib_mtu;
if (fi->fib_mtu == 0) {
rt->u.dst.pmtu = rt->u.dst.dev->mtu;
+ if (rt->u.dst.pmtu > IP_MAX_MTU)
+ rt->u.dst.pmtu = IP_MAX_MTU;
if (rt->u.dst.mxlock&(1<<RTAX_MTU) &&
rt->rt_gateway != rt->rt_dst &&
rt->u.dst.pmtu > 576)
rt->u.dst.pmtu = 576;
}
-#else
- rt->u.dst.pmtu = fi->fib_mtu ? : rt->u.dst.dev->mtu;
-#endif
rt->u.dst.window= fi->fib_window ? : 0;
rt->u.dst.rtt = fi->fib_rtt ? : TCP_TIMEOUT_INIT;
#ifdef CONFIG_NET_CLS_ROUTE
@@ -813,6 +805,8 @@
#endif
} else {
rt->u.dst.pmtu = rt->u.dst.dev->mtu;
+ if (rt->u.dst.pmtu > IP_MAX_MTU)
+ rt->u.dst.pmtu = IP_MAX_MTU;
rt->u.dst.window= 0;
rt->u.dst.rtt = TCP_TIMEOUT_INIT;
}
@@ -930,7 +924,7 @@
if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
goto martian_source;
- if (daddr == 0xFFFFFFFF)
+ if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
goto brd_input;
/* Accept zero addresses only to limited broadcast;
@@ -991,6 +985,11 @@
fib_select_multipath(&key, &res);
#endif
out_dev = FIB_RES_DEV(res)->ip_ptr;
+ if (out_dev == NULL) {
+ if (net_ratelimit())
+ printk(KERN_CRIT "Bug in ip_route_input_slow(). Please, report\n");
+ return -EINVAL;
+ }
err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst);
if (err < 0)
@@ -1312,15 +1311,14 @@
tables are looked up with only one purpose:
to catch if destination is gatewayed, rather than
direct. Moreover, if MSG_DONTROUTE is set,
- we send packet, no matter of routing tables
- of ifaddr state. --ANK
+ we send packet, ignoring both routing tables
+ and ifaddr state. --ANK
We could make it even if oif is unknown,
likely IPv6, but we do not.
*/
- printk(KERN_DEBUG "Dest not on link. Forcing...\n");
if (key.src == 0)
key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK);
goto make_route;
@@ -1475,7 +1473,7 @@
#ifdef CONFIG_RTNETLINK
-static int rt_fill_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, int nowait)
+static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
{
struct rtable *rt = (struct rtable*)skb->dst;
struct rtmsg *r;
@@ -1485,11 +1483,7 @@
#ifdef CONFIG_IP_MROUTE
struct rtattr *eptr;
#endif
-#ifdef CONFIG_RTNL_OLD_IFINFO
- unsigned char *o;
-#else
struct rtattr *mx;
-#endif
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
r = NLMSG_DATA(nlh);
@@ -1503,11 +1497,6 @@
r->rtm_scope = RT_SCOPE_UNIVERSE;
r->rtm_protocol = RTPROT_UNSPEC;
r->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED;
-#ifdef CONFIG_RTNL_OLD_IFINFO
- r->rtm_nhs = 0;
-
- o = skb->tail;
-#endif
RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
if (rt->key.src) {
r->rtm_src_len = 32;
@@ -1521,11 +1510,6 @@
RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
if (rt->rt_dst != rt->rt_gateway)
RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
-#ifdef CONFIG_RTNL_OLD_IFINFO
- RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
- RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
- RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
-#else
mx = (struct rtattr*)skb->tail;
RTA_PUT(skb, RTA_METRICS, 0, NULL);
if (rt->u.dst.mxlock)
@@ -1539,7 +1523,6 @@
mx->rta_len = skb->tail - (u8*)mx;
if (mx->rta_len == RTA_LENGTH(0))
skb_trim(skb, (u8*)mx - skb->data);
-#endif
ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
ci.rta_used = atomic_read(&rt->u.dst.refcnt);
ci.rta_clntref = atomic_read(&rt->u.dst.use);
@@ -1549,9 +1532,6 @@
eptr = (struct rtattr*)skb->tail;
#endif
RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-#ifdef CONFIG_RTNL_OLD_IFINFO
- r->rtm_optlen = skb->tail - o;
-#endif
if (rt->key.iif) {
#ifdef CONFIG_IP_MROUTE
u32 dst = rt->rt_dst;
@@ -1573,9 +1553,6 @@
#endif
{
RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif);
-#ifdef CONFIG_RTNL_OLD_IFINFO
- r->rtm_optlen = skb->tail - o;
-#endif
}
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov