From a7c75b3dca87f10a8c008b217bf5d3a29f20632c Mon Sep 17 00:00:00 2001
From: Corey Hickey <bugfood-ml@fatooh.org>
Date: Sun, 27 Jan 2008 17:50:34 -0800
Subject: [PATCH 10/10] Add conntrack hash types.

Signed-off-by: Corey Hickey <bugfood-ml@fatooh.org>
---
 include/linux/pkt_sched.h |    8 +++++
 net/sched/Kconfig         |   11 +++++++
 net/sched/sch_sfq.c       |   74 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index f75907f..85fd94b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -169,8 +169,17 @@ enum
         TCA_SFQ_HASH_DST,
         TCA_SFQ_HASH_SRC,
         TCA_SFQ_HASH_FWMARK,
+	/* conntrack */
+        TCA_SFQ_HASH_CTORIGDST,
+        TCA_SFQ_HASH_CTORIGSRC,
+        TCA_SFQ_HASH_CTREPLDST,
+        TCA_SFQ_HASH_CTREPLSRC,
+        TCA_SFQ_HASH_CTNATCHG,
+        __TCA_SFQ_HASH_MAX,
 };
 
+#define TCA_SFQ_HASH_MAX (__TCA_SFQ_HASH_MAX - 1)
+
 /* RED section */
 
 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c15c48..49df24e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -139,6 +139,17 @@ config NET_SCH_SFQ
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_sfq.
 
+config NET_SCH_SFQ_NFCT
+	bool "Connection Tracking Hash Types"
+	depends on NET_SCH_SFQ && NF_CONNTRACK_ENABLED=y || NET_SCH_SFQ=m && NF_CONNTRACK_ENABLED=m
+	---help---
+	  Say Y here to enable support for hashing based on netfilter connection
+	  tracking information. This is useful for a router that is also using
+	  NAT to connect privately-addressed hosts to the Internet. If you want
+	  to provide fair distribution of upstream bandwidth, SFQ must use
+	  connection tracking information, since all outgoing packets will share
+	  the same source address.
+
 config NET_SCH_TEQL
 	tristate "True Link Equalizer (TEQL)"
 	---help---
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 9fb6d4b..99230a3 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <linux/jhash.h>
+#include <net/netfilter/nf_conntrack.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -119,6 +120,10 @@ struct sfq_packet_info
 	u32     src;            /* source from packet header */
 	u32     dst;            /* destination from packet header */
 	u32     mark;           /* netfilter mark (fwmark) */
+	u32     ctorigsrc;      /* original source from conntrack */
+	u32     ctorigdst;      /* original destination from conntrack */
+	u32     ctreplsrc;      /* reply source from conntrack */
+	u32     ctrepldst;      /* reply destination from conntrack */
 };
 
 
@@ -127,6 +132,10 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	struct sfq_packet_info info;
 	u32 pert = q->perturbation;
 	unsigned mask = (1<<q->hash_divisor) - 1;
+#ifdef CONFIG_NET_SCH_SFQ_NFCT
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+#endif
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
@@ -172,6 +181,42 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 
 	info.mark = skb->mark;
 
+#ifdef CONFIG_NET_SCH_SFQ_NFCT
+	/* defaults if there is no conntrack info */
+	info.ctorigsrc = info.src;
+	info.ctorigdst = info.dst;
+	info.ctreplsrc = info.dst;
+	info.ctrepldst = info.src;
+	/* collect conntrack info */
+	if (ct && ct != &nf_conntrack_untracked) {
+		if (skb->protocol == __constant_htons(ETH_P_IP)) {
+			info.ctorigsrc =
+			    ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+			info.ctorigdst =
+			    ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+			info.ctreplsrc =
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+			info.ctrepldst =
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+		}
+		else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+			/* Again, hash ipv6 addresses into a single u32. */
+			info.ctorigsrc = jhash2(
+			    ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6,
+			    4, pert);
+			info.ctorigdst = jhash2(
+			    ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6,
+			    4, pert);
+			info.ctreplsrc = jhash2(
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6,
+			    4, pert);
+			info.ctrepldst = jhash2(
+			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6,
+			    4, pert);
+		}
+	}
+#endif
+
 	switch (q->hash_kind) {
 	case TCA_SFQ_HASH_CLASSIC:
 		return jhash_3words(info.dst, info.src, info.proto, pert) & mask;
@@ -181,9 +226,31 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		return jhash_1word(info.src, pert) & mask;
 	case TCA_SFQ_HASH_FWMARK:
 		return jhash_1word(info.mark, pert) & mask;
+#ifdef CONFIG_NET_SCH_SFQ_NFCT	/* same symbol that guards the ct lookup */
+	case TCA_SFQ_HASH_CTORIGDST:
+		return jhash_1word(info.ctorigdst, pert) & mask;
+	case TCA_SFQ_HASH_CTORIGSRC:
+		return jhash_1word(info.ctorigsrc, pert) & mask;
+	case TCA_SFQ_HASH_CTREPLDST:
+		return jhash_1word(info.ctrepldst, pert) & mask;
+	case TCA_SFQ_HASH_CTREPLSRC:
+		return jhash_1word(info.ctreplsrc, pert) & mask;
+	case TCA_SFQ_HASH_CTNATCHG:
+	{	/* orig dst == reply src: hash on orig src, else on reply src */
+		if (info.ctorigdst == info.ctreplsrc)
+			return jhash_1word(info.ctorigsrc, pert) & mask;
+		return jhash_1word(info.ctreplsrc, pert) & mask;
 	}
-	/* sfq_q_init makes sure the hash is known,
-	 * so this should never happen */
+#else	/* conntrack hash requested but not built in: warn, then fall back */
+	case TCA_SFQ_HASH_CTORIGDST:
+	case TCA_SFQ_HASH_CTORIGSRC:
+	case TCA_SFQ_HASH_CTREPLDST:
+	case TCA_SFQ_HASH_CTREPLSRC:
+	case TCA_SFQ_HASH_CTNATCHG:
+	if (net_ratelimit())
+		printk(KERN_WARNING "SFQ: Conntrack support not enabled.\n");
+#endif	/* CONFIG_NET_SCH_SFQ_NFCT */
+ 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "SFQ: Unknown hash method. "
 		                    "Falling back to classic.\n");
@@ -546,7 +613,8 @@ sfq_q_init(struct sfq_sched_data *q, struct rtattr *opt)
 			goto rtattr_failure;
 
 		if (q->depth        > SFQ_MAX_DEPTH ||
-		    q->hash_divisor > SFQ_MAX_DIVISOR)
+		    q->hash_divisor > SFQ_MAX_DIVISOR ||
+		    q->hash_kind    > TCA_SFQ_HASH_MAX)
 			return -EINVAL;
 	}
 	q->limit = min_t(u32, q->limit, q->depth - 1);
-- 
1.5.3.8

