620-sched_esfq.patch

--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -226,6 +226,33 @@ struct tc_sfq_xstats {
 	__s32		allot;
 };
 
+/* ESFQ section */
+
+enum
+{
+	/* traditional */
+	TCA_SFQ_HASH_CLASSIC,
+	TCA_SFQ_HASH_DST,
+	TCA_SFQ_HASH_SRC,
+	TCA_SFQ_HASH_FWMARK,
+	/* conntrack */
+	TCA_SFQ_HASH_CTORIGDST,
+	TCA_SFQ_HASH_CTORIGSRC,
+	TCA_SFQ_HASH_CTREPLDST,
+	TCA_SFQ_HASH_CTREPLSRC,
+	TCA_SFQ_HASH_CTNATCHG,
+};
+
+struct tc_esfq_qopt
+{
+	unsigned	quantum;	/* Bytes per round allocated to flow */
+	int		perturb_period;	/* Period of hash perturbation */
+	__u32		limit;		/* Maximal packets in queue */
+	unsigned	divisor;	/* Hash divisor */
+	unsigned	flows;		/* Maximal number of flows */
+	unsigned	hash_kind;	/* Hash function to use for flow identification */
+};
+
 /* RED section */
 
 enum {
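
struct tc_esfq_qopt above is the whole configuration surface of the qdisc: it travels from userspace to the kernel as the payload of the TCA_OPTIONS netlink attribute and is read back by esfq_q_init() further down in this patch. A minimal sketch of filling it from userspace follows; the values are only illustrative, fill_esfq_opt() is a hypothetical helper, and the surrounding RTM_NEWQDISC plumbing is assumed to exist elsewhere (building it also assumes headers with this patch applied, so that the TCA_SFQ_HASH_* constants are visible).

/* Illustrative sketch only -- example values, not defaults required by ESFQ. */
#include <linux/pkt_sched.h>	/* struct tc_esfq_qopt, TCA_SFQ_HASH_* (patched headers) */
#include <string.h>

static void fill_esfq_opt(struct tc_esfq_qopt *opt)
{
	memset(opt, 0, sizeof(*opt));
	opt->quantum        = 1514;             /* bytes per round; should be >= link MTU */
	opt->perturb_period = 10;               /* re-seed the hash every 10 seconds */
	opt->limit          = 128;              /* at most 128 packets queued in total */
	opt->divisor        = 1024;             /* hash buckets; a power of two, since the code masks with divisor-1 */
	opt->flows          = 128;              /* flow slots (the "depth" in sch_esfq.c) */
	opt->hash_kind      = TCA_SFQ_HASH_SRC; /* per-source-IP fairness */
}
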
  35. --- a/net/sched/Kconfig
  36. +++ b/net/sched/Kconfig
  37. @@ -148,6 +148,37 @@ config NET_SCH_SFQ
  38. To compile this code as a module, choose M here: the
  39. module will be called sch_sfq.
  40. +config NET_SCH_ESFQ
  41. + tristate "Enhanced Stochastic Fairness Queueing (ESFQ)"
  42. + ---help---
+	  Say Y here if you want to use the Enhanced Stochastic Fairness
+	  Queueing (ESFQ) packet scheduling algorithm for some of your network
+	  devices or as a leaf discipline for a classful qdisc such as HTB or
+	  CBQ (see the top of <file:net/sched/sch_esfq.c> for details and
+	  references to the SFQ algorithm).
+
+	  This is an enhanced SFQ version which allows you to control some
+	  hardcoded values in the SFQ scheduler.
+
+	  ESFQ also adds control of the hash function used to identify packet
+	  flows. The original SFQ discipline hashes by connection; ESFQ adds
+	  several other hashing methods, such as by src IP or by dst IP, which
+	  can be more fair to users in some networking situations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_esfq.
+
+config NET_SCH_ESFQ_NFCT
+	bool "Connection Tracking Hash Types"
+	depends on NET_SCH_ESFQ && NF_CONNTRACK
+	---help---
+	  Say Y here to enable support for hashing based on netfilter connection
+	  tracking information. This is useful for a router that is also using
+	  NAT to connect privately-addressed hosts to the Internet. If you want
+	  to provide fair distribution of upstream bandwidth, ESFQ must use
+	  connection tracking information, since all outgoing packets will share
+	  the same source address.
+
 config NET_SCH_TEQL
 	tristate "True Link Equalizer (TEQL)"
 	---help---
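
The NET_SCH_ESFQ_NFCT help text above argues that behind source NAT the packet headers alone cannot distinguish internal hosts, which is exactly what the conntrack hash kinds work around. The toy userspace sketch below (made-up addresses, no kernel or netfilter code) only prints what that situation looks like: every post-SNAT packet carries the same public source address, while the conntrack original tuple still names the LAN host, so hashing on ctorigsrc keeps one ESFQ slot per host.

/* Illustrative only: demonstrates the Kconfig argument, not kernel behaviour. */
#include <stdio.h>

struct toy_conn {
	const char *skb_src;    /* source address seen in the packet after SNAT */
	const char *ctorigsrc;  /* pre-NAT source remembered by conntrack */
};

int main(void)
{
	static const struct toy_conn conns[] = {
		{ "203.0.113.1", "192.168.1.10" },
		{ "203.0.113.1", "192.168.1.11" },
		{ "203.0.113.1", "192.168.1.12" },
	};
	unsigned i;

	for (i = 0; i < sizeof(conns) / sizeof(conns[0]); i++)
		printf("skb src %s  ctorigsrc %s\n", conns[i].skb_src, conns[i].ctorigsrc);

	/* Hashing the skb source puts all three hosts in one bucket (no per-host
	 * fairness); hashing ctorigsrc gives each LAN host its own bucket. */
	return 0;
}
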
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ing
 obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
 obj-$(CONFIG_NET_SCH_SFB)	+= sch_sfb.o
 obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ)	+= sch_esfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
--- /dev/null
+++ b/net/sched/sch_esfq.c
@@ -0,0 +1,702 @@
+/*
+ * net/sched/sch_esfq.c	Extended Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:	Alexander Atanasov, <alex@ssi.bg>
+ *		Added dynamic depth, limit, divisor, hash_kind options.
+ *		Added dst and src hashes.
+ *
+ *		Alexander Clouter, <alex@digriz.org.uk>
+ *		Ported ESFQ to Linux 2.6.
+ *
+ *		Corey Hickey, <bugfood-c@fatooh.org>
+ *		Maintenance of the Linux 2.6 port.
+ *		Added fwmark hash (thanks to Robert Kurjata).
+ *		Added usage of jhash.
+ *		Added conntrack support.
+ *		Added ctnatchg hash (thanks to Ben Pfountz).
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/jhash.h>
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*	Stochastic Fairness Queuing algorithm.
+	For more comments look at sch_sfq.c.
+	The difference is that you can change limit, depth,
+	hash table size and choose alternate hash types.
+
+	classic:	same as in sch_sfq.c
+	dst:		destination IP address
+	src:		source IP address
+	fwmark:		netfilter mark value
+	ctorigdst:	original destination IP address
+	ctorigsrc:	original source IP address
+	ctrepldst:	reply destination IP address
+	ctreplsrc:	reply source IP
+
+*/
+
+#define ESFQ_HEAD 0
+#define ESFQ_TAIL 1
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned int esfq_index;
+
+struct esfq_head
+{
+	esfq_index	next;
+	esfq_index	prev;
+};
+
+struct esfq_sched_data
+{
+/* Parameters */
+	int		perturb_period;
+	unsigned	quantum;	/* Allotment per round: MUST BE >= MTU */
+	int		limit;
+	unsigned	depth;
+	unsigned	hash_divisor;
+	unsigned	hash_kind;
+/* Variables */
+	struct timer_list perturb_timer;
+	int		perturbation;
+	esfq_index	tail;		/* Index of current slot in round */
+	esfq_index	max_depth;	/* Maximal depth */
+
+	esfq_index	*ht;		/* Hash table */
+	esfq_index	*next;		/* Active slots link */
+	short		*allot;		/* Current allotment per slot */
+	unsigned short	*hash;		/* Hash value indexed by slots */
+	struct sk_buff_head	*qs;	/* Slot queue */
+	struct esfq_head	*dep;	/* Linked list of slots, indexed by depth */
+};
+
+/* This contains the info we will hash. */
+struct esfq_packet_info
+{
+	u32	proto;		/* protocol or port */
+	u32	src;		/* source from packet header */
+	u32	dst;		/* destination from packet header */
+	u32	ctorigsrc;	/* original source from conntrack */
+	u32	ctorigdst;	/* original destination from conntrack */
+	u32	ctreplsrc;	/* reply source from conntrack */
+	u32	ctrepldst;	/* reply destination from conntrack */
+	u32	mark;		/* netfilter mark (fwmark) */
+};
+
+static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q, u32 a)
+{
+	return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
+{
+	return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
+{
+	return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
+}
+
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+	struct esfq_packet_info info;
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+#endif
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+	{
+		struct iphdr *iph = ip_hdr(skb);
+		info.dst = iph->daddr;
+		info.src = iph->saddr;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    (iph->protocol == IPPROTO_TCP ||
+		     iph->protocol == IPPROTO_UDP ||
+		     iph->protocol == IPPROTO_SCTP ||
+		     iph->protocol == IPPROTO_DCCP ||
+		     iph->protocol == IPPROTO_ESP))
+			info.proto = *(((u32*)iph) + iph->ihl);
+		else
+			info.proto = iph->protocol;
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6):
+	{
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		/* Hash ipv6 addresses into a u32. This isn't ideal,
+		 * but the code is simple. */
+		info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
+		info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
+		if (iph->nexthdr == IPPROTO_TCP ||
+		    iph->nexthdr == IPPROTO_UDP ||
+		    iph->nexthdr == IPPROTO_SCTP ||
+		    iph->nexthdr == IPPROTO_DCCP ||
+		    iph->nexthdr == IPPROTO_ESP)
+			info.proto = *(u32*)&iph[1];
+		else
+			info.proto = iph->nexthdr;
+		break;
+	}
+	default:
+		info.dst = (u32)(unsigned long)skb_dst(skb);
+		info.src = (u32)(unsigned long)skb->sk;
+		info.proto = skb->protocol;
+	}
+
+	info.mark = skb->mark;
+
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	/* defaults if there is no conntrack info */
+	info.ctorigsrc = info.src;
+	info.ctorigdst = info.dst;
+	info.ctreplsrc = info.dst;
+	info.ctrepldst = info.src;
+	/* collect conntrack info */
+	if (ct && ct != &nf_conntrack_untracked) {
+		if (skb->protocol == __constant_htons(ETH_P_IP)) {
+			info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+			info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+			info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+			info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+		}
+		else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+			/* Again, hash ipv6 addresses into a single u32. */
+			info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation);
+			info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation);
+			info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation);
+		}
+
+	}
+#endif
+
+	switch(q->hash_kind) {
+	case TCA_SFQ_HASH_CLASSIC:
+		return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+	case TCA_SFQ_HASH_DST:
+		return esfq_jhash_1word(q, info.dst);
+	case TCA_SFQ_HASH_SRC:
+		return esfq_jhash_1word(q, info.src);
+	case TCA_SFQ_HASH_FWMARK:
+		return esfq_jhash_1word(q, info.mark);
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+	case TCA_SFQ_HASH_CTORIGDST:
+		return esfq_jhash_1word(q, info.ctorigdst);
+	case TCA_SFQ_HASH_CTORIGSRC:
+		return esfq_jhash_1word(q, info.ctorigsrc);
+	case TCA_SFQ_HASH_CTREPLDST:
+		return esfq_jhash_1word(q, info.ctrepldst);
+	case TCA_SFQ_HASH_CTREPLSRC:
+		return esfq_jhash_1word(q, info.ctreplsrc);
+	case TCA_SFQ_HASH_CTNATCHG:
+	{
+		if (info.ctorigdst == info.ctreplsrc)
+			return esfq_jhash_1word(q, info.ctorigsrc);
+		return esfq_jhash_1word(q, info.ctreplsrc);
+	}
+#endif
+	default:
+		if (net_ratelimit())
+			printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+	}
+	return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+}
+
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d = q->qs[x].qlen + q->depth;
+
+	p = d;
+	n = q->dep[d].next;
+	q->dep[x].next = n;
+	q->dep[x].prev = p;
+	q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+
+	if (n == p && q->max_depth == q->qs[x].qlen + 1)
+		q->max_depth--;
+
+	esfq_link(q, x);
+}
+
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+	esfq_index p, n;
+	int d;
+
+	n = q->dep[x].next;
+	p = q->dep[x].prev;
+	q->dep[p].next = n;
+	q->dep[n].prev = p;
+	d = q->qs[x].qlen;
+	if (q->max_depth < d)
+		q->max_depth = d;
+
+	esfq_link(q, x);
+}
+
+static unsigned int esfq_drop(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_index d = q->max_depth;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	/* Queue is full! Find the longest slot and
+	   drop a packet from it */
+
+	if (d > 1) {
+		esfq_index x = q->dep[d+q->depth].next;
+		skb = q->qs[x].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[x]);
+		kfree_skb(skb);
+		esfq_dec(q, x);
+		sch->q.qlen--;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	if (d == 1) {
+		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+		d = q->next[q->tail];
+		q->next[q->tail] = q->next[d];
+		q->allot[q->next[d]] += q->quantum;
+		skb = q->qs[d].prev;
+		len = skb->len;
+		__skb_unlink(skb, &q->qs[d]);
+		kfree_skb(skb);
+		esfq_dec(q, d);
+		sch->q.qlen--;
+		q->ht[q->hash[d]] = q->depth;
+		sch->qstats.drops++;
+		sch->qstats.backlog -= len;
+		return len;
+	}
+
+	return 0;
+}
+
+static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end)
+{
+	unsigned hash = esfq_hash(q, skb);
+	unsigned depth = q->depth;
+	esfq_index x;
+
+	x = q->ht[hash];
+	if (x == depth) {
+		q->ht[hash] = x = q->dep[depth].next;
+		q->hash[x] = hash;
+	}
+
+	if (end == ESFQ_TAIL)
+		__skb_queue_tail(&q->qs[x], skb);
+	else
+		__skb_queue_head(&q->qs[x], skb);
+
+	esfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == depth) {		/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+}
+
+static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_q_enqueue(skb, q, ESFQ_TAIL);
+	sch->qstats.backlog += skb->len;
+	if (++sch->q.qlen < q->limit-1) {
+		sch->bstats.bytes += skb->len;
+		sch->bstats.packets++;
+		return 0;
+	}
+
+	sch->qstats.drops++;
+	esfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *esfq_peek(struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_index a;
+
+	/* No active slots */
+	if (q->tail == q->depth)
+		return NULL;
+
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
+}
+
+static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q)
+{
+	struct sk_buff *skb;
+	unsigned depth = q->depth;
+	esfq_index a, old_a;
+
+	/* No active slots */
+	if (q->tail == depth)
+		return NULL;
+
+	a = old_a = q->next[q->tail];
+
+	/* Grab packet */
+	skb = __skb_dequeue(&q->qs[a]);
+	esfq_dec(q, a);
+
+	/* Is the slot empty? */
+	if (q->qs[a].qlen == 0) {
+		q->ht[q->hash[a]] = depth;
+		a = q->next[a];
+		if (a == old_a) {
+			q->tail = depth;
+			return skb;
+		}
+		q->next[q->tail] = a;
+		q->allot[a] += q->quantum;
+	} else if ((q->allot[a] -= skb->len) <= 0) {
+		q->tail = a;
+		a = q->next[a];
+		q->allot[a] += q->quantum;
+	}
+
+	return skb;
+}
+
+static struct sk_buff *esfq_dequeue(struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = esfq_q_dequeue(q);
+	if (skb == NULL)
+		return NULL;
+	sch->q.qlen--;
+	sch->qstats.backlog -= skb->len;
+	return skb;
+}
+
+static void esfq_q_destroy(struct esfq_sched_data *q)
+{
+	del_timer(&q->perturb_timer);
+	if(q->ht)
+		kfree(q->ht);
+	if(q->dep)
+		kfree(q->dep);
+	if(q->next)
+		kfree(q->next);
+	if(q->allot)
+		kfree(q->allot);
+	if(q->hash)
+		kfree(q->hash);
+	if(q->qs)
+		kfree(q->qs);
+}
+
+static void esfq_destroy(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	esfq_q_destroy(q);
+}
+
+
+static void esfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *skb;
+
+	while ((skb = esfq_dequeue(sch)) != NULL)
+		kfree_skb(skb);
+}
+
+static void esfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct esfq_sched_data *q = qdisc_priv(sch);
+
+	q->perturbation = prandom_u32()&0x1F;
+
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+}
+
+static unsigned int esfq_check_hash(unsigned int kind)
+{
+	switch (kind) {
+	case TCA_SFQ_HASH_CTORIGDST:
+	case TCA_SFQ_HASH_CTORIGSRC:
+	case TCA_SFQ_HASH_CTREPLDST:
+	case TCA_SFQ_HASH_CTREPLSRC:
+	case TCA_SFQ_HASH_CTNATCHG:
+#ifndef CONFIG_NET_SCH_ESFQ_NFCT
+	{
+		if (net_ratelimit())
+			printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n");
+		return TCA_SFQ_HASH_CLASSIC;
+	}
+#endif
+	case TCA_SFQ_HASH_CLASSIC:
+	case TCA_SFQ_HASH_DST:
+	case TCA_SFQ_HASH_SRC:
+	case TCA_SFQ_HASH_FWMARK:
+		return kind;
+	default:
+	{
+		if (net_ratelimit())
+			printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n");
+		return TCA_SFQ_HASH_CLASSIC;
+	}
+	}
+}
+
+static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt)
+{
+	struct tc_esfq_qopt *ctl = nla_data(opt);
+	esfq_index p = ~0U/2;
+	int i;
+
+	if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl)))
+		return -EINVAL;
+
+	q->perturbation = 0;
+	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+	q->max_depth = 0;
+	if (opt == NULL) {
+		q->perturb_period = 0;
+		q->hash_divisor = 1024;
+		q->tail = q->limit = q->depth = 128;
+
+	} else {
+		struct tc_esfq_qopt *ctl = nla_data(opt);
+		if (ctl->quantum)
+			q->quantum = ctl->quantum;
+		q->perturb_period = ctl->perturb_period*HZ;
+		q->hash_divisor = ctl->divisor ? : 1024;
+		q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+		if ( q->depth > p - 1 )
+			return -EINVAL;
+
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, q->depth);
+
+		if (ctl->hash_kind) {
+			q->hash_kind = esfq_check_hash(ctl->hash_kind);
+		}
+	}
+
+	q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
+	q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+	q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
+	for (i=0; i< q->hash_divisor; i++)
+		q->ht[i] = q->depth;
+	for (i=0; i<q->depth; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+q->depth].next = i+q->depth;
+		q->dep[i+q->depth].prev = i+q->depth;
+	}
+
+	for (i=0; i<q->depth; i++)
+		esfq_link(q, i);
+	return 0;
+err_case:
+	esfq_q_destroy(q);
+	return -ENOBUFS;
+}
+
+static int esfq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	int err;
+
+	q->quantum = psched_mtu(qdisc_dev(sch)); /* default */
+	if ((err = esfq_q_init(q, opt)))
+		return err;
+
+	init_timer(&q->perturb_timer);
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = esfq_perturbation;
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+
+	return 0;
+}
+
+static int esfq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	struct esfq_sched_data new;
+	struct sk_buff *skb;
+	int err;
+
+	/* set up new queue */
+	memset(&new, 0, sizeof(struct esfq_sched_data));
+	new.quantum = psched_mtu(qdisc_dev(sch)); /* default */
+	if ((err = esfq_q_init(&new, opt)))
+		return err;
+
+	/* copy all packets from the old queue to the new queue */
+	sch_tree_lock(sch);
+	while ((skb = esfq_q_dequeue(q)) != NULL)
+		esfq_q_enqueue(skb, &new, ESFQ_TAIL);
+
+	/* clean up the old queue */
+	esfq_q_destroy(q);
+
+	/* copy elements of the new queue into the old queue */
+	q->perturb_period = new.perturb_period;
+	q->quantum = new.quantum;
+	q->limit = new.limit;
+	q->depth = new.depth;
+	q->hash_divisor = new.hash_divisor;
+	q->hash_kind = new.hash_kind;
+	q->tail = new.tail;
+	q->max_depth = new.max_depth;
+	q->ht = new.ht;
+	q->dep = new.dep;
+	q->next = new.next;
+	q->allot = new.allot;
+	q->hash = new.hash;
+	q->qs = new.qs;
+
+	/* finish up */
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	} else {
+		q->perturbation = 0;
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct esfq_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_esfq_qopt opt;
+
+	opt.quantum = q->quantum;
+	opt.perturb_period = q->perturb_period/HZ;
+
+	opt.limit = q->limit;
+	opt.divisor = q->hash_divisor;
+	opt.flows = q->depth;
+	opt.hash_kind = q->hash_kind;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct Qdisc_ops esfq_qdisc_ops =
+{
+	.next		= NULL,
+	.cl_ops		= NULL,
+	.id		= "esfq",
+	.priv_size	= sizeof(struct esfq_sched_data),
+	.enqueue	= esfq_enqueue,
+	.dequeue	= esfq_dequeue,
+	.peek		= esfq_peek,
+	.drop		= esfq_drop,
+	.init		= esfq_init,
+	.reset		= esfq_reset,
+	.destroy	= esfq_destroy,
+	.change		= esfq_change,
+	.dump		= esfq_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init esfq_module_init(void)
+{
+	return register_qdisc(&esfq_qdisc_ops);
+}
+static void __exit esfq_module_exit(void)
+{
+	unregister_qdisc(&esfq_qdisc_ops);
+}
+module_init(esfq_module_init)
+module_exit(esfq_module_exit)
+MODULE_LICENSE("GPL");
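
The least obvious hash kind above is TCA_SFQ_HASH_CTNATCHG, whose whole decision in esfq_hash() is the test info.ctorigdst == info.ctreplsrc. The standalone sketch below (toy addresses, outside the kernel) replays that rule for a plainly SNAT'ed connection and for a DNAT'ed (port-forwarded) one, to show that in both cases the key it selects is the NAT-internal host.

/* Illustrative only: mirrors the TCA_SFQ_HASH_CTNATCHG selection from esfq_hash()
 * with made-up IPv4 addresses written as hex constants. */
#include <stdint.h>
#include <stdio.h>

struct toy_tuples {
	uint32_t ctorigsrc, ctorigdst, ctreplsrc, ctrepldst;
};

/* Same rule as the CTNATCHG case: if the original destination equals the reply
 * source, the destination was not rewritten, so key on the original source;
 * otherwise key on the reply source. */
static uint32_t ctnatchg_key(const struct toy_tuples *t)
{
	return (t->ctorigdst == t->ctreplsrc) ? t->ctorigsrc : t->ctreplsrc;
}

int main(void)
{
	/* SNAT only: LAN host 10.0.0.2 -> server 198.51.100.1, masqueraded as 203.0.113.1. */
	const struct toy_tuples snat = { 0x0A000002, 0xC6336401, 0xC6336401, 0xCB007101 };
	/* DNAT (port forward): outside client 198.51.100.1 -> public 203.0.113.1,
	 * rewritten to LAN host 10.0.0.3. */
	const struct toy_tuples dnat = { 0xC6336401, 0xCB007101, 0x0A000003, 0xC6336401 };

	printf("snat key 0x%08x (original source: the LAN host)\n",
	       (unsigned)ctnatchg_key(&snat));
	printf("dnat key 0x%08x (reply source: the LAN host behind the forward)\n",
	       (unsigned)ctnatchg_key(&dnat));
	return 0;
}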