080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch 12 KB


  1. From: Alexander Duyck <alexander.h.duyck@redhat.com>
  2. Date: Wed, 31 Dec 2014 10:56:12 -0800
  3. Subject: [PATCH] fib_trie: Update meaning of pos to represent unchecked
  4. bits
  5. This change moves the pos value to the other side of the "bits" field. By
  6. doing this it actually simplifies a significant amount of code in the trie.
  7. For example when halving a tree we know that the bit lost exists at
  8. oldnode->pos, and if we inflate the tree the new bit being added is at
  9. tn->pos. Previously to find those bits you would have to subtract pos and
  10. bits from the keylength or start with a value of (1 << 31) and then shift
  11. that.
  12. There are a number of spots throughout the code that benefit from this. In
  13. the case of the hot-path searches the main advantage is that we can drop 2
  14. or more operations from the search path as we no longer need to compute the
  15. value for the index to be shifted by and can instead just use the raw pos
  16. value.
  17. In addition the tkey_extract_bits is now defunct and can be replaced by
  18. get_index since the two operations were doing the same thing, but now
  19. get_index does it much more quickly as it is only an xor and shift versus a
  20. pair of shifts and a subtraction.
  21. Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
  22. Signed-off-by: David S. Miller <davem@davemloft.net>
  23. ---
  24. --- a/net/ipv4/fib_trie.c
  25. +++ b/net/ipv4/fib_trie.c
  26. @@ -90,8 +90,7 @@ typedef unsigned int t_key;
  27. #define IS_TNODE(n) ((n)->bits)
  28. #define IS_LEAF(n) (!(n)->bits)
  29. -#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
  30. -#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
  31. +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
  32. struct tnode {
  33. t_key key;
  34. @@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_ch
  35. return rcu_dereference_rtnl(tn->child[i]);
  36. }
  37. -static inline t_key mask_pfx(t_key k, unsigned int l)
  38. -{
  39. - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
  40. -}
  41. -
  42. -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
  43. -{
  44. - if (offset < KEYLENGTH)
  45. - return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
  46. - else
  47. - return 0;
  48. -}
  49. -
  50. -/*
  51. - To understand this stuff, an understanding of keys and all their bits is
  52. - necessary. Every node in the trie has a key associated with it, but not
  53. - all of the bits in that key are significant.
  54. -
  55. - Consider a node 'n' and its parent 'tp'.
  56. -
  57. - If n is a leaf, every bit in its key is significant. Its presence is
  58. - necessitated by path compression, since during a tree traversal (when
  59. - searching for a leaf - unless we are doing an insertion) we will completely
  60. - ignore all skipped bits we encounter. Thus we need to verify, at the end of
  61. - a potentially successful search, that we have indeed been walking the
  62. - correct key path.
  63. -
  64. - Note that we can never "miss" the correct key in the tree if present by
  65. - following the wrong path. Path compression ensures that segments of the key
  66. - that are the same for all keys with a given prefix are skipped, but the
  67. - skipped part *is* identical for each node in the subtrie below the skipped
  68. - bit! trie_insert() in this implementation takes care of that - note the
  69. - call to tkey_sub_equals() in trie_insert().
  70. -
  71. - if n is an internal node - a 'tnode' here, the various parts of its key
  72. - have many different meanings.
  73. -
  74. - Example:
  75. - _________________________________________________________________
  76. - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
  77. - -----------------------------------------------------------------
  78. - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
  79. -
  80. - _________________________________________________________________
  81. - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
  82. - -----------------------------------------------------------------
  83. - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
  84. -
  85. - tp->pos = 7
  86. - tp->bits = 3
  87. - n->pos = 15
  88. - n->bits = 4
  89. -
  90. - First, let's just ignore the bits that come before the parent tp, that is
  91. - the bits from 0 to (tp->pos-1). They are *known* but at this point we do
  92. - not use them for anything.
  93. -
  94. - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
  95. - index into the parent's child array. That is, they will be used to find
  96. - 'n' among tp's children.
  97. -
  98. - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
  99. - for the node n.
  100. -
  101. - All the bits we have seen so far are significant to the node n. The rest
  102. - of the bits are really not needed or indeed known in n->key.
  103. -
  104. - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
  105. - n's child array, and will of course be different for each child.
  106. -
  107. -
  108. - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
  109. - at this point.
  110. -
  111. -*/
  112. +/* To understand this stuff, an understanding of keys and all their bits is
  113. + * necessary. Every node in the trie has a key associated with it, but not
  114. + * all of the bits in that key are significant.
  115. + *
  116. + * Consider a node 'n' and its parent 'tp'.
  117. + *
  118. + * If n is a leaf, every bit in its key is significant. Its presence is
  119. + * necessitated by path compression, since during a tree traversal (when
  120. + * searching for a leaf - unless we are doing an insertion) we will completely
  121. + * ignore all skipped bits we encounter. Thus we need to verify, at the end of
  122. + * a potentially successful search, that we have indeed been walking the
  123. + * correct key path.
  124. + *
  125. + * Note that we can never "miss" the correct key in the tree if present by
  126. + * following the wrong path. Path compression ensures that segments of the key
  127. + * that are the same for all keys with a given prefix are skipped, but the
  128. + * skipped part *is* identical for each node in the subtrie below the skipped
  129. + * bit! trie_insert() in this implementation takes care of that.
  130. + *
  131. + * if n is an internal node - a 'tnode' here, the various parts of its key
  132. + * have many different meanings.
  133. + *
  134. + * Example:
  135. + * _________________________________________________________________
  136. + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
  137. + * -----------------------------------------------------------------
  138. + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
  139. + *
  140. + * _________________________________________________________________
  141. + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
  142. + * -----------------------------------------------------------------
  143. + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
  144. + *
  145. + * tp->pos = 22
  146. + * tp->bits = 3
  147. + * n->pos = 13
  148. + * n->bits = 4
  149. + *
  150. + * First, let's just ignore the bits that come before the parent tp, that is
  151. + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
  152. + * point we do not use them for anything.
  153. + *
  154. + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
  155. + * index into the parent's child array. That is, they will be used to find
  156. + * 'n' among tp's children.
  157. + *
  158. + * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
  159. + * for the node n.
  160. + *
  161. + * All the bits we have seen so far are significant to the node n. The rest
  162. + * of the bits are really not needed or indeed known in n->key.
  163. + *
  164. + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
  165. + * n's child array, and will of course be different for each child.
  166. + *
  167. + * The rest of the bits, from 0 to (n->pos - 1) - "u" - are completely unknown
  168. + * at this point.
  169. + */
  170. static const int halve_threshold = 25;
  171. static const int inflate_threshold = 50;
  172. @@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)
  173. * as the nodes are searched
  174. */
  175. l->key = key;
  176. - l->pos = KEYLENGTH;
  177. + l->pos = 0;
  178. /* set bits to 0 indicating we are not a tnode */
  179. l->bits = 0;
  180. @@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key
  181. tn->parent = NULL;
  182. tn->pos = pos;
  183. tn->bits = bits;
  184. - tn->key = mask_pfx(key, pos);
  185. + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
  186. tn->full_children = 0;
  187. tn->empty_children = 1<<bits;
  188. }
  189. @@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key
  190. return tn;
  191. }
  192. -/*
  193. - * Check whether a tnode 'n' is "full", i.e. it is an internal node
  194. +/* Check whether a tnode 'n' is "full", i.e. it is an internal node
  195. * and no bits are skipped. See discussion in dyntree paper p. 6
  196. */
  197. -
  198. static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
  199. {
  200. - return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
  201. + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
  202. }
  203. static inline void put_child(struct tnode *tn, int i,
  204. @@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie
  205. {
  206. int olen = tnode_child_length(oldtnode);
  207. struct tnode *tn;
  208. + t_key m;
  209. int i;
  210. pr_debug("In inflate\n");
  211. - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
  212. + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
  213. if (!tn)
  214. return ERR_PTR(-ENOMEM);
  215. @@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie
  216. * fails. In case of failure we return the oldnode and inflate
  217. * of tnode is ignored.
  218. */
  219. + for (i = 0, m = 1u << tn->pos; i < olen; i++) {
  220. + struct tnode *inode = tnode_get_child(oldtnode, i);
  221. - for (i = 0; i < olen; i++) {
  222. - struct tnode *inode;
  223. -
  224. - inode = tnode_get_child(oldtnode, i);
  225. - if (tnode_full(oldtnode, inode) && inode->bits > 1) {
  226. + if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
  227. struct tnode *left, *right;
  228. - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
  229. - left = tnode_new(inode->key&(~m), inode->pos + 1,
  230. + left = tnode_new(inode->key & ~m, inode->pos,
  231. inode->bits - 1);
  232. if (!left)
  233. goto nomem;
  234. - right = tnode_new(inode->key|m, inode->pos + 1,
  235. + right = tnode_new(inode->key | m, inode->pos,
  236. inode->bits - 1);
  237. if (!right) {
  238. @@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie
  239. /* A leaf or an internal node with skipped bits */
  240. if (!tnode_full(oldtnode, inode)) {
  241. - put_child(tn,
  242. - tkey_extract_bits(inode->key, tn->pos, tn->bits),
  243. - inode);
  244. + put_child(tn, get_index(inode->key, tn), inode);
  245. continue;
  246. }
  247. @@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *
  248. pr_debug("In halve\n");
  249. - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
  250. + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
  251. if (!tn)
  252. return ERR_PTR(-ENOMEM);
  253. @@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *
  254. if (left && right) {
  255. struct tnode *newn;
  256. - newn = tnode_new(left->key, tn->pos + tn->bits, 1);
  257. + newn = tnode_new(left->key, oldtnode->pos, 1);
  258. if (!newn)
  259. goto nomem;
  260. @@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *
  261. key = tn->key;
  262. while (tn != NULL && (tp = node_parent(tn)) != NULL) {
  263. - cindex = tkey_extract_bits(key, tp->pos, tp->bits);
  264. + cindex = get_index(key, tp);
  265. wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
  266. tn = resize(t, tn);
  267. @@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node
  268. */
  269. if (n) {
  270. struct tnode *tn;
  271. - int newpos;
  272. -
  273. - newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
  274. - tn = tnode_new(key, newpos, 1);
  275. + tn = tnode_new(key, __fls(key ^ n->key), 1);
  276. if (!tn) {
  277. free_leaf_info(li);
  278. node_free(l);
  279. @@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode
  280. static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
  281. {
  282. do {
  283. - t_key idx;
  284. -
  285. - if (c)
  286. - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
  287. - else
  288. - idx = 0;
  289. + t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;
  290. while (idx < 1u << p->bits) {
  291. c = tnode_get_child_rcu(p, idx++);
  292. @@ -1851,7 +1819,7 @@ rescan:
  293. /* Current node exhausted, pop back up */
  294. p = node_parent_rcu(tn);
  295. if (p) {
  296. - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
  297. + cindex = get_index(tn->key, p) + 1;
  298. tn = p;
  299. --iter->depth;
  300. goto rescan;
  301. @@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_
  302. if (IS_TNODE(n)) {
  303. __be32 prf = htonl(n->key);
  304. - seq_indent(seq, iter->depth - 1);
  305. - seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
  306. - &prf, n->pos, n->bits, n->full_children,
  307. - n->empty_children);
  308. + seq_indent(seq, iter->depth-1);
  309. + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
  310. + &prf, KEYLENGTH - n->pos - n->bits, n->bits,
  311. + n->full_children, n->empty_children);
  312. } else {
  313. struct leaf_info *li;
  314. __be32 val = htonl(n->key);