080-16-fib_trie-inflate-halve-nodes-in-a-more-RCU-friendly-.patch 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. From: Alexander Duyck <alexander.h.duyck@redhat.com>
  2. Date: Wed, 31 Dec 2014 10:56:55 -0800
  3. Subject: [PATCH] fib_trie: inflate/halve nodes in a more RCU friendly
  4. way
  5. This change pulls the node_set_parent functionality out of put_child_reorg
  6. and instead leaves that to the function to take care of as well. By doing
  7. this we can fully construct the new cluster of tnodes and all of the
  8. pointers out of it before we start routing pointers into it.
  9. I am suspecting this will likely fix some concurency issues though I don't
  10. have a good test to show as such.
  11. Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
  12. Signed-off-by: David S. Miller <davem@davemloft.net>
  13. ---
  14. --- a/net/ipv4/fib_trie.c
  15. +++ b/net/ipv4/fib_trie.c
  16. @@ -391,8 +391,6 @@ static void put_child(struct tnode *tn,
  17. else if (!wasfull && isfull)
  18. tn->full_children++;
  19. - node_set_parent(n, tn);
  20. -
  21. rcu_assign_pointer(tn->child[i], n);
  22. }
  23. @@ -436,10 +434,8 @@ static void tnode_free(struct tnode *tn)
  24. static int inflate(struct trie *t, struct tnode *oldtnode)
  25. {
  26. - unsigned long olen = tnode_child_length(oldtnode);
  27. - struct tnode *tp = node_parent(oldtnode);
  28. - struct tnode *tn;
  29. - unsigned long i;
  30. + struct tnode *inode, *node0, *node1, *tn, *tp;
  31. + unsigned long i, j, k;
  32. t_key m;
  33. pr_debug("In inflate\n");
  34. @@ -448,43 +444,13 @@ static int inflate(struct trie *t, struc
  35. if (!tn)
  36. return -ENOMEM;
  37. - /*
  38. - * Preallocate and store tnodes before the actual work so we
  39. - * don't get into an inconsistent state if memory allocation
  40. - * fails. In case of failure we return the oldnode and inflate
  41. - * of tnode is ignored.
  42. + /* Assemble all of the pointers in our cluster, in this case that
  43. + * represents all of the pointers out of our allocated nodes that
  44. + * point to existing tnodes and the links between our allocated
  45. + * nodes.
  46. */
  47. - for (i = 0, m = 1u << tn->pos; i < olen; i++) {
  48. - struct tnode *inode = tnode_get_child(oldtnode, i);
  49. -
  50. - if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
  51. - struct tnode *left, *right;
  52. -
  53. - left = tnode_new(inode->key & ~m, inode->pos,
  54. - inode->bits - 1);
  55. - if (!left)
  56. - goto nomem;
  57. - tnode_free_append(tn, left);
  58. -
  59. - right = tnode_new(inode->key | m, inode->pos,
  60. - inode->bits - 1);
  61. -
  62. - if (!right)
  63. - goto nomem;
  64. - tnode_free_append(tn, right);
  65. -
  66. - put_child(tn, 2*i, left);
  67. - put_child(tn, 2*i+1, right);
  68. - }
  69. - }
  70. -
  71. - /* prepare oldtnode to be freed */
  72. - tnode_free_init(oldtnode);
  73. -
  74. - for (i = 0; i < olen; i++) {
  75. - struct tnode *inode = tnode_get_child(oldtnode, i);
  76. - struct tnode *left, *right;
  77. - unsigned long size, j;
  78. + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) {
  79. + inode = tnode_get_child(oldtnode, --i);
  80. /* An empty child */
  81. if (inode == NULL)
  82. @@ -496,65 +462,99 @@ static int inflate(struct trie *t, struc
  83. continue;
  84. }
  85. - /* drop the node in the old tnode free list */
  86. - tnode_free_append(oldtnode, inode);
  87. -
  88. /* An internal node with two children */
  89. if (inode->bits == 1) {
  90. - put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
  91. - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
  92. + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1));
  93. + put_child(tn, 2 * i, tnode_get_child(inode, 0));
  94. continue;
  95. }
  96. - /* An internal node with more than two children */
  97. -
  98. /* We will replace this node 'inode' with two new
  99. - * ones, 'left' and 'right', each with half of the
  100. + * ones, 'node0' and 'node1', each with half of the
  101. * original children. The two new nodes will have
  102. * a position one bit further down the key and this
  103. * means that the "significant" part of their keys
  104. * (see the discussion near the top of this file)
  105. * will differ by one bit, which will be "0" in
  106. - * left's key and "1" in right's key. Since we are
  107. + * node0's key and "1" in node1's key. Since we are
  108. * moving the key position by one step, the bit that
  109. * we are moving away from - the bit at position
  110. - * (inode->pos) - is the one that will differ between
  111. - * left and right. So... we synthesize that bit in the
  112. - * two new keys.
  113. - * The mask 'm' below will be a single "one" bit at
  114. - * the position (inode->pos)
  115. + * (tn->pos) - is the one that will differ between
  116. + * node0 and node1. So... we synthesize that bit in the
  117. + * two new keys.
  118. */
  119. + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
  120. + if (!node1)
  121. + goto nomem;
  122. + tnode_free_append(tn, node1);
  123. +
  124. + node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1);
  125. + if (!node0)
  126. + goto nomem;
  127. + tnode_free_append(tn, node0);
  128. +
  129. + /* populate child pointers in new nodes */
  130. + for (k = tnode_child_length(inode), j = k / 2; j;) {
  131. + put_child(node1, --j, tnode_get_child(inode, --k));
  132. + put_child(node0, j, tnode_get_child(inode, j));
  133. + put_child(node1, --j, tnode_get_child(inode, --k));
  134. + put_child(node0, j, tnode_get_child(inode, j));
  135. + }
  136. +
  137. + /* link new nodes to parent */
  138. + NODE_INIT_PARENT(node1, tn);
  139. + NODE_INIT_PARENT(node0, tn);
  140. +
  141. + /* link parent to nodes */
  142. + put_child(tn, 2 * i + 1, node1);
  143. + put_child(tn, 2 * i, node0);
  144. + }
  145. +
  146. + /* setup the parent pointer into and out of this node */
  147. + tp = node_parent(oldtnode);
  148. + NODE_INIT_PARENT(tn, tp);
  149. + put_child_root(tp, t, tn->key, tn);
  150. - /* Use the old key, but set the new significant
  151. - * bit to zero.
  152. - */
  153. + /* prepare oldtnode to be freed */
  154. + tnode_free_init(oldtnode);
  155. - left = tnode_get_child(tn, 2*i);
  156. - put_child(tn, 2*i, NULL);
  157. + /* update all child nodes parent pointers to route to us */
  158. + for (i = tnode_child_length(oldtnode); i;) {
  159. + inode = tnode_get_child(oldtnode, --i);
  160. - BUG_ON(!left);
  161. + /* A leaf or an internal node with skipped bits */
  162. + if (!tnode_full(oldtnode, inode)) {
  163. + node_set_parent(inode, tn);
  164. + continue;
  165. + }
  166. - right = tnode_get_child(tn, 2*i+1);
  167. - put_child(tn, 2*i+1, NULL);
  168. + /* drop the node in the old tnode free list */
  169. + tnode_free_append(oldtnode, inode);
  170. - BUG_ON(!right);
  171. + /* fetch new nodes */
  172. + node1 = tnode_get_child(tn, 2 * i + 1);
  173. + node0 = tnode_get_child(tn, 2 * i);
  174. - size = tnode_child_length(left);
  175. - for (j = 0; j < size; j++) {
  176. - put_child(left, j, rtnl_dereference(inode->child[j]));
  177. - put_child(right, j, rtnl_dereference(inode->child[j + size]));
  178. + /* bits == 1 then node0 and node1 represent inode's children */
  179. + if (inode->bits == 1) {
  180. + node_set_parent(node1, tn);
  181. + node_set_parent(node0, tn);
  182. + continue;
  183. }
  184. - put_child(tn, 2 * i, left);
  185. - put_child(tn, 2 * i + 1, right);
  186. + /* update parent pointers in child node's children */
  187. + for (k = tnode_child_length(inode), j = k / 2; j;) {
  188. + node_set_parent(tnode_get_child(inode, --k), node1);
  189. + node_set_parent(tnode_get_child(inode, --j), node0);
  190. + node_set_parent(tnode_get_child(inode, --k), node1);
  191. + node_set_parent(tnode_get_child(inode, --j), node0);
  192. + }
  193. /* resize child nodes */
  194. - resize(t, left);
  195. - resize(t, right);
  196. + resize(t, node1);
  197. + resize(t, node0);
  198. }
  199. - put_child_root(tp, t, tn->key, tn);
  200. -
  201. /* we completed without error, prepare to free old node */
  202. tnode_free(oldtnode);
  203. return 0;
  204. @@ -566,10 +566,8 @@ nomem:
  205. static int halve(struct trie *t, struct tnode *oldtnode)
  206. {
  207. - unsigned long olen = tnode_child_length(oldtnode);
  208. - struct tnode *tp = node_parent(oldtnode);
  209. - struct tnode *tn, *left, *right;
  210. - int i;
  211. + struct tnode *tn, *tp, *inode, *node0, *node1;
  212. + unsigned long i;
  213. pr_debug("In halve\n");
  214. @@ -577,68 +575,64 @@ static int halve(struct trie *t, struct
  215. if (!tn)
  216. return -ENOMEM;
  217. - /*
  218. - * Preallocate and store tnodes before the actual work so we
  219. - * don't get into an inconsistent state if memory allocation
  220. - * fails. In case of failure we return the oldnode and halve
  221. - * of tnode is ignored.
  222. + /* Assemble all of the pointers in our cluster, in this case that
  223. + * represents all of the pointers out of our allocated nodes that
  224. + * point to existing tnodes and the links between our allocated
  225. + * nodes.
  226. */
  227. + for (i = tnode_child_length(oldtnode); i;) {
  228. + node1 = tnode_get_child(oldtnode, --i);
  229. + node0 = tnode_get_child(oldtnode, --i);
  230. - for (i = 0; i < olen; i += 2) {
  231. - left = tnode_get_child(oldtnode, i);
  232. - right = tnode_get_child(oldtnode, i+1);
  233. + /* At least one of the children is empty */
  234. + if (!node1 || !node0) {
  235. + put_child(tn, i / 2, node1 ? : node0);
  236. + continue;
  237. + }
  238. /* Two nonempty children */
  239. - if (left && right) {
  240. - struct tnode *newn;
  241. -
  242. - newn = tnode_new(left->key, oldtnode->pos, 1);
  243. - if (!newn) {
  244. - tnode_free(tn);
  245. - return -ENOMEM;
  246. - }
  247. - tnode_free_append(tn, newn);
  248. -
  249. - put_child(tn, i/2, newn);
  250. + inode = tnode_new(node0->key, oldtnode->pos, 1);
  251. + if (!inode) {
  252. + tnode_free(tn);
  253. + return -ENOMEM;
  254. }
  255. + tnode_free_append(tn, inode);
  256. + /* initialize pointers out of node */
  257. + put_child(inode, 1, node1);
  258. + put_child(inode, 0, node0);
  259. + NODE_INIT_PARENT(inode, tn);
  260. +
  261. + /* link parent to node */
  262. + put_child(tn, i / 2, inode);
  263. }
  264. + /* setup the parent pointer out of and back into this node */
  265. + tp = node_parent(oldtnode);
  266. + NODE_INIT_PARENT(tn, tp);
  267. + put_child_root(tp, t, tn->key, tn);
  268. +
  269. /* prepare oldtnode to be freed */
  270. tnode_free_init(oldtnode);
  271. - for (i = 0; i < olen; i += 2) {
  272. - struct tnode *newBinNode;
  273. -
  274. - left = tnode_get_child(oldtnode, i);
  275. - right = tnode_get_child(oldtnode, i+1);
  276. -
  277. - /* At least one of the children is empty */
  278. - if (left == NULL) {
  279. - if (right == NULL) /* Both are empty */
  280. - continue;
  281. - put_child(tn, i/2, right);
  282. - continue;
  283. - }
  284. -
  285. - if (right == NULL) {
  286. - put_child(tn, i/2, left);
  287. + /* update all of the child parent pointers */
  288. + for (i = tnode_child_length(tn); i;) {
  289. + inode = tnode_get_child(tn, --i);
  290. +
  291. + /* only new tnodes will be considered "full" nodes */
  292. + if (!tnode_full(tn, inode)) {
  293. + node_set_parent(inode, tn);
  294. continue;
  295. }
  296. /* Two nonempty children */
  297. - newBinNode = tnode_get_child(tn, i/2);
  298. - put_child(newBinNode, 0, left);
  299. - put_child(newBinNode, 1, right);
  300. -
  301. - put_child(tn, i / 2, newBinNode);
  302. + node_set_parent(tnode_get_child(inode, 1), inode);
  303. + node_set_parent(tnode_get_child(inode, 0), inode);
  304. /* resize child node */
  305. - resize(t, newBinNode);
  306. + resize(t, inode);
  307. }
  308. - put_child_root(tp, t, tn->key, tn);
  309. -
  310. /* all pointers should be clean so we are done */
  311. tnode_free(oldtnode);