322-mac80211-add-A-MSDU-tx-support.patch 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. From: Felix Fietkau <nbd@openwrt.org>
  2. Date: Fri, 5 Feb 2016 01:38:51 +0100
  3. Subject: [PATCH] mac80211: add A-MSDU tx support
  4. Requires software tx queueing support. frag_list support (for zero-copy)
  5. is optional.
  6. Signed-off-by: Felix Fietkau <nbd@openwrt.org>
  7. ---
  8. --- a/include/net/mac80211.h
  9. +++ b/include/net/mac80211.h
  10. @@ -709,6 +709,7 @@ enum mac80211_tx_info_flags {
  11. * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll
  12. * frame (PS-Poll or uAPSD).
  13. * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
  14. + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
  15. *
  16. * These flags are used in tx_info->control.flags.
  17. */
  18. @@ -716,6 +717,7 @@ enum mac80211_tx_control_flags {
  19. IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0),
  20. IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1),
  21. IEEE80211_TX_CTRL_RATE_INJECT = BIT(2),
  22. + IEEE80211_TX_CTRL_AMSDU = BIT(3),
  23. };
  24. /*
  25. @@ -1728,6 +1730,7 @@ struct ieee80211_sta_rates {
  26. * size is min(max_amsdu_len, 7935) bytes.
  27. * Both additional HT limits must be enforced by the low level driver.
  28. * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2).
  29. + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
  30. * @txq: per-TID data TX queues (if driver uses the TXQ abstraction)
  31. */
  32. struct ieee80211_sta {
  33. @@ -1748,6 +1751,7 @@ struct ieee80211_sta {
  34. bool mfp;
  35. u8 max_amsdu_subframes;
  36. u16 max_amsdu_len;
  37. + u16 max_rc_amsdu_len;
  38. struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];
  39. @@ -1961,6 +1965,15 @@ struct ieee80211_txq {
  40. * order and does not need to manage its own reorder buffer or BA session
  41. * timeout.
  42. *
  43. + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated
  44. + * A-MSDU frames. Requires software tx queueing and fast-xmit support.
  45. + * When not using minstrel/minstrel_ht rate control, the driver should
  46. + * limit the maximum A-MSDU size based on the current tx rate by setting
  47. + * max_rc_amsdu_len in struct ieee80211_sta.
  48. + *
  49. + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list
  50. + * skbs, needed for zero-copy software A-MSDU.
  51. + *
  52. * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  53. */
  54. enum ieee80211_hw_flags {
  55. @@ -1998,6 +2011,8 @@ enum ieee80211_hw_flags {
  56. IEEE80211_HW_BEACON_TX_STATUS,
  57. IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
  58. IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
  59. + IEEE80211_HW_TX_AMSDU,
  60. + IEEE80211_HW_TX_FRAG_LIST,
  61. /* keep last, obviously */
  62. NUM_IEEE80211_HW_FLAGS
  63. @@ -2070,6 +2085,9 @@ enum ieee80211_hw_flags {
  64. * size is smaller (an example is LinkSys WRT120N with FW v1.0.07
  65. * build 002 Jun 18 2012).
  66. *
  67. + * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum of
  68. + * 1 + skb_shinfo(skb)->nr_frags for the head skb and each skb in its frag_list.
  69. + *
  70. * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
  71. * (if %IEEE80211_HW_QUEUE_CONTROL is set)
  72. *
  73. @@ -2124,6 +2142,7 @@ struct ieee80211_hw {
  74. u8 max_rate_tries;
  75. u8 max_rx_aggregation_subframes;
  76. u8 max_tx_aggregation_subframes;
  77. + u8 max_tx_fragments;
  78. u8 offchannel_tx_hw_queue;
  79. u8 radiotap_mcs_details;
  80. u16 radiotap_vht_details;
  81. --- a/net/mac80211/agg-tx.c
  82. +++ b/net/mac80211/agg-tx.c
  83. @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct
  84. size_t len)
  85. {
  86. struct tid_ampdu_tx *tid_tx;
  87. + struct ieee80211_txq *txq;
  88. u16 capab, tid;
  89. u8 buf_size;
  90. bool amsdu;
  91. @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct
  92. buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
  93. buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
  94. + txq = sta->sta.txq[tid];
  95. + if (!amsdu && txq)
  96. + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags);
  97. +
  98. mutex_lock(&sta->ampdu_mlme.mtx);
  99. tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
  100. --- a/net/mac80211/debugfs.c
  101. +++ b/net/mac80211/debugfs.c
  102. @@ -127,6 +127,8 @@ static const char *hw_flag_names[NUM_IEE
  103. FLAG(BEACON_TX_STATUS),
  104. FLAG(NEEDS_UNIQUE_STA_ADDR),
  105. FLAG(SUPPORTS_REORDERING_BUFFER),
  106. + FLAG(TX_AMSDU),
  107. + FLAG(TX_FRAG_LIST),
  108. /* keep last for the build bug below */
  109. (void *)0x1
  110. --- a/net/mac80211/ieee80211_i.h
  111. +++ b/net/mac80211/ieee80211_i.h
  112. @@ -799,6 +799,7 @@ struct mac80211_qos_map {
  113. enum txq_info_flags {
  114. IEEE80211_TXQ_STOP,
  115. IEEE80211_TXQ_AMPDU,
  116. + IEEE80211_TXQ_NO_AMSDU, /* set by ieee80211_process_addba_resp() when its amsdu flag is false; ieee80211_amsdu_aggregate() declines to aggregate on a queue carrying it */
  117. };
  118. struct txq_info {
  119. --- a/net/mac80211/tx.c
  120. +++ b/net/mac80211/tx.c
  121. @@ -1318,6 +1318,10 @@ struct sk_buff *ieee80211_tx_dequeue(str
  122. out:
  123. spin_unlock_bh(&txqi->queue.lock);
  124. + if (skb && skb_has_frag_list(skb) &&
  125. + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
  126. + skb_linearize(skb);
  127. +
  128. return skb;
  129. }
  130. EXPORT_SYMBOL(ieee80211_tx_dequeue);
  131. @@ -2757,6 +2761,163 @@ void ieee80211_clear_fast_xmit(struct st
  132. kfree_rcu(fast_tx, rcu_head);
  133. }
  134. +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
  135. + struct sk_buff *skb, int headroom,
  136. + int *subframe_len)
  137. +{
  138. + int amsdu_len = *subframe_len + sizeof(struct ethhdr);
  139. + int padding = (4 - amsdu_len) & 3;
  140. +
  141. + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) {
  142. + I802_DEBUG_INC(local->tx_expand_skb_head);
  143. +
  144. + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) {
  145. + wiphy_debug(local->hw.wiphy,
  146. + "failed to reallocate TX buffer\n");
  147. + return false;
  148. + }
  149. + }
  150. +
  151. + if (padding) {
  152. + *subframe_len += padding;
  153. + memset(skb_put(skb, padding), 0, padding);
  154. + }
  155. +
  156. + return true;
  157. +}
  158. +
  159. +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
  160. + struct ieee80211_fast_tx *fast_tx,
  161. + struct sk_buff *skb)
  162. +{
  163. + struct ieee80211_local *local = sdata->local;
  164. + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
  165. + struct ieee80211_hdr *hdr;
  166. + struct ethhdr amsdu_hdr;
  167. + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
  168. + int subframe_len = skb->len - hdr_len;
  169. + void *data;
  170. + u8 *qc;
  171. +
  172. + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
  173. + return false;
  174. +
  175. + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
  176. + return true;
  177. +
  178. + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr),
  179. + &subframe_len))
  180. + return false;
  181. +
  182. + amsdu_hdr.h_proto = cpu_to_be16(subframe_len);
  183. + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
  184. + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
  185. +
  186. + data = skb_push(skb, sizeof(amsdu_hdr));
  187. + memmove(data, data + sizeof(amsdu_hdr), hdr_len);
  188. + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr));
  189. +
  190. + hdr = data;
  191. + qc = ieee80211_get_qos_ctl(hdr);
  192. + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
  193. +
  194. + info->control.flags |= IEEE80211_TX_CTRL_AMSDU;
  195. +
  196. + return true;
  197. +}
  198. +
  199. +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
  200. + struct sta_info *sta,
  201. + struct ieee80211_fast_tx *fast_tx,
  202. + struct sk_buff *skb)
  203. +{
  204. + struct ieee80211_local *local = sdata->local;
  205. + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
  206. + struct ieee80211_txq *txq = sta->sta.txq[tid];
  207. + struct txq_info *txqi;
  208. + struct sk_buff **frag_tail, *head;
  209. + int subframe_len = skb->len - ETH_ALEN;
  210. + u8 max_subframes = sta->sta.max_amsdu_subframes;
  211. + int max_frags = local->hw.max_tx_fragments;
  212. + int max_amsdu_len = sta->sta.max_amsdu_len;
  213. + __be16 len;
  214. + void *data;
  215. + bool ret = false;
  216. + int n = 1, nfrags;
  217. +
  218. + if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
  219. + return false;
  220. +
  221. + if (!txq)
  222. + return false;
  223. +
  224. + txqi = to_txq_info(txq);
  225. + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags))
  226. + return false;
  227. +
  228. + if (sta->sta.max_rc_amsdu_len)
  229. + max_amsdu_len = min_t(int, max_amsdu_len,
  230. + sta->sta.max_rc_amsdu_len);
  231. +
  232. + spin_lock_bh(&txqi->queue.lock);
  233. +
  234. + head = skb_peek_tail(&txqi->queue);
  235. + if (!head)
  236. + goto out;
  237. +
  238. + if (skb->len + head->len > max_amsdu_len)
  239. + goto out;
  240. +
  241. + /*
  242. + * HT A-MPDU limits maximum MPDU size to 4095 bytes. Since aggregation
  243. + * sessions are started/stopped without txq flush, use the limit here
  244. + * to avoid having to de-aggregate later.
  245. + */
  246. + if (skb->len + head->len > 4095 &&
  247. + !sta->sta.vht_cap.vht_supported)
  248. + goto out;
  249. +
  250. + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
  251. + goto out;
  252. +
  253. + nfrags = 1 + skb_shinfo(skb)->nr_frags;
  254. + nfrags += 1 + skb_shinfo(head)->nr_frags;
  255. + frag_tail = &skb_shinfo(head)->frag_list;
  256. + while (*frag_tail) {
  257. + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags;
  258. + frag_tail = &(*frag_tail)->next;
  259. + n++;
  260. + }
  261. +
  262. + if (max_subframes && n > max_subframes)
  263. + goto out;
  264. +
  265. + if (max_frags && nfrags > max_frags)
  266. + goto out;
  267. +
  268. + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2,
  269. + &subframe_len))
  270. + return false;
  271. +
  272. + ret = true;
  273. + data = skb_push(skb, ETH_ALEN + 2);
  274. + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN);
  275. +
  276. + data += 2 * ETH_ALEN;
  277. + len = cpu_to_be16(subframe_len);
  278. + memcpy(data, &len, 2);
  279. + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header));
  280. +
  281. + head->len += skb->len;
  282. + head->data_len += skb->len;
  283. + *frag_tail = skb;
  284. +
  285. +out:
  286. + spin_unlock_bh(&txqi->queue.lock);
  287. +
  288. + return ret;
  289. +}
  290. +
  291. static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
  292. struct net_device *dev, struct sta_info *sta,
  293. struct ieee80211_fast_tx *fast_tx,
  294. @@ -2811,6 +2972,10 @@ static bool ieee80211_xmit_fast(struct i
  295. ieee80211_tx_stats(dev, skb->len + extra_head);
  296. + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
  297. + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
  298. + return true;
  299. +
  300. /* will not be crypto-handled beyond what we do here, so use false
  301. * as the may-encrypt argument for the resize to not account for
  302. * more room than we already have in 'extra_head'