123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- Subject: NET: skip GRO for foreign MAC addresses
- For network drivers using napi_gro_receive, packets are run through GRO
- even when the destination MAC address does not match the receiving device
- and they are supposed to be delivered to another host behind a different
- bridge port.
- This can be very expensive: for drivers without TSO or scatter-gather,
- the GRO aggregation can only be undone by copying the skb and
- checksumming it again.
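- To be able to detect foreign MAC addresses in an inexpensive way, keep a
- per-device mask of the bits in which the MAC addresses of its upper
- devices differ from the device's own address. A destination address that
- differs from the device address in any bit outside this mask cannot
- belong to one of the upper devices, so GRO is skipped for that packet.
- This allows handling VLANs and bridge devices with different addresses
- (as long as they are not too different: every differing bit widens the
- mask and lets more foreign addresses pass through GRO).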
- Signed-off-by: Felix Fietkau <nbd@nbd.name>
- --- a/net/core/dev.c
- +++ b/net/core/dev.c
- @@ -4205,6 +4205,9 @@ static enum gro_result dev_gro_receive(s
- enum gro_result ret;
- int grow;
-
- + if (skb->gro_skip)
- + goto normal;
- +
- if (!(skb->dev->features & NETIF_F_GRO))
- goto normal;
-
- @@ -5357,6 +5360,48 @@ static void __netdev_adjacent_dev_unlink
- &upper_dev->adj_list.lower);
- }
-
- +static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
- + struct net_device *dev)
- +{
- + int i;
- +
- + for (i = 0; i < dev->addr_len; i++)
- + mask[i] |= addr[i] ^ dev->dev_addr[i];
- +}
- +
- +static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
- + struct net_device *lower)
- +{
- + struct net_device *cur;
- + struct list_head *iter;
- +
- + netdev_for_each_upper_dev_rcu(dev, cur, iter) {
- + __netdev_addr_mask(mask, cur->dev_addr, lower);
- + __netdev_upper_mask(mask, cur, lower);
- + }
- +}
- +
- +static void __netdev_update_addr_mask(struct net_device *dev)
- +{
- + unsigned char mask[MAX_ADDR_LEN];
- + struct net_device *cur;
- + struct list_head *iter;
- +
- + memset(mask, 0, sizeof(mask));
- + __netdev_upper_mask(mask, dev, dev);
- + memcpy(dev->local_addr_mask, mask, dev->addr_len);
- +
- + netdev_for_each_lower_dev(dev, cur, iter)
- + __netdev_update_addr_mask(cur);
- +}
- +
- +static void netdev_update_addr_mask(struct net_device *dev)
- +{
- + rcu_read_lock();
- + __netdev_update_addr_mask(dev);
- + rcu_read_unlock();
- +}
- +
- static int __netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev, bool master,
- void *private)
- @@ -5428,6 +5473,7 @@ static int __netdev_upper_dev_link(struc
- goto rollback_lower_mesh;
- }
-
- + netdev_update_addr_mask(dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
- &changeupper_info.info);
- return 0;
- @@ -5554,6 +5600,7 @@ void netdev_upper_dev_unlink(struct net_
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(dev, i->dev);
-
- + netdev_update_addr_mask(dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
- &changeupper_info.info);
- }
- @@ -6094,6 +6141,7 @@ int dev_set_mac_address(struct net_devic
- if (err)
- return err;
- dev->addr_assign_type = NET_ADDR_SET;
- + netdev_update_addr_mask(dev);
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- add_device_randomness(dev->dev_addr, dev->addr_len);
- return 0;
- --- a/include/linux/netdevice.h
- +++ b/include/linux/netdevice.h
- @@ -1638,6 +1638,8 @@ struct net_device {
- struct netdev_hw_addr_list mc;
- struct netdev_hw_addr_list dev_addrs;
-
- + unsigned char local_addr_mask[MAX_ADDR_LEN];
- +
- #ifdef CONFIG_SYSFS
- struct kset *queues_kset;
- #endif
- --- a/include/linux/skbuff.h
- +++ b/include/linux/skbuff.h
- @@ -642,7 +642,8 @@ struct sk_buff {
- __u8 ipvs_property:1;
- __u8 inner_protocol_type:1;
- __u8 remcsum_offload:1;
- - /* 3 or 5 bit hole */
- + __u8 gro_skip:1;
- + /* 2 or 4 bit hole */
-
- #ifdef CONFIG_NET_SCHED
- __u16 tc_index; /* traffic control index */
- --- a/net/ethernet/eth.c
- +++ b/net/ethernet/eth.c
- @@ -140,6 +140,18 @@ u32 eth_get_headlen(void *data, unsigned
- }
- EXPORT_SYMBOL(eth_get_headlen);
-
- +static inline bool
- +eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
- +{
- + const u16 *a1 = addr1;
- + const u16 *a2 = addr2;
- + const u16 *m = mask;
- +
- + return (((a1[0] ^ a2[0]) & ~m[0]) |
- + ((a1[1] ^ a2[1]) & ~m[1]) |
- + ((a1[2] ^ a2[2]) & ~m[2]));
- +}
- +
- /**
- * eth_type_trans - determine the packet's protocol ID.
- * @skb: received socket data
- @@ -168,8 +180,12 @@ __be16 eth_type_trans(struct sk_buff *sk
- skb->pkt_type = PACKET_MULTICAST;
- }
- else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- - dev->dev_addr)))
- + dev->dev_addr))) {
- skb->pkt_type = PACKET_OTHERHOST;
- + if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
- + dev->local_addr_mask))
- + skb->gro_skip = 1;
- + }
-
- /*
- * Some variants of DSA tagging don't have an ethertype field
|