diff options
56 files changed, 662 insertions, 467 deletions
diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml index 311c570165f9..836d2d60e87d 100644 --- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml +++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml @@ -19,6 +19,7 @@ allOf: properties: compatible: enum: + - ti,am642-icssg-prueth # for AM64x SoC family - ti,am654-icssg-prueth # for AM65x SoC family sram: diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a66054d0763a..5bfa1837968c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER Default : 44 +tcp_backlog_ack_defer - BOOLEAN + If set, user thread processing socket backlog tries sending + one ACK for the whole queue. This helps to avoid potential + long latencies at end of a TCP socket syscall. + + Default : true + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. An idle period is defined at diff --git a/MAINTAINERS b/MAINTAINERS index bf0f54c24f81..3b32228356de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4320,8 +4320,7 @@ F: drivers/net/ethernet/broadcom/bcmsysport.* F: drivers/net/ethernet/broadcom/unimac.h BROADCOM TG3 GIGABIT ETHERNET DRIVER -M: Siva Reddy Kallam <siva.kallam@broadcom.com> -M: Prashant Sreedharan <prashant@broadcom.com> +M: Pavan Chebbi <pavan.chebbi@broadcom.com> M: Michael Chan <mchan@broadcom.com> L: netdev@vger.kernel.org S: Supported diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index 43d821fe7a54..63ba64dbb731 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -504,15 +504,12 @@ struct atl1c_rrd_ring { u16 next_to_use; u16 next_to_clean; struct napi_struct napi; - struct page *rx_page; - unsigned int rx_page_offset; }; /* board specific private data structure */ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; - unsigned int rx_frag_size; struct atl1c_hw hw; struct atl1c_hw_stats hw_stats; struct mii_if_info mii; /* MII interface info */ diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 940c5d1ff9cf..74b78164cf74 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -483,15 +483,10 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p) static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter, struct net_device *dev) { - unsigned int head_size; int mtu = dev->mtu; adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ? roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE; - - head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - adapter->rx_frag_size = roundup_pow_of_two(head_size); } static netdev_features_t atl1c_fix_features(struct net_device *netdev, @@ -964,7 +959,6 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter) static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - int i; dma_free_coherent(&pdev->dev, adapter->ring_header.size, adapter->ring_header.desc, adapter->ring_header.dma); @@ -977,12 +971,6 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) kfree(adapter->tpd_ring[0].buffer_info); adapter->tpd_ring[0].buffer_info = NULL; } - for (i = 0; i < adapter->rx_queue_count; ++i) { - if (adapter->rrd_ring[i].rx_page) { - put_page(adapter->rrd_ring[i].rx_page); - adapter->rrd_ring[i].rx_page = NULL; - } - } } /** @@ -1754,48 +1742,11 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, skb_checksum_none_assert(skb); } -static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter, - u32 queue, bool napi_mode) -{ - struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; - struct sk_buff *skb; - struct page *page; - - if (adapter->rx_frag_size > PAGE_SIZE) { - if (likely(napi_mode)) - return napi_alloc_skb(&rrd_ring->napi, - adapter->rx_buffer_len); - else - return netdev_alloc_skb_ip_align(adapter->netdev, - adapter->rx_buffer_len); - } - - page = rrd_ring->rx_page; - if (!page) { - page = alloc_page(GFP_ATOMIC); - if (unlikely(!page)) - return NULL; - rrd_ring->rx_page = page; - rrd_ring->rx_page_offset = 0; - } - - skb = build_skb(page_address(page) + rrd_ring->rx_page_offset, - adapter->rx_frag_size); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - rrd_ring->rx_page_offset += adapter->rx_frag_size; - if (rrd_ring->rx_page_offset >= PAGE_SIZE) - rrd_ring->rx_page = NULL; - else - get_page(page); - } - return skb; -} - static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, bool napi_mode) { struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue]; + struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; struct pci_dev *pdev = adapter->pdev; struct atl1c_buffer *buffer_info, *next_info; struct sk_buff *skb; @@ -1814,13 +1765,27 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, while (next_info->flags & ATL1C_BUFFER_FREE) { rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = atl1c_alloc_skb(adapter, queue, napi_mode); + /* When DMA RX address is set to something like + * 0x....fc0, it will be very likely to cause DMA + * RFD overflow issue. + * + * To work around it, we apply rx skb with 64 bytes + * longer space, and offset the address whenever + * 0x....fc0 is detected. + */ + if (likely(napi_mode)) + skb = napi_alloc_skb(&rrd_ring->napi, adapter->rx_buffer_len + 64); + else + skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len + 64); if (unlikely(!skb)) { if (netif_msg_rx_err(adapter)) dev_warn(&pdev->dev, "alloc rx buffer failed\n"); break; } + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + /* * Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index ad47ac51a139..9b60966736db 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -861,7 +861,7 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq); struct hinic_dev *nic_dev = netdev_priv(netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; - int err, irqname_len; + int err; txq->netdev = netdev; txq->sq = sq; @@ -882,15 +882,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, goto err_alloc_free_sges; } - irqname_len = snprintf(NULL, 0, "%s_txq%d", netdev->name, qp->q_id) + 1; - txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL); + txq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, "%s_txq%d", + netdev->name, qp->q_id); if (!txq->irq_name) { err = -ENOMEM; goto err_alloc_irqname; } - sprintf(txq->irq_name, "%s_txq%d", netdev->name, qp->q_id); - err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING, CI_UPDATE_NO_COALESC); if (err) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index de7fd43dc11c..00ca2b88165c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16320,11 +16320,15 @@ static void i40e_remove(struct pci_dev *pdev) i40e_switch_branch_release(pf->veb[i]); } - /* Now we can shutdown the PF's VSI, just before we kill + /* Now we can shutdown the PF's VSIs, just before we kill * adminq and hmc. */ - if (pf->vsi[pf->lan_vsi]) - i40e_vsi_release(pf->vsi[pf->lan_vsi]); + for (i = pf->num_alloc_vsi; i--;) + if (pf->vsi[i]) { + i40e_vsi_close(pf->vsi[i]); + i40e_vsi_release(pf->vsi[i]); + pf->vsi[i] = NULL; + } i40e_cloud_filter_exit(pf); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 85fba85fbb23..738e25657c6b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,8 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) #define IAVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) #define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) -#define IAVF_FLAG_PROMISC_ON BIT(13) -#define IAVF_FLAG_ALLMULTI_ON BIT(14) #define IAVF_FLAG_LEGACY_RX BIT(15) #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) @@ -325,10 +323,7 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_SET_HENA BIT_ULL(12) #define IAVF_FLAG_AQ_SET_RSS_KEY BIT_ULL(13) #define IAVF_FLAG_AQ_SET_RSS_LUT BIT_ULL(14) -#define IAVF_FLAG_AQ_REQUEST_PROMISC BIT_ULL(15) -#define IAVF_FLAG_AQ_RELEASE_PROMISC BIT_ULL(16) -#define IAVF_FLAG_AQ_REQUEST_ALLMULTI BIT_ULL(17) -#define IAVF_FLAG_AQ_RELEASE_ALLMULTI BIT_ULL(18) +#define IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE BIT_ULL(15) #define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT_ULL(19) #define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT_ULL(20) #define IAVF_FLAG_AQ_ENABLE_CHANNELS BIT_ULL(21) @@ -365,6 +360,12 @@ struct iavf_adapter { (IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \ IAVF_EXTENDED_CAP_RECV_VLAN_V2) + /* Lock to prevent possible clobbering of + * current_netdev_promisc_flags + */ + spinlock_t current_netdev_promisc_flags_lock; + netdev_features_t current_netdev_promisc_flags; + /* OS defined structs */ struct net_device *netdev; struct pci_dev *pdev; @@ -551,7 +552,8 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter); void iavf_del_ether_addrs(struct iavf_adapter *adapter); void iavf_add_vlans(struct iavf_adapter *adapter); void iavf_del_vlans(struct iavf_adapter *adapter); -void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags); +void iavf_set_promiscuous(struct iavf_adapter *adapter); +bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter); void iavf_request_stats(struct iavf_adapter *adapter); int iavf_request_reset(struct iavf_adapter *adapter); void iavf_get_hena(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7b300c86ceda..36b94ee44211 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1189,6 +1189,16 @@ static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr) } /** + * iavf_promiscuous_mode_changed - check if promiscuous mode bits changed + * @adapter: device specific adapter + */ +bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter) +{ + return (adapter->current_netdev_promisc_flags ^ adapter->netdev->flags) & + (IFF_PROMISC | IFF_ALLMULTI); +} + +/** * iavf_set_rx_mode - NDO callback to set the netdev filters * @netdev: network interface device structure **/ @@ -1201,19 +1211,10 @@ static void iavf_set_rx_mode(struct net_device *netdev) __dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); spin_unlock_bh(&adapter->mac_vlan_list_lock); - if (netdev->flags & IFF_PROMISC && - !(adapter->flags & IAVF_FLAG_PROMISC_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC; - else if (!(netdev->flags & IFF_PROMISC) && - adapter->flags & IAVF_FLAG_PROMISC_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC; - - if (netdev->flags & IFF_ALLMULTI && - !(adapter->flags & IAVF_FLAG_ALLMULTI_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI; - else if (!(netdev->flags & IFF_ALLMULTI) && - adapter->flags & IAVF_FLAG_ALLMULTI_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI; + spin_lock_bh(&adapter->current_netdev_promisc_flags_lock); + if (iavf_promiscuous_mode_changed(adapter)) + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); } /** @@ -2163,19 +2164,8 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) return 0; } - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) { - iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC); - return 0; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) { - iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); - return 0; - } - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) || - (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { - iavf_set_promiscuous(adapter, 0); + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE) { + iavf_set_promiscuous(adapter); return 0; } @@ -4971,6 +4961,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->cloud_filter_list_lock); spin_lock_init(&adapter->fdir_fltr_lock); spin_lock_init(&adapter->adv_rss_lock); + spin_lock_init(&adapter->current_netdev_promisc_flags_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index f9727e9c3d63..0b97b424e487 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -936,14 +936,14 @@ void iavf_del_vlans(struct iavf_adapter *adapter) /** * iavf_set_promiscuous * @adapter: adapter structure - * @flags: bitmask to control unicast/multicast promiscuous. * * Request that the PF enable promiscuous mode for our VSI. **/ -void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) +void iavf_set_promiscuous(struct iavf_adapter *adapter) { + struct net_device *netdev = adapter->netdev; struct virtchnl_promisc_info vpi; - int promisc_all; + unsigned int flags; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -952,36 +952,57 @@ void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) return; } - promisc_all = FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC; - if ((flags & promisc_all) == promisc_all) { - adapter->flags |= IAVF_FLAG_PROMISC_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC; - dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); - } + /* prevent changes to promiscuous flags */ + spin_lock_bh(&adapter->current_netdev_promisc_flags_lock); - if (flags & FLAG_VF_MULTICAST_PROMISC) { - adapter->flags |= IAVF_FLAG_ALLMULTI_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI; - dev_info(&adapter->pdev->dev, "%s is entering multicast promiscuous mode\n", - adapter->netdev->name); + /* sanity check to prevent duplicate AQ calls */ + if (!iavf_promiscuous_mode_changed(adapter)) { + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + dev_dbg(&adapter->pdev->dev, "No change in promiscuous mode\n"); + /* allow changes to promiscuous flags */ + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); + return; } - if (!flags) { - if (adapter->flags & IAVF_FLAG_PROMISC_ON) { - adapter->flags &= ~IAVF_FLAG_PROMISC_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_PROMISC; - dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); - } + /* there are 2 bits, but only 3 states */ + if (!(netdev->flags & IFF_PROMISC) && + netdev->flags & IFF_ALLMULTI) { + /* State 1 - only multicast promiscuous mode enabled + * - !IFF_PROMISC && IFF_ALLMULTI + */ + flags = FLAG_VF_MULTICAST_PROMISC; + adapter->current_netdev_promisc_flags |= IFF_ALLMULTI; + adapter->current_netdev_promisc_flags &= ~IFF_PROMISC; + dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n"); + } else if (!(netdev->flags & IFF_PROMISC) && + !(netdev->flags & IFF_ALLMULTI)) { + /* State 2 - unicast/multicast promiscuous mode disabled + * - !IFF_PROMISC && !IFF_ALLMULTI + */ + flags = 0; + adapter->current_netdev_promisc_flags &= + ~(IFF_PROMISC | IFF_ALLMULTI); + dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); + } else { + /* State 3 - unicast/multicast promiscuous mode enabled + * - IFF_PROMISC && IFF_ALLMULTI + * - IFF_PROMISC && !IFF_ALLMULTI + */ + flags = FLAG_VF_UNICAST_PROMISC | FLAG_VF_MULTICAST_PROMISC; + adapter->current_netdev_promisc_flags |= IFF_PROMISC; + if (netdev->flags & IFF_ALLMULTI) + adapter->current_netdev_promisc_flags |= IFF_ALLMULTI; + else + adapter->current_netdev_promisc_flags &= ~IFF_ALLMULTI; - if (adapter->flags & IAVF_FLAG_ALLMULTI_ON) { - adapter->flags &= ~IAVF_FLAG_ALLMULTI_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_ALLMULTI; - dev_info(&adapter->pdev->dev, "%s is leaving multicast promiscuous mode\n", - adapter->netdev->name); - } + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); } + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + + /* allow changes to promiscuous flags */ + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); + adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; vpi.vsi_id = adapter->vsi_res->vsi_id; vpi.flags = flags; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index a4efbeb16208..ef3980840695 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -196,10 +196,10 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, if (mtk_is_netsys_v2_or_greater(eth)) { switch (info.wdma_idx) { case 0: - pse_port = 8; + pse_port = PSE_WDMA0_PORT; break; case 1: - pse_port = 9; + pse_port = PSE_WDMA1_PORT; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index 071ed3dea860..72bcdaed12a9 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -68,6 +68,9 @@ mtk_wed_update_rx_stats(struct mtk_wed_device *wed, struct sk_buff *skb) struct mtk_wed_wo_rx_stats *stats; int i; + if (!wed->wlan.update_wo_rx_stats) + return; + if (count * sizeof(*stats) > skb->len - sizeof(u32)) return; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 410612f43cbd..92b13057d4de 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2313,8 +2313,13 @@ static const struct prueth_pdata am654_icssg_pdata = { .quirk_10m_link_issue = 1, }; +static const struct prueth_pdata am64x_icssg_pdata = { + .fdqring_mode = K3_RINGACC_RING_MODE_RING, +}; + static const struct of_device_id prueth_dt_match[] = { { .compatible = "ti,am654-icssg-prueth", .data = &am654_icssg_pdata }, + { .compatible = "ti,am642-icssg-prueth", .data = &am64x_icssg_pdata }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, prueth_dt_match); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 85dc16faca54..f0063d569c80 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -12,6 +12,98 @@ #include "wx_lib.h" #include "wx_hw.h" +static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + u32 command, val; + int ret; + + /* setup and write the address cycle command */ + command = WX_MSCA_RA(regnum) | + WX_MSCA_PA(phy_addr) | + WX_MSCA_DA(devnum); + wr32(wx, WX_MSCA, command); + + command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY; + if (wx->mac.type == wx_mac_em) + command |= WX_MDIO_CLK(6); + wr32(wx, WX_MSCC, command); + + /* wait to complete */ + ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, + 100000, false, wx, WX_MSCC); + if (ret) { + wx_err(wx, "Mdio read c22 command did not complete.\n"); + return ret; + } + + return (u16)rd32(wx, WX_MSCC); +} + +static int wx_phy_write_reg_mdi(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + u32 command, val; + int ret; + + /* setup and write the address cycle command */ + command = WX_MSCA_RA(regnum) | + WX_MSCA_PA(phy_addr) | + WX_MSCA_DA(devnum); + wr32(wx, WX_MSCA, command); + + command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY; + if (wx->mac.type == wx_mac_em) + command |= WX_MDIO_CLK(6); + wr32(wx, WX_MSCC, command); + + /* wait to complete */ + ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, + 100000, false, wx, WX_MSCC); + if (ret) + wx_err(wx, "Mdio write c22 command did not complete.\n"); + + return ret; +} + +int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF); + return wx_phy_read_reg_mdi(bus, phy_addr, 0, regnum); +} +EXPORT_SYMBOL(wx_phy_read_reg_mdi_c22); + +int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF); + return wx_phy_write_reg_mdi(bus, phy_addr, 0, regnum, value); +} +EXPORT_SYMBOL(wx_phy_write_reg_mdi_c22); + +int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0); + return wx_phy_read_reg_mdi(bus, phy_addr, devnum, regnum); +} +EXPORT_SYMBOL(wx_phy_read_reg_mdi_c45); + +int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0); + return wx_phy_write_reg_mdi(bus, phy_addr, devnum, regnum, value); +} +EXPORT_SYMBOL(wx_phy_write_reg_mdi_c45); + static void wx_intr_disable(struct wx *wx, u64 qmask) { u32 mask; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 0b3447bc6f2f..48d3ccabc272 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -4,6 +4,13 @@ #ifndef _WX_HW_H_ #define _WX_HW_H_ +#include <linux/phy.h> + +int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum); +int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value); +int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum); +int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value); void wx_intr_enable(struct wx *wx, u64 qmask); void wx_irq_disable(struct wx *wx); int wx_check_flash_load(struct wx *wx, u32 check_bit); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index c5cbd177ef62..e3fc49284219 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -251,6 +251,7 @@ enum WX_MSCA_CMD_value { #define WX_MSCC_SADDR BIT(18) #define WX_MSCC_BUSY BIT(22) #define WX_MDIO_CLK(v) FIELD_PREP(GENMASK(21, 19), v) +#define WX_MDIO_CLAUSE_SELECT 0x11220 #define WX_MMC_CONTROL 0x11800 #define WX_MMC_CONTROL_RSTONRD BIT(2) /* reset on read */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index 591f5b7b6da6..6302ecca71bb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -29,117 +29,6 @@ static int ngbe_phy_write_reg_internal(struct mii_bus *bus, int phy_addr, int re return 0; } -static int ngbe_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum) -{ - u32 command, val, device_type = 0; - struct wx *wx = bus->priv; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(device_type); - wr32(wx, WX_MSCA, command); - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c22 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int ngbe_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value) -{ - u32 command, val, device_type = 0; - struct wx *wx = bus->priv; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(device_type); - wr32(wx, WX_MSCA, command); - command = value | - WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c22 command did not complete.\n"); - - return ret; -} - -static int ngbe_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum) -{ - struct wx *wx = bus->priv; - u32 val, command; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c45 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int ngbe_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, - int devnum, int regnum, u16 value) -{ - struct wx *wx = bus->priv; - int ret, command; - u16 val; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - command = value | - WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c45 command did not complete.\n"); - - return ret; -} - static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum) { struct wx *wx = bus->priv; @@ -148,7 +37,7 @@ static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum) if (wx->mac_type == em_mac_type_mdi) phy_data = ngbe_phy_read_reg_internal(bus, phy_addr, regnum); else - phy_data = ngbe_phy_read_reg_mdi_c22(bus, phy_addr, regnum); + phy_data = wx_phy_read_reg_mdi_c22(bus, phy_addr, regnum); return phy_data; } @@ -162,7 +51,7 @@ static int ngbe_phy_write_reg_c22(struct mii_bus *bus, int phy_addr, if (wx->mac_type == em_mac_type_mdi) ret = ngbe_phy_write_reg_internal(bus, phy_addr, regnum, value); else - ret = ngbe_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value); + ret = wx_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value); return ret; } @@ -262,8 +151,8 @@ int ngbe_mdio_init(struct wx *wx) mii_bus->priv = wx; if (wx->mac_type == em_mac_type_rgmii) { - mii_bus->read_c45 = ngbe_phy_read_reg_mdi_c45; - mii_bus->write_c45 = ngbe_phy_write_reg_mdi_c45; + mii_bus->read_c45 = wx_phy_read_reg_mdi_c45; + mii_bus->write_c45 = wx_phy_write_reg_mdi_c45; } snprintf(mii_bus->id, MII_BUS_ID_SIZE, "ngbe-%x", pci_dev_id(pdev)); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 72c8cd2d5575..ff754d69bdf6 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -59,9 +59,6 @@ #define NGBE_EEPROM_VERSION_L 0x1D #define NGBE_EEPROM_VERSION_H 0x1E -/* Media-dependent registers. */ -#define NGBE_MDIO_CLAUSE_SELECT 0x11220 - /* GPIO Registers */ #define NGBE_GPIO_DR 0x14800 #define NGBE_GPIO_DDR 0x14804 diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 4159c84035fd..b6c06adb8656 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -647,58 +647,6 @@ static int txgbe_sfp_register(struct txgbe *txgbe) return 0; } -static int txgbe_phy_read(struct mii_bus *bus, int phy_addr, - int devnum, int regnum) -{ - struct wx *wx = bus->priv; - u32 val, command; - int ret; - - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY; - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c45 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int txgbe_phy_write(struct mii_bus *bus, int phy_addr, - int devnum, int regnum, u16 value) -{ - struct wx *wx = bus->priv; - int ret, command; - u16 val; - - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - - command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY; - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c45 command did not complete.\n"); - - return ret; -} - static int txgbe_ext_phy_init(struct txgbe *txgbe) { struct phy_device *phydev; @@ -715,8 +663,8 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe) return -ENOMEM; mii_bus->name = "txgbe_mii_bus"; - mii_bus->read_c45 = &txgbe_phy_read; - mii_bus->write_c45 = &txgbe_phy_write; + mii_bus->read_c45 = &wx_phy_read_reg_mdi_c45; + mii_bus->write_c45 = &wx_phy_write_reg_mdi_c45; mii_bus->parent = &pdev->dev; mii_bus->phy_mask = GENMASK(31, 1); mii_bus->priv = wx; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 144ec626230d..b3aa0c3d5826 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk) gtp->sk0 = NULL; else gtp->sk1u = NULL; - udp_sk(sk)->encap_type = 0; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); rcu_assign_sk_user_data(sk, NULL); release_sock(sk); sock_put(sk); @@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - switch (udp_sk(sk)->encap_type) { + switch (READ_ONCE(udp_sk(sk)->encap_type)) { case UDP_ENCAP_GTP0: netdev_dbg(gtp->dev, "received GTP0 packet\n"); ret = gtp0_udp_encap_recv(gtp, skb); diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 284ab1f56391..87df60916960 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */ +#include <linux/bitmap.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/debugfs.h> @@ -395,7 +396,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) char buf[0x20]; int id; - idmap = (unsigned long *)get_zeroed_page(GFP_KERNEL); + idmap = bitmap_zalloc(max_ports, GFP_KERNEL); if (!idmap) return -ENOMEM; @@ -414,7 +415,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) /* Allocate unique id */ id = find_first_zero_bit(idmap, max_ports); - free_page((unsigned long)idmap); + bitmap_free(idmap); snprintf(buf, sizeof(buf), fmt, id); /* Name generation */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..44d946161d4a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,15 +463,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ - gro_enabled:1, /* Request GRO aggregation */ - accept_udp_l4:1, - accept_udp_fraglist:1; + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +67,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,28 +94,39 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, @@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/net/dst.h b/include/net/dst.h index 78884429deed..f8b8599a0600 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -392,10 +392,10 @@ static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags); struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..d96d05b08819 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; + u8 sysctl_tcp_backlog_ack_defer; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf..676146e9d181 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1823,12 +1823,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) static inline void sock_release_ownership(struct sock *sk) { - if (sock_owned_by_user_nocheck(sk)) { - sk->sk_lock.owned = 0; + DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk)); + sk->sk_lock.owned = 0; - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } /* no reclassification while locks are held */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..29251c3519cf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/net/core/dst.c b/net/core/dst.c index 980e2fd2f013..6838d3212c37 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = { EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; @@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - rcuref_init(&dst->__rcuref, initial_ref); + rcuref_init(&dst->__rcuref, 1); INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; @@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) + int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (!dst) return NULL; - dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } @@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; diff --git a/net/core/sock.c b/net/core/sock.c index 16584e2dd648..bb89b88bc1e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3001,6 +3001,9 @@ void __sk_flush_backlog(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); __release_sock(sk); + + if (sk->sk_prot->release_cb) + sk->sk_prot->release_cb(sk); spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); @@ -3519,9 +3522,6 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); - /* Warning : release_cb() might need to release sk ownership, - * ie call sock_release_ownership(sk) before us. - */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 418e5fb58fd3..76c3ea75b8dd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2944,8 +2944,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; spin_lock_bh(&state->im->lock); psf = state->im->sources; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66f419e7f9a7..fb3045692b99 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1630,7 +1630,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, { struct rtable *rt; - rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? DST_NOXFRM : 0)); if (rt) { @@ -1658,7 +1658,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; - new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { @@ -2832,7 +2832,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ac890b4073f..e7f024d93572 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1367,6 +1367,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, }, { + .procname = "tcp_backlog_ack_defer", + .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, .maxlen = sizeof(u8), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06fe1cf645d5..41b471748437 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5553,6 +5553,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { + /* If we are running from __release_sock() in user context, + * Defer the ack until tcp_release_cb(). + */ + if (sock_owned_by_user_nocheck(sk) && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); + return; + } send_now: tcp_send_ack(sk); return; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27140e5cdc06..f13eb7e23d03 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3263,6 +3263,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccfc8bbf7455..1fc1f879cfd6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1077,7 +1077,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1101,16 +1102,6 @@ void tcp_release_cb(struct sock *sk) tcp_tsq_write(sk); __sock_put(sk); } - /* Here begins the tricky part : - * We are called from release_sock() with : - * 1) BH disabled - * 2) sk_lock.slock spinlock held - * 3) socket owned by us (sk->sk_lock.owned == 1) - * - * But following code is meant to be called from BH handlers, - * so we should keep BH disabled, but early release socket ownership - */ - sock_release_ownership(sk); if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); @@ -1124,6 +1115,8 @@ void tcp_release_cb(struct sock *sk) inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f39b9c844580..c3ff984b6354 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, @@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) u8 tos, scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1315,11 +1315,11 @@ void udp_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_push_pending_frames(sk); release_sock(sk); } @@ -1868,7 +1868,7 @@ try_again: (struct sockaddr *)sin); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (inet_cmsg_flags(inet)) @@ -2081,7 +2081,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udp_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -2119,7 +2120,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); /* * MIB statistics other than incrementing the error count are @@ -2132,7 +2134,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * delivery of packets with coverage values less than a value * provided by the application." */ - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; @@ -2143,9 +2145,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -2618,7 +2620,7 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); } } @@ -2658,9 +2660,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - WRITE_ONCE(up->corkflag, 1); + udp_set_bit(CORK, sk); } else { - WRITE_ONCE(up->corkflag, 0); + udp_clear_bit(CORK, sk); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2675,17 +2677,17 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + ipv6_stub->xfrm6_udp_encap_rcv); else #endif - up->encap_rcv = xfrm4_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + xfrm4_udp_encap_rcv); #endif fallthrough; case UDP_ENCAP_L2TPINUDP: - up->encap_type = val; - lock_sock(sk); - udp_tunnel_encap_enable(sk->sk_socket); - release_sock(sk); + WRITE_ONCE(up->encap_type, val); + udp_tunnel_encap_enable(sk); break; default: err = -ENOPROTOOPT; @@ -2694,11 +2696,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - up->no_check6_tx = valbool; + udp_set_no_check6_tx(sk, valbool); break; case UDP_NO_CHECK6_RX: - up->no_check6_rx = valbool; + udp_set_no_check6_rx(sk, valbool); break; case UDP_SEGMENT: @@ -2708,14 +2710,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - lock_sock(sk); /* when enabling GRO, accept the related GSO packet type */ if (valbool) - udp_tunnel_encap_enable(sk->sk_socket); - up->gro_enabled = valbool; - up->accept_udp_l4 = valbool; - release_sock(sk); + udp_tunnel_encap_enable(sk); + udp_assign_bit(GRO_ENABLED, sk, valbool); + udp_assign_bit(ACCEPT_L4, sk, valbool); break; /* @@ -2730,8 +2730,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcslen = val; - up->pcflag |= UDPLITE_SEND_CC; + WRITE_ONCE(up->pcslen, val); + udp_set_bit(UDPLITE_SEND_CC, sk); break; /* The receiver specifies a minimum checksum coverage value. To make @@ -2744,8 +2744,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcrlen = val; - up->pcflag |= UDPLITE_RECV_CC; + WRITE_ONCE(up->pcrlen, val); + udp_set_bit(UDPLITE_RECV_CC, sk); break; default: @@ -2783,19 +2783,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = READ_ONCE(up->corkflag); + val = udp_test_bit(CORK, sk); break; case UDP_ENCAP: - val = up->encap_type; + val = READ_ONCE(up->encap_type); break; case UDP_NO_CHECK6_TX: - val = up->no_check6_tx; + val = udp_get_no_check6_tx(sk); break; case UDP_NO_CHECK6_RX: - val = up->no_check6_rx; + val = udp_get_no_check6_rx(sk); break; case UDP_SEGMENT: @@ -2803,17 +2803,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - val = up->gro_enabled; + val = udp_test_bit(GRO_ENABLED, sk); break; /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: - val = up->pcslen; + val = READ_ONCE(up->pcslen); break; case UDPLITE_RECV_CSCOV: - val = up->pcrlen; + val = READ_ONCE(up->pcrlen); break; default: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0f46b3c2e4ac..6c95d28d0c4a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist) return call_gro_receive(udp_gro_receive_segment, head, skb); /* no GRO, be sure flush the current packet */ diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9b18f371af0d..1e7e4aecdc48 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; - udp_tunnel_encap_enable(sock); + udp_tunnel_encap_enable(sk); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39ecdad1b50c..af37af3ab727 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table); static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eac206a290d0..183f6dc37242 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce25bcb9974..421264a69e97 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -3011,8 +3011,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; psf = rcu_dereference(state->im->mca_sources); } out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c687b357e6a..9d8dfc7423e4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 1, DST_OBSOLETE_FORCE_CHK, flags); + DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); @@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 86b5d509a468..f60ba4295435 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -413,7 +413,7 @@ try_again: (struct sockaddr *)sin6); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) @@ -571,7 +571,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, @@ -688,7 +688,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udpv6_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -726,16 +727,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -858,7 +860,7 @@ start_lookup: /* If zero checksum and no_check is not on for * the socket then skip it. */ - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; @@ -980,7 +982,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); - if (!uh->check && !udp_sk(sk)->no_check6_rx) { + if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; @@ -1002,7 +1004,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } @@ -1241,7 +1243,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (udp_sk(sk)->no_check6_tx) { + if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } @@ -1262,7 +1264,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, if (is_udplite) csum = udplite_csum(skb); - else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ + else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ @@ -1332,7 +1334,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1644,11 +1646,11 @@ static void udpv6_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } @@ -1670,7 +1672,7 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) { + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 267d491e9707..a60bec9b14f1 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,7 +17,6 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4907ab241d6b..4156387248e4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct ipv6hdr *ip6h; int len; int ip6hlen = sizeof(struct ipv6hdr); - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_udp_encap_rcv(sk, skb); + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 03608d3ded4b..8d21ff25f160 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk) switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; - (udp_sk(sk))->encap_destroy = NULL; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); + udp_sk(sk)->encap_rcv = NULL; + udp_sk(sk)->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fd66014d8a76..5f8094acd056 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -873,7 +873,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; @@ -890,7 +890,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1e20bbd687f1..1424bfeaca73 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -375,9 +375,9 @@ out: static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 }, - [TCA_ROUTE4_TO] = { .type = NLA_U32 }, - [TCA_ROUTE4_FROM] = { .type = NLA_U32 }, - [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, + [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF), }; static int route4_set_parms(struct net *net, struct tcf_proto *tp, @@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_ROUTE4_TO]) { - if (new && handle & 0x8000) + if (new && handle & 0x8000) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } to = nla_get_u32(tb[TCA_ROUTE4_TO]); - if (to > 0xFF) - return -EINVAL; nhandle = to; } + if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM], + "'from' and 'fromif' are mutually exclusive"); + return -EINVAL; + } + if (tb[TCA_ROUTE4_FROM]) { - if (tb[TCA_ROUTE4_IIF]) - return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); - if (id > 0xFF) - return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); - if (id > 0x7FFF) - return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; - if (nhandle != handle) + if (nhandle != handle) { + NL_SET_ERR_MSG_FMT(extack, + "Handle mismatch constructed: %x (expected: %x)", + handle, nhandle); return -EINVAL; + } } if (!nhandle) { @@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct route4_filter __rcu **fp; struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; - struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; int err; @@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (opt == NULL) + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing options"); return -EINVAL; + } - err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS], route4_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index a9bd0a235890..ce63414185fd 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, unsigned long orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; @@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 020cf17ab7e4..013b65241b65 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1921,6 +1921,9 @@ out_err: err = total_written; } out: + if (sk->sk_type == SOCK_STREAM) + err = sk_stream_error(sk, msg->msg_flags, err); + release_sock(sk); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6b405782b63..c4c4fc29ccf5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2561,7 +2561,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json index 5272049566d6..a4a83fb3e96f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json @@ -1351,5 +1351,54 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "e470", + "name": "Try to delete class referenced by fw after a replace", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "ec1a", + "name": "Replace fw classid with nil", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 parent 10:", + "matchPattern": "fw chain 0 handle 0x1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json index 1f6f19f02997..8d8de8f65aef 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json @@ -177,5 +177,30 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "b042", + "name": "Try to delete class referenced by route after a replace", + "category": [ + "filter", + "route" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: prio 1 route from 10 classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: prio 1 route from 5 classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index bd64a4bf11ab..ddc7c355be0a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -247,5 +247,30 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "0c37", + "name": "Try to delete class referenced by u32 after a replace", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 90718c2fd4ea..148fc9c47c50 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -19,6 +19,7 @@ #include <time.h> #include <sys/mman.h> #include <poll.h> +#include <signal.h> #include "timeout.h" #include "control.h" @@ -1170,6 +1171,133 @@ static void test_seqpacket_msg_peek_server(const struct test_opts *opts) return test_msg_peek_server(opts, true); } +static sig_atomic_t have_sigpipe; + +static void sigpipe(int signo) +{ + have_sigpipe = 1; +} + +static void test_stream_check_sigpipe(int fd) +{ + ssize_t res; + + have_sigpipe = 0; + + res = send(fd, "A", 1, 0); + if (res != -1) { + fprintf(stderr, "expected send(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (!have_sigpipe) { + fprintf(stderr, "SIGPIPE expected\n"); + exit(EXIT_FAILURE); + } + + have_sigpipe = 0; + + res = send(fd, "A", 1, MSG_NOSIGNAL); + if (res != -1) { + fprintf(stderr, "expected send(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (have_sigpipe) { + fprintf(stderr, "SIGPIPE not expected\n"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_shutwr_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_WR)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutwr_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SHUTRDDONE"); + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_RD)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + control_writeln("SHUTRDDONE"); + control_expectln("CLIENTDONE"); + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1250,6 +1378,16 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_msg_peek_client, .run_server = test_seqpacket_msg_peek_server, }, + { + .name = "SOCK_STREAM SHUT_WR", + .run_client = test_stream_shutwr_client, + .run_server = test_stream_shutwr_server, + }, + { + .name = "SOCK_STREAM SHUT_RD", + .run_client = test_stream_shutrd_client, + .run_server = test_stream_shutrd_server, + }, {}, }; |