From 2d3db210860f1df099a35b1dd54cca35454e0361 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Tue, 29 Oct 2013 18:11:59 -0400 Subject: Revert "mac80211: allow disable power save in mesh" This reverts commit ee1f668136b2fb6640ee2d54c2a525ea41f98211. The aformentioned commit added a check to allow 'iw wlan0 set power_save off' to work for mesh interfaces. However, this is problematic because it also allows 'iw wlan0 set power_save on', which will crash in short order because all of the subsequent code manipulates sdata->u.mgd. The power-saving states for mesh interfaces can be manipulated through the mesh config, e.g: 'iw wlan0 set mesh_param mesh_power_save=active' (which, despite the name, actualy disables power saving since the setting refers to the type of sleep the interface undergoes). Cc: stable@vger.kernel.org Fixes: ee1f668136b2 ("mac80211: allow disable power save in mesh") Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 95667b088c5b..0ec245120a63 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2488,8 +2488,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) -- cgit v1.2.3 From 1fe4517cebc35ef900fa483d19c3090681f3c7bc Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 30 Oct 2013 16:09:33 +0100 Subject: cfg80211: fix ibss wext chandef creation The wext internal chandefs for ibss should be created using the cfg80211_chandef_create() functions. Initializing fields manually is error-prone. Reported-by: Dirk Gouders Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9d797df56649..89737ee2669a 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -262,7 +262,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_channel *new_chan = NULL; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -278,18 +278,19 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.center_freq1 = - chan->center_freq; + new_chan = chan; break; } - if (wdev->wext.ibss.chandef.chan) + if (new_chan) break; } - if (!wdev->wext.ibss.chandef.chan) + if (!new_chan) return -EINVAL; + + cfg80211_chandef_create(&wdev->wext.ibss.chandef, new_chan, + NL80211_CHAN_NO_HT); } /* don't join -- SSID is not there */ @@ -363,9 +364,8 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, return err; if (chan) { - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - wdev->wext.ibss.chandef.center_freq1 = freq; + cfg80211_chandef_create(&wdev->wext.ibss.chandef, chan, + NL80211_CHAN_NO_HT); wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ -- cgit v1.2.3 From 84a3d1c97d024acd1d27ebbc10cb95784b11f4e7 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Tue, 5 Nov 2013 14:48:46 +0100 Subject: mac80211: DFS setup chandef for radar_event correctly Setup chandef for radar event correctly, before we will clear this in ieee80211_dfs_cac_cancel() function. Without this patch mac80211 will report wrong channel width in case we will get radar event during active CAC. Signed-off-by: Janusz Dziedzic Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/mac80211/util.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 592a18171f95..e9ce36d32ef5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2278,17 +2278,15 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = local->hw.conf.chandef; ieee80211_dfs_cac_cancel(local); if (local->use_chanctx) /* currently not handled */ WARN_ON(1); - else { - chandef = local->hw.conf.chandef; + else cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); - } } void ieee80211_radar_detected(struct ieee80211_hw *hw) -- cgit v1.2.3 From 18db594a1005d908d995a2fc8f5a7bf4286fdca0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Nov 2013 10:34:36 +0100 Subject: mac80211: fix scheduled scan rtnl deadlock When changing cfg80211 to use RTNL locking, this caused a deadlock in mac80211 as it calls cfg80211_sched_scan_stopped() from a work item that's on a workqueue that is flushed with the RTNL held. Fix this by simply using schedule_work(), the work only needs to finish running before the wiphy is unregistered, no other synchronisation (e.g. with suspend) is really required since for suspend userspace is already blocked anyway when we flush the workqueue so will only pick up the event after resume. Cc: stable@vger.kernel.org Fixes: 5fe231e87372 ("cfg80211: vastly simplify locking") Reported-and-tested-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/main.c | 1 + net/mac80211/scan.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 21d5d44444d0..e765f77bb97a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1047,6 +1047,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); + flush_work(&local->sched_scan_stopped_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5ad66a83ef7f..bcc4833d7542 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1088,6 +1088,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) trace_api_sched_scan_stopped(local); - ieee80211_queue_work(&local->hw, &local->sched_scan_stopped_work); + schedule_work(&local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped); -- cgit v1.2.3 From ae917c9f55862691e31b84de7ec29bedcb83971c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:05:22 +0200 Subject: nl80211: check nla_put_* return values Coverity pointed out that in a few functions we don't check the return value of the nla_put_*() calls. Most of these are fairly harmless because the input isn't very dynamic and controlled by the kernel, but the pattern is simply wrong, so fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 52 +++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a1eb21073176..0ffb18371376 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9633,8 +9633,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; - if (req->flags) - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + if (req->flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + goto nla_put_failure; return 0; nla_put_failure: @@ -11118,16 +11119,18 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; - if (wakeup->tcp_match) - nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + if (wakeup->tcp_match && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH)) + goto free_msg; - if (wakeup->tcp_connlost) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + if (wakeup->tcp_connlost && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST)) + goto free_msg; - if (wakeup->tcp_nomoretokens) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->tcp_nomoretokens && + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS)) + goto free_msg; if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; @@ -11263,24 +11266,29 @@ void cfg80211_ft_event(struct net_device *netdev, return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto out; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) + goto out; - nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); - if (ft_event->ies) - nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); - if (ft_event->ric_ies) - nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, - ft_event->ric_ies); + if (ft_event->ies && + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) + goto out; + if (ft_event->ric_ies && + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies)) + goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); + return; + out: + nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); -- cgit v1.2.3 From 9fe271af7d4de96471c5aaee2f4d0d1576050497 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:15:12 +0200 Subject: nl80211: fix error path in nl80211_get_key() Coverity pointed out that in the (practically impossible) error case we leak the message - fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0ffb18371376..f1370ed9f498 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2687,7 +2687,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) - return -ENOBUFS; + goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; -- cgit v1.2.3 From 7fa322c878d70e38675f50e17acdce7fa3f5ac8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:16:58 +0200 Subject: nl80211: check nla_nest_start() return value Coverity pointed out that we might dereference NULL later if nla_nest_start() returns a failure. This isn't really true since we'd bomb out before, but we should check the return value directly, so do that. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f1370ed9f498..e20c27ff0f14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11094,6 +11094,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct nlattr *reasons; reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!reasons) + goto free_msg; if (wakeup->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) -- cgit v1.2.3 From 57fb089f480d199e4275da086d407b978de67214 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Nov 2013 17:31:37 +0100 Subject: mac80211: fix crash when using AP VLAN interfaces Commit "mac80211: implement SMPS for AP" applies to AP_VLAN as well. It assumes that sta->sdata->vif.bss_conf.bssid is present, which did not get set for AP_VLAN. Initialize it to sdata->vif.addr like for other interface types. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ff101ea1d9ae..36c3a4cbcabf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1325,7 +1325,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; break; case NL80211_IFTYPE_AP_VLAN: - break; case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; -- cgit v1.2.3 From 6c751ef8a1a15d633cd755eafa86ede9c32b2617 Mon Sep 17 00:00:00 2001 From: Javier Lopez Date: Wed, 6 Nov 2013 10:04:29 -0800 Subject: mac80211: fix for mesh beacon update on powersave Mesh beacon was not being rebuild after user triggered a mesh powersave change. To solve this issue use ieee80211_mbss_info_change_notify instead of ieee80211_bss_info_change_notify. This helper function forces mesh beacon to be rebuild and then notifies the driver about the beacon change. Signed-off-by: Javier Lopez Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0ec245120a63..9e7e68d7b1a7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1368,7 +1368,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); #endif } -- cgit v1.2.3 From 351df099721e02e1a25a498268e52c0378c0e272 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 13 Nov 2013 23:07:07 +0100 Subject: mac80211: minstrel_ht: fix rates selection When initializing rates selections starting indexes upon stats update, the minstrel_sta->max_* rates should be 'group * MCS_GROUP_RATES + i' not 'i'. This affects settings where one of the peers does not support any of the rates of the group 0 (i.e. when ht_cap.mcs.rx_mask[0] == 0). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5d60779a0c1b..47aa6f81566b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -277,13 +277,15 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!(mg->supported & BIT(i))) continue; + index = MCS_GROUP_RATES * group + i; + /* initialize rates selections starting indexes */ if (!mg_rates_valid) { mg->max_tp_rate = mg->max_tp_rate2 = mg->max_prob_rate = i; if (!mi_rates_valid) { mi->max_tp_rate = mi->max_tp_rate2 = - mi->max_prob_rate = i; + mi->max_prob_rate = index; mi_rates_valid = true; } mg_rates_valid = true; @@ -291,7 +293,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = &mg->rates[i]; mr->retry_updated = false; - index = MCS_GROUP_RATES * group + i; minstrel_calc_rate_ewma(mr); minstrel_ht_calc_tp(mi, group, i); -- cgit v1.2.3 From 9f16d84ad73ea04145a5dc85c8f1067915b37eea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Nov 2013 10:37:34 +0100 Subject: cfg80211: disable 5/10 MHz support for all drivers Due to nl80211 API breakage, 5/10 MHz support is broken for all drivers. Fixing it requires adding new API, but that can't be done as a bugfix commit since that would require either updating all APIs in the trees needing the bugfix or cause different kernels to have incompatible API. Therefore, just disable 5/10 MHz support for all drivers. Cc: stable@vger.kernel.org [3.12] Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index aff959e5a1b3..00a65ba3aeaa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -451,6 +451,9 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ + wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.3 From 051a41fa4ee14f5c39668f0980973b9a195de560 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Nov 2013 11:28:27 +0100 Subject: mac80211: don't attempt to reorder multicast frames Multicast frames can't be transmitted as part of an aggregation session (such a session couldn't even be set up) so don't try to reorder them. Trying to do so would cause the reorder to stop working correctly since multicast QoS frames (as transmitted by the Aruba APs this was found with) would cause sequence number confusion in the buffer. Cc: stable@vger.kernel.org Reported-by: Blaise Gassend Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index caecef870c0e..2b0debb0422b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -911,7 +911,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, u16 sc; u8 tid, ack_policy; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* -- cgit v1.2.3 From 3f718fd8401d7db86b9efc3ea1cdf5df41354b9f Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 8 Nov 2013 15:09:43 +0800 Subject: mac80211: fix the mesh channel switch support Mesh STA receiving the mesh CSA action frame is not able to trigger the mesh channel switch due to the incorrect handling and comparison of mesh channel switch parameters element (MCSP)'s TTL. Make sure the MCSP's TTL is updated accordingly before calling the ieee80211_mesh_process_chnswitch. Also, we update the beacon before forwarding the CSA action frame, so MCSP's precedence value and initiator flag need to be updated prior to this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 +++++++++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.c | 20 ++++++++++++-------- net/mac80211/spectmgmt.c | 2 ++ net/mac80211/util.c | 5 ----- 5 files changed, 24 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9e7e68d7b1a7..364ce0c5962f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3119,9 +3119,17 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, params->chandef.chan->band) return -EINVAL; + ifmsh->chsw_init = true; + if (!ifmsh->pre_value) + ifmsh->pre_value = 1; + else + ifmsh->pre_value++; + err = ieee80211_mesh_csa_beacon(sdata, params, true); - if (err < 0) + if (err < 0) { + ifmsh->chsw_init = false; return err; + } break; #endif default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 29dc505be125..4aea4e791113 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1228,6 +1228,7 @@ struct ieee80211_csa_ie { u8 mode; u8 count; u8 ttl; + u16 pre_value; }; /* Parsed Information Elements */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 896fe3bd599e..ba105257d03f 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -943,14 +943,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, params.chandef.chan->center_freq); params.block_tx = csa_ie.mode & WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; - if (beacon) + if (beacon) { ifmsh->chsw_ttl = csa_ie.ttl - 1; - else - ifmsh->chsw_ttl = 0; + if (ifmsh->pre_value >= csa_ie.pre_value) + return false; + ifmsh->pre_value = csa_ie.pre_value; + } - if (ifmsh->chsw_ttl > 0) + if (ifmsh->chsw_ttl < ifmsh->mshcfg.dot11MeshTTL) { if (ieee80211_mesh_csa_beacon(sdata, ¶ms, false) < 0) return false; + } else { + return false; + } sdata->csa_radar_required = params.radar_required; @@ -1163,7 +1168,6 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, offset_ttl = (len < 42) ? 7 : 10; *(pos + offset_ttl) -= 1; *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; - sdata->u.mesh.chsw_ttl = *(pos + offset_ttl); memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1182,7 +1186,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u16 pre_value; bool fwd_csa = true; size_t baselen; - u8 *pos, ttl; + u8 *pos; if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) @@ -1193,8 +1197,8 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u.action.u.chan_switch.variable); ieee802_11_parse_elems(pos, len - baselen, false, &elems); - ttl = elems.mesh_chansw_params_ie->mesh_ttl; - if (!--ttl) + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!--ifmsh->chsw_ttl) fwd_csa = false; pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index a40da20b32e0..6ab009070084 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -78,6 +78,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; + csa_ie->pre_value = le16_to_cpu( + elems->mesh_chansw_params_ie->mesh_pre_value); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e9ce36d32ef5..9f9b9bd3fd44 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2457,14 +2457,9 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); /* Reason Cd */ pos += 2; - if (!ifmsh->pre_value) - ifmsh->pre_value = 1; - else - ifmsh->pre_value++; pre_value = cpu_to_le16(ifmsh->pre_value); memcpy(pos, &pre_value, 2); /* Precedence Value */ pos += 2; - ifmsh->chsw_init = true; } ieee80211_tx_skb(sdata, skb); -- cgit v1.2.3 From 12b5f34d2d5934e998975bbae4e29f81d94052f6 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 18 Nov 2013 19:06:46 +0200 Subject: mac80211: fix connection polling Commit 392b9ff ("mac80211: change beacon/connection polling") removed the IEEE80211_STA_BEACON_POLL flag. However, it accidentally removed the setting of IEEE80211_STA_CONNECTION_POLL, making the connection polling completely useless (the flag is always clear, so the result is never being checked). Fix it. Signed-off-by: Eliad Peller Acked-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7504ab61a34..b3a3ce316656 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1910,6 +1910,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) already = true; + ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; + mutex_unlock(&sdata->local->mtx); if (already) -- cgit v1.2.3 From 1b09cd82d8c479700ef6185665839d1020b02519 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Nov 2013 19:40:41 +0100 Subject: cfg80211: ignore supported rates for nonexistant bands on scan Fixes wpa_supplicant p2p_find on 5GHz-only devices Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e20c27ff0f14..138dc3bb8b67 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5349,6 +5349,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } + + if (!wiphy->bands[band]) + continue; + err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), -- cgit v1.2.3 From 5664da4429c177495256f958194c241625074ec0 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 20 Nov 2013 19:13:35 +0100 Subject: mac80211: use capped prob when computing throughputs Commit 3e8b1eb "mac80211/minstrel_ht: improve rate selection stability" introduced a local capped prob in minstrel_ht_calc_tp but omitted to use it to compute the per rate throughput. Signed-off-by: Karl Beldan Cc: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 47aa6f81566b..4096ff6cc24f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -226,7 +226,7 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((mr->probability * 1000) / nsecs); + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } -- cgit v1.2.3 From 24d47300d118c5909a51b7270276d749cce150a2 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Tue, 19 Nov 2013 17:12:05 +0100 Subject: mac80211: set hw initial idle state ATM, the first call of ieee80211_do_open will configure the hw as non-idle, even if the interface being brought up is not a monitor, and this leads to inconsistent sequences like: register_hw() do_open(sta) hw_config(non-idle) (.. sta is non-idle ..) scan(sta) hw_config(idle) (after scan finishes) do_stop(sta) do_open(sta) (.. sta is idle ..) Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e765f77bb97a..7d1c3ac48ed9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -940,6 +940,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + local->hw.conf.flags = IEEE80211_CONF_IDLE; + ieee80211_led_init(local); rtnl_lock(); -- cgit v1.2.3 From b49faea7655ec10ade15d7d007e4218ca578a513 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 14 Nov 2013 10:41:01 -0500 Subject: netfilter: ipset: fix incorret comparison in hash_netnet4_data_equal() Both sides of the comparison are the same, looks like a cut-and-paste error. Spotted by Coverity. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 2bc2dec20b00..6226803fc490 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -59,7 +59,7 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, u32 *multi) { return ip1->ipcmp == ip2->ipcmp && - ip2->ccmp == ip2->ccmp; + ip1->ccmp == ip2->ccmp; } static inline int -- cgit v1.2.3 From dda444d52496aa8ddc501561bca580f1374a96a9 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:07:26 +0100 Subject: cfg80211: disable CSA for all drivers The channel switch announcement code has some major locking problems which can cause a deadlock in worst case. A series of fixes has been proposed, but these are non-trivial and need to be tested first. Therefore disable CSA completely for 3.13. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 00a65ba3aeaa..52b865fb7351 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -454,6 +454,12 @@ int wiphy_register(struct wiphy *wiphy) /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + /* + * There are major locking problems in nl80211/mac80211 for CSA, + * disable for all drivers until this has been reworked. + */ + wiphy->flags &= ~WIPHY_FLAG_HAS_CHANNEL_SWITCH; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.3 From 0834ae3c3af44480834cce128b6fef83006e537f Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:45:18 +0100 Subject: mac80211: check csa wiphy flag in ibss before switching When external CSA IEs are received (beacons or action messages), a channel switch is triggered as well. This should only be allowed on devices which actually support channel switches, otherwise disconnect. (For the corresponding userspace invocation, the wiphy flag is checked in nl80211). Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 531be040b9ae..27a39de89679 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -823,6 +823,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (err) return false; + /* channel switch is not supported, disconnect */ + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) + goto disconnect; + params.count = csa_ie.count; params.chandef = csa_ie.chandef; -- cgit v1.2.3 From 30e56918dd1e6d64350661f186657f6a6f2646e6 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Tue, 26 Nov 2013 15:46:56 +0800 Subject: ipv6: judge the accept_ra_defrtr before calling rt6_route_rcv when dealing with a RA message, if accept_ra_defrtr is false, the kernel will not add the default route, and then deal with the following route information options. Unfortunately, those options maybe contain default route, so let's judge the accept_ra_defrtr before calling rt6_route_rcv. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3512177deb4d..300865171394 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1277,6 +1277,9 @@ skip_linkparms: ri->prefix_len == 0) continue; #endif + if (ri->prefix_len == 0 && + !in6_dev->cnf.accept_ra_defrtr) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, -- cgit v1.2.3 From 57ec0afe293834f8ca9499214ed74748d89eaa44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois-Xavier=20Le=20Bail?= Date: Mon, 2 Dec 2013 11:28:49 +0100 Subject: ipv6: fix third arg of anycast_dst_alloc(), must be bool. Signed-off-by: Francois-Xavier Le Bail Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12c97d8aa6bb..d5fa5b8c443e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2613,7 +2613,7 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->rt) continue; - sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); /* Failure cases are ignored */ if (!IS_ERR(sp_rt)) { -- cgit v1.2.3 From 7150aede5dd241539686e17d9592f5ebd28a2cda Mon Sep 17 00:00:00 2001 From: Kamala R Date: Mon, 2 Dec 2013 19:55:21 +0530 Subject: IPv6: Fixed support for blackhole and prohibit routes The behaviour of blackhole and prohibit routes has been corrected by setting the input and output pointers of the dst variable appropriately. For blackhole routes, they are set to dst_discard and to ip6_pkt_discard and ip6_pkt_discard_out respectively for prohibit routes. ipv6: ip6_pkt_prohibit(_out) should not depend on CONFIG_IPV6_MULTIPLE_TABLES We need ip6_pkt_prohibit(_out) available without CONFIG_IPV6_MULTIPLE_TABLES Signed-off-by: Kamala R Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7faa9d5e1503..ddb9d41c8eea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -234,9 +236,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -1565,21 +1564,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -2144,8 +2146,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); @@ -2157,8 +2157,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ -- cgit v1.2.3 From 18fc25c94eadc52a42c025125af24657a93638c0 Mon Sep 17 00:00:00 2001 From: Venkat Venkatsubra Date: Mon, 2 Dec 2013 15:41:39 -0800 Subject: rds: prevent BUG_ON triggered on congestion update to loopback After congestion update on a local connection, when rds_ib_xmit returns less bytes than that are there in the message, rds_send_xmit calls back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE) to trigger. For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually the message contains 8240 bytes. rds_send_xmit thinks there is more to send and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header) =4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k]. The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f "rds: prevent BUG_ON triggering on congestion map updates" introduced this regression. That change was addressing the triggering of a different BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE: BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); This was the sequence it was going through: (rds_ib_xmit) /* Do not send cong updates to IB loopback */ if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; } rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time) = 8192 c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again and so on (c_xmit_data_off 24672,32912,41152,49392,57632) rds_ib_xmit returns 8240 On this iteration this sequence causes the BUG_ON in rds_send_xmit: while (ret) { tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off); [tmp = 65536 - 57632 = 7904] conn->c_xmit_data_off += tmp; [c_xmit_data_off = 57632 + 7904 = 65536] ret -= tmp; [ret = 8240 - 7904 = 336] if (conn->c_xmit_data_off == sg->length) { conn->c_xmit_data_off = 0; sg++; conn->c_xmit_sg++; BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); [c_xmit_sg = 1, rm->data.op_nents = 1] What the current fix does: Since the congestion update over loopback is not actually transmitted as a message, all that rds_ib_xmit needs to do is let the caller think the full message has been transmitted and not return partial bytes. It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb. And 64Kb+48 when page size is 64Kb. Reported-by: Josh Hunt Tested-by: Honggang Li Acked-by: Bang Nguyen Signed-off-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- net/rds/ib_send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index e59094981175..37be6e226d1b 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); scat = &rm->data.op_sg[sg]; - ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; - ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); - return ret; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ -- cgit v1.2.3 From 76c82d7a3d24a4ae1f9b098287c18055546c1a47 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:52 -0500 Subject: net_sched: Fail if missing mandatory action operation methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd7072827a40..618695e84190 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -270,6 +270,10 @@ int tcf_register_action(struct tc_action_ops *act) { struct tc_action_ops *a, **ap; + /* Must supply act, dump, cleanup and init */ + if (!act->act || !act->dump || !act->cleanup || !act->init) + return -EINVAL; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -381,7 +385,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, } while ((a = act) != NULL) { repeat: - if (a->ops && a->ops->act) { + if (a->ops) { ret = a->ops->act(skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ @@ -405,7 +409,7 @@ void tcf_action_destroy(struct tc_action *act, int bind) struct tc_action *a; for (a = act; a; a = act) { - if (a->ops && a->ops->cleanup) { + if (a->ops) { if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -424,7 +428,7 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; return a->ops->dump(skb, a, bind, ref); } @@ -436,7 +440,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; if (nla_put_string(skb, TCA_KIND, a->ops->kind)) -- cgit v1.2.3 From 63ef6174654a986f263d25e957ef9d1ff243f649 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:53 -0500 Subject: net_sched: Default action lookup method for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 618695e84190..d1a022e441be 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,6 +274,9 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + if (!act->lookup) + act->lookup = tcf_hash_search; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -727,8 +730,6 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (a->ops == NULL) goto err_free; - if (a->ops->lookup == NULL) - goto err_mod; err = -ENOENT; if (a->ops->lookup(a, index) == 0) goto err_mod; -- cgit v1.2.3 From 43c00dcf8888daea234226e8adf09c37b00d2245 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:54 -0500 Subject: net_sched: Use default action lookup functions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_police.c | 1 - 7 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3a4c0caa1f7d..4225a9382a2b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -585,7 +585,6 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, - .lookup = tcf_hash_search, .init = tcf_csum_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index fd2b3cff5fa2..15851da99f3b 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -206,7 +206,6 @@ static struct tc_action_ops act_gact_ops = { .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, - .lookup = tcf_hash_search, .init = tcf_gact_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60d88b6b9560..1d3e19180c2e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -298,7 +298,6 @@ static struct tc_action_ops act_ipt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 977c10e0631b..6cb16ec3d624 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -271,7 +271,6 @@ static struct tc_action_ops act_mirred_ops = { .act = tcf_mirred, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, - .lookup = tcf_hash_search, .init = tcf_mirred_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 876f0ef29694..30c13dedc94d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -308,7 +308,6 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat, .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, - .lookup = tcf_hash_search, .init = tcf_nat_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7ed78c9e505c..ab4fc56f8852 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -243,7 +243,6 @@ static struct tc_action_ops act_pedit_ops = { .act = tcf_pedit, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, - .lookup = tcf_hash_search, .init = tcf_pedit_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 272d8e924cf6..16a62c36928a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -407,7 +407,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; -- cgit v1.2.3 From 382ca8a1ad8963c7676585f9e25f4c5ff8b28439 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:55 -0500 Subject: net_sched: Provide default walker function for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d1a022e441be..69cb848e8345 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,8 +274,11 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + /* Supply defaults */ if (!act->lookup) act->lookup = tcf_hash_search; + if (!act->walk) + act->walk = tcf_generic_walker; write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { @@ -1089,12 +1092,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; - if (a_o->walk == NULL) { - WARN(1, "tc_dump_action: %s !capable of dumping table\n", - a_o->kind); - goto out_module_put; - } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) -- cgit v1.2.3 From 651a6493ae5c055c78777bb7178c23b5565631da Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:56 -0500 Subject: net_sched: Use default action walker methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_simple.c | 1 - net/sched/act_skbedit.c | 1 - 8 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4225a9382a2b..5c5edf56adbd 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -586,7 +586,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, .init = tcf_csum_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 15851da99f3b..5645a4d32abd 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -207,7 +207,6 @@ static struct tc_action_ops act_gact_ops = { .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, .init = tcf_gact_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1d3e19180c2e..882a89762f77 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -299,7 +299,6 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; static struct tc_action_ops act_xt_ops = { @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6cb16ec3d624..252378121ce7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -272,7 +272,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, .init = tcf_mirred_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 30c13dedc94d..6a15ace00241 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -309,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, .init = tcf_nat_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ab4fc56f8852..03b67674169c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -244,7 +244,6 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7725eb4ab756..31157d3e729c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,6 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cb4221171f93..35ea643b4325 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -203,7 +203,6 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Alexander Duyck, "); -- cgit v1.2.3 From 78ac814f120da17053b3d52aa215c7c547c5e77d Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 4 Dec 2013 17:32:39 +0800 Subject: sctp: disable max_burst when the max_burst is 0 As Michael pointed out that when max_burst is 0, it just disable max_burst. It declared in rfc6458#section-8.1.24. so add the check in sctp_transport_burst_limited, when it 0, just do nothing. Reviewed-by: Daniel Borkmann Suggested-by: Vlad Yasevich Suggested-by: Michael Tuexen Signed-off-by: Wang Weidong Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e332efb124cc..efc46ffed1fd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -573,7 +573,7 @@ void sctp_transport_burst_limited(struct sctp_transport *t) u32 old_cwnd = t->cwnd; u32 max_burst_bytes; - if (t->burst_limited) + if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); -- cgit v1.2.3 From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f4..f7e522c558ba 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* -- cgit v1.2.3 From 239c78db9c41a8f524cce60507440d72229d73bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 5 Dec 2013 23:29:19 +0100 Subject: net: clear local_df when passing skb between namespaces We must clear local_df when passing the skb between namespaces as the packet is not local to the new namespace any more and thus may not get fragmented by local rules. Fred Templin noticed that other namespaces do fragment IPv6 packets while forwarding. Instead they should have send back a PTB. The same problem should be present when forwarding DF-IPv4 packets between namespaces. Reported-by: Templin, Fred L Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8c..06e72d3cdf60 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3584,6 +3584,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; + skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); -- cgit v1.2.3 From 859828c0ea476b42f3a93d69d117aaba90994b6f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 5 Dec 2013 16:27:37 +0100 Subject: br: fix use of ->rx_handler_data in code executed on non-rx_handler path br_stp_rcv() is reached by non-rx_handler path. That means there is no guarantee that dev is bridge port and therefore simple NULL check of ->rx_handler_data is not enough. There is need to check if dev is really bridge port and since only rcu read lock is held here, do it by checking ->rx_handler pointer. Note that synchronize_net() in netdev_rx_handler_unregister() ensures this approach as valid. Introduced originally by: commit f350a0a87374418635689471606454abc7beaa3a "bridge: use rx_handler_data pointer to store net_bridge_port pointer" Fixed but not in the best way by: commit b5ed54e94d324f17c97852296d61a143f01b227a "bridge: fix RCU races with bridge port" Reintroduced by: commit 716ec052d2280d511e10e90ad54a86f5b5d4dcc2 "bridge: fix NULL pointer deref of br_port_get_rcu" Please apply to stable trees as well. Thanks. RH bugzilla reference: https://bugzilla.redhat.com/show_bug.cgi?id=1025770 Reported-by: Laine Stump Debugged-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_stp_bpdu.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 229d820bdf0b..045d56eaeca2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -426,6 +426,16 @@ netdev_features_t br_features_recompute(struct net_bridge *br, int br_handle_frame_finish(struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8660ea3be705..bdb459d21ad8 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - p = br_port_get_rcu(dev); + p = br_port_get_check_rcu(dev); if (!p) goto err; -- cgit v1.2.3 From b4ef4ce09308955d1aa54a289c0162607b3aa16c Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 6 Dec 2013 10:57:19 +0200 Subject: netfilter: xt_hashlimit: fix proc entry leak in netns destroy path In (32263dd1b netfilter: xt_hashlimit: fix namespace destroy path) the hashlimit_net_exit() function is always called right before hashlimit_mt_destroy() to release netns data. If you use xt_hashlimit with IPv4 and IPv6 together, this produces the following splat via netconsole in the netns destroy path: Pid: 9499, comm: kworker/u:0 Tainted: G WC O 3.2.0-5-netctl-amd64-core2 Call Trace: [] ? warn_slowpath_common+0x78/0x8c [] ? warn_slowpath_fmt+0x45/0x4a [] ? remove_proc_entry+0xd8/0x22e [] ? kfree+0x5b/0x6c [] ? hashlimit_net_exit+0x45/0x8d [xt_hashlimit] [] ? ops_exit_list+0x1c/0x44 [] ? cleanup_net+0xf1/0x180 [] ? should_resched+0x5/0x23 [] ? process_one_work+0x161/0x269 [] ? cwq_activate_delayed_work+0x3c/0x48 [] ? worker_thread+0xc2/0x145 [] ? manage_workers.isra.25+0x15b/0x15b [] ? kthread+0x76/0x7e [] ? kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x139/0x139 [] ? gs_change+0x13/0x13 ---[ end trace d8c3cc0ad163ef79 ]--- ------------[ cut here ]------------ WARNING: at /usr/src/linux-3.2.52/debian/build/source_netctl/fs/proc/generic.c:849 remove_proc_entry+0x217/0x22e() Hardware name: remove_proc_entry: removing non-empty directory 'net/ip6t_hashlimit', leaking at least 'IN-REJECT' This is due to lack of removal net/ip6t_hashlimit/* entries in hashlimit_proc_net_exit(), since only IPv4 entries are deleted. Fix it by always removing the IPv4 and IPv6 entries and their parent directories in the netns destroy path. Signed-off-by: Sergey Popovich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9ff035c71403..a3910fc2122b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -325,21 +325,24 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) +static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); struct proc_dir_entry *parent; - del_timer_sync(&hinfo->timer); - if (hinfo->family == NFPROTO_IPV4) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - if(parent != NULL) + if (parent != NULL) remove_proc_entry(hinfo->name, parent); +} +static void htable_destroy(struct xt_hashlimit_htable *hinfo) +{ + del_timer_sync(&hinfo->timer); + htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); vfree(hinfo); @@ -883,21 +886,15 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - /* recent_net_exit() is called before recent_mt_destroy(). Make sure - * that the parent xt_recent proc entry is is empty before trying to - * remove it. + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). + * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc + * entries is empty before trying to remove it. */ mutex_lock(&hashlimit_mutex); - pde = hashlimit_net->ipt_hashlimit; - if (pde == NULL) - pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) - remove_proc_entry(hinfo->name, pde); - + htable_remove_proc_entry(hinfo); hashlimit_net->ipt_hashlimit = NULL; hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); -- cgit v1.2.3 From cf9dc09d0949f0b5953fb08caa10bba0dc7ee71f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 24 Nov 2013 20:39:10 +0100 Subject: netfilter: nf_tables: fix missing rules flushing per table This patch allows you to atomically remove all rules stored in a table via the NFT_MSG_DELRULE command. You only need to indicate the specific table and no chain to flush all rules stored in that table. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcddc49c0e08..f93b7d06f4be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1717,6 +1717,19 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) return -ENOENT; } +static int nf_table_delrule_by_chain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &ctx->chain->rules, list) { + err = nf_tables_delrule_one(ctx, rule); + if (err < 0) + return err; + } + return 0; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1725,8 +1738,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); const struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; + struct nft_chain *chain = NULL; + struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; @@ -1738,22 +1751,29 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); - if (IS_ERR(chain)) - return PTR_ERR(chain); + if (nla[NFTA_RULE_CHAIN]) { + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_HANDLE]) { - rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) - return PTR_ERR(rule); + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, + nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule); - } else { - /* Remove all rules in this chain */ - list_for_each_entry_safe(rule, tmp, &chain->rules, list) { err = nf_tables_delrule_one(&ctx, rule); + } else { + err = nf_table_delrule_by_chain(&ctx); + } + } else { + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + err = nf_table_delrule_by_chain(&ctx); if (err < 0) break; } -- cgit v1.2.3 From 66e56cd46b93ef407c60adcac62cf33b06119d50 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:15 +0100 Subject: packet: fix send path when running with proto == 0 Commit e40526cb20b5 introduced a cached dev pointer, that gets hooked into register_prot_hook(), __unregister_prot_hook() to update the device used for the send path. We need to fix this up, as otherwise this will not work with sockets created with protocol = 0, plus with sll_protocol = 0 passed via sockaddr_ll when doing the bind. So instead, assign the pointer directly. The compiler can inline these helper functions automagically. While at it, also assume the cached dev fast-path as likely(), and document this variant of socket creation as it seems it is not widely used (seems not even the author of TX_RING was aware of that in his reference example [1]). Tested with reproducer from e40526cb20b5. [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released") Signed-off-by: Daniel Borkmann Tested-by: Salam Noureddine Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/packet/af_packet.c | 65 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba2548bd85bf..88cfbc189558 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,30 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -246,12 +270,10 @@ static void register_prot_hook(struct sock *sk) struct packet_sock *po = pkt_sk(sk); if (!po->running) { - if (po->fanout) { + if (po->fanout) __fanout_link(sk, po); - } else { + else dev_add_pack(&po->prot_hook); - rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); - } sock_hold(sk); po->running = 1; @@ -270,12 +292,11 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) { + + if (po->fanout) __fanout_unlink(sk, po); - } else { + else __dev_remove_pack(&po->prot_hook); - RCU_INIT_POINTER(po->cached_dev, NULL); - } __sock_put(sk); @@ -2059,19 +2080,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } -static struct net_device *packet_cached_dev_get(struct packet_sock *po) -{ - struct net_device *dev; - - rcu_read_lock(); - dev = rcu_dereference(po->cached_dev); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - - return dev; -} - static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2088,7 +2096,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2242,7 +2250,7 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2451,6 +2459,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2506,14 +2516,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2626,7 +2639,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; - RCU_INIT_POINTER(po->cached_dev, NULL); + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -3337,6 +3351,7 @@ static int packet_notifier(struct notifier_block *this, sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit v1.2.3 From a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 7 Dec 2013 03:33:45 +0100 Subject: ipv6: don't count addrconf generated routes against gc limit Brett Ciphery reported that new ipv6 addresses failed to get installed because the addrconf generated dsts where counted against the dst gc limit. We don't need to count those routes like we currently don't count administratively added routes. Because the max_addresses check enforces a limit on unbounded address generation first in case someone plays with router advertisments, we are still safe here. Reported-by: Brett Ciphery Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ddb9d41c8eea..a0a48ac3403f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2166,12 +2166,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); -- cgit v1.2.3 From d323e92cc3f4edd943610557c9ea1bb4bb5056e8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 8 Dec 2013 09:36:56 -0500 Subject: net: drop_monitor: fix the value of maxattr maxattr in genl_family should be used to save the max attribute type, but not the max command type. Drop monitor doesn't support any attributes, so we should leave it as zero. Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 95897183226e..e70301eb7a4a 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); -- cgit v1.2.3 From 673498b8ed4c4d4b7221c5309d891c5eac2b7528 Mon Sep 17 00:00:00 2001 From: Stefan Tomanek Date: Tue, 10 Dec 2013 23:21:25 +0100 Subject: inet: fix NULL pointer Oops in fib(6)_rule_suppress This changes ensures that the routing entry investigated by the suppress function actually does point to a device struct before following that pointer, fixing a possible kernel oops situation when verifying the interface group associated with a routing table entry. According to Daniel Golle, this Oops can be triggered by a user process trying to establish an outgoing IPv6 connection while having no real IPv6 connectivity set up (only autoassigned link-local addresses). Fixes: 6ef94cfafba15 ("fib_rules: add route suppression based on ifgroup") Reported-by: Daniel Golle Tested-by: Daniel Golle Signed-off-by: Stefan Tomanek Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 5 ++++- net/ipv6/fib6_rules.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 523be38e37de..f2e15738534d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -104,7 +104,10 @@ errout: static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; - struct net_device *dev = result->fi->fib_dev; + struct net_device *dev = NULL; + + if (result->fi) + dev = result->fi->fib_dev; /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f92..3fd0a578329e 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ -- cgit v1.2.3 From 12663bfc97c8b3fdb292428105dd92d563164050 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 7 Dec 2013 17:26:27 -0500 Subject: net: unix: allow set_peek_off to fail unix_dgram_recvmsg() will hold the readlock of the socket until recv is complete. In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until unix_dgram_recvmsg() will complete (which can take a while) without allowing us to break out of it, triggering a hung task spew. Instead, allow set_peek_off to fail, this way userspace will not hang. Signed-off-by: Sasha Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/unix/af_unix.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index ab20ed9b0f31..5393b4b719d7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -882,7 +882,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 01625ccc3ae6..a0ca162e5bd5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -530,13 +530,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); -static void unix_set_peek_off(struct sock *sk, int val) +static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - mutex_lock(&u->readlock); + if (mutex_lock_interruptible(&u->readlock)) + return -EINTR; + sk->sk_peek_off = val; mutex_unlock(&u->readlock); + + return 0; } -- cgit v1.2.3 From 993b858e37b3120ee76d9957a901cca22312ffaa Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 9 Dec 2013 22:54:46 -0800 Subject: tipc: correct the order of stopping services at rmmod The 'signal handler' service in TIPC is a mechanism that makes it possible to postpone execution of functions, by launcing them into a job queue for execution in a separate tasklet, independent of the launching execution thread. When we do rmmod on the tipc module, this service is stopped after the network service. At the same time, the stopping of the network service may itself launch jobs for execution, with the risk that these functions may be scheduled for execution after the data structures meant to be accessed by the job have already been deleted. We have seen this happen, most often resulting in an oops. This commit ensures that the signal handler is the very first to be stopped when TIPC is shut down, so there are no surprises during the cleanup of the other services. Signed-off-by: Jon Maloy Reviewed-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index fd4eeeaa972a..c6d3f75a9e1b 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -113,7 +113,6 @@ err: static void tipc_core_stop(void) { tipc_netlink_stop(); - tipc_handler_stop(); tipc_cfg_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); @@ -146,9 +145,10 @@ static int tipc_core_start(void) res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); - if (res) + if (res) { + tipc_handler_stop(); tipc_core_stop(); - + } return res; } @@ -178,6 +178,7 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { + tipc_handler_stop(); tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); -- cgit v1.2.3 From 00ede977098be3296d42d05a4265ec5ec4a28419 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 9 Dec 2013 22:54:47 -0800 Subject: tipc: protect handler_enabled variable with qitem_lock spin lock 'handler_enabled' is a global flag indicating whether the TIPC signal handling service is enabled or not. The lack of lock protection for this flag incurs a risk for contention, so that a tipc_k_signal() call might queue a signal handler to a destroyed signal queue, with unpredictable results. To correct this, we let the already existing 'qitem_lock' protect the flag, as it already does with the queue itself. This way, we ensure that the flag always is consistent across all cores. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/handler.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/handler.c b/net/tipc/handler.c index b36f0fcd9bdf..e4bc8a296744 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -56,12 +56,13 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) { struct queue_item *item; + spin_lock_bh(&qitem_lock); if (!handler_enabled) { pr_err("Signal request ignored by handler\n"); + spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } - spin_lock_bh(&qitem_lock); item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); if (!item) { pr_err("Signal queue out of memory\n"); @@ -112,10 +113,14 @@ void tipc_handler_stop(void) struct list_head *l, *n; struct queue_item *item; - if (!handler_enabled) + spin_lock_bh(&qitem_lock); + if (!handler_enabled) { + spin_unlock_bh(&qitem_lock); return; - + } handler_enabled = 0; + spin_unlock_bh(&qitem_lock); + tasklet_kill(&tipc_tasklet); spin_lock_bh(&qitem_lock); -- cgit v1.2.3 From 9f70f46bd4c7267d48ef461a1d613ec9ec0d520c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 10 Dec 2013 06:48:15 -0500 Subject: sctp: properly latch and use autoclose value from sock to association Currently, sctp associations latch a sockets autoclose value to an association at association init time, subject to capping constraints from the max_autoclose sysctl value. This leads to an odd situation where an application may set a socket level autoclose timeout, but sliently sctp will limit the autoclose timeout to something less than that. Fix this by modifying the autoclose setsockopt function to check the limit, cap it and warn the user via syslog that the timeout is capped. This will allow getsockopt to return valid autoclose timeout values that reflect what subsequent associations actually use. While were at it, also elimintate the assoc->autoclose variable, it duplicates whats in the timeout array, which leads to multiple sources for the same information, that may differ (as the former isn't subject to any capping). This gives us the timeout information in a canonical place and saves some space in the association structure as well. Signed-off-by: Neil Horman Acked-by: Vlad Yasevich CC: Wang Weidong CC: David Miller CC: Vlad Yasevich CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/sctp/associola.c | 5 +---- net/sctp/output.c | 3 ++- net/sctp/sm_statefuns.c | 12 ++++++------ net/sctp/socket.c | 4 ++++ 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68a27f9796d2..31ed008c8e13 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -154,8 +154,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -291,8 +290,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.ipv6_address = 1; INIT_LIST_HEAD(&asoc->asocs); - asoc->autoclose = sp->autoclose; - asoc->default_stream = sp->default_stream; asoc->default_ppid = sp->default_ppid; asoc->default_flags = sp->default_flags; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0e2644d0a773..0fb140f8f088 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -581,7 +581,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned long timeout; /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { + if (sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dfe3f36ff2aa..a26065be7289 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -820,7 +820,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) + if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -908,7 +908,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2970,7 +2970,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) force = SCTP_FORCE(); - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -3878,7 +3878,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, SCTP_CHUNK(chunk)); /* Count this as receiving DATA. */ - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -5267,7 +5267,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5346,7 +5346,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72046b9729a8..5455043f4496 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2196,6 +2196,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -2205,6 +2206,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + if (sp->autoclose > net->sctp.max_autoclose) + sp->autoclose = net->sctp.max_autoclose; + return 0; } -- cgit v1.2.3 From ce7a3bdf18a8dbcba1409f5d335c56fde432ca89 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Tue, 10 Dec 2013 15:15:46 +0100 Subject: ipv6: do not erase dst address with flow label destination This patch is following b579035ff766c9412e2b92abf5cab794bff102b6 "ipv6: remove old conditions on flow label sharing" Since there is no reason to restrict a label to a destination, we should not erase the destination value of a socket with the value contained in the flow label storage. This patch allows to really have the same flow label to more than one destination. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 - net/ipv6/datagram.c | 1 - net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/l2tp/l2tp_ip6.c | 1 - 6 files changed, 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4ac71ff7c2e4..2b90a786e475 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -851,7 +851,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8dfe1f4d3c1a..93b1aa34c432 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; } } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7fb4e14c467f..b6bb87e55805 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -792,7 +792,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0740f93a114a..f67033b4bb66 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -156,7 +156,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bcd5699313c3..089c741a3992 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1140,7 +1140,6 @@ do_udp_sendmsg: flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d9b437e55007..bb6e206ea70b 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -528,7 +528,6 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } -- cgit v1.2.3 From 85f935d41af097a87067367be66de52896b964e1 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:38 +0800 Subject: sctp: check the rto_min and rto_max in setsockopt When we set 0 to rto_min or rto_max, just not change the value. Also we should check the rto_min > rto_max. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5455043f4496..42b709c95cf3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2815,6 +2815,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne { struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; + unsigned long rto_min, rto_max; + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; @@ -2828,26 +2830,36 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) return -EINVAL; + rto_max = rtoinfo.srto_max; + rto_min = rtoinfo.srto_min; + + if (rto_max) + rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; + else + rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max; + + if (rto_min) + rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min; + else + rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min; + + if (rto_min > rto_max) + return -EINVAL; + if (asoc) { if (rtoinfo.srto_initial != 0) asoc->rto_initial = msecs_to_jiffies(rtoinfo.srto_initial); - if (rtoinfo.srto_max != 0) - asoc->rto_max = msecs_to_jiffies(rtoinfo.srto_max); - if (rtoinfo.srto_min != 0) - asoc->rto_min = msecs_to_jiffies(rtoinfo.srto_min); + asoc->rto_max = rto_max; + asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ - struct sctp_sock *sp = sctp_sk(sk); - if (rtoinfo.srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo.srto_initial; - if (rtoinfo.srto_max != 0) - sp->rtoinfo.srto_max = rtoinfo.srto_max; - if (rtoinfo.srto_min != 0) - sp->rtoinfo.srto_min = rtoinfo.srto_min; + sp->rtoinfo.srto_max = rto_max; + sp->rtoinfo.srto_min = rto_min; } return 0; -- cgit v1.2.3 From 4f3fdf3bc59cafd14c3bc2c2369efad34c7aa8b5 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:39 +0800 Subject: sctp: add check rto_min and rto_max in sysctl rto_min should be smaller than rto_max while rto_max should be larger than rto_min. Add two proc_handler for the checking. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b36561a1b3b..43b5e3243871 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -61,6 +61,13 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -102,17 +109,17 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_sctp_do_rto_min, .extra1 = &one, - .extra2 = &timer_max + .extra2 = &init_net.sctp.rto_max }, { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .proc_handler = proc_sctp_do_rto_max, + .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, { @@ -342,6 +349,60 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, return ret; } +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_min = new_value; + } + return ret; +} + +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_max = new_value; + } + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; -- cgit v1.2.3 From b486b2289e40797e386a18048a66b535206a463b Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:40 +0800 Subject: sctp: fix up a spacing fix up spacing of proc_sctp_do_hmac_alg for according to the proc_sctp_do_rto_min[max] in sysctl.c Suggested-by: Daniel Borkmann Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 43b5e3243871..b0565afb61c7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -56,10 +56,8 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, - loff_t *ppos); static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, @@ -301,8 +299,7 @@ static struct ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -- cgit v1.2.3 From 8afdd99a1315e759de04ad6e2344f0c5f17ecb1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2013 18:07:23 -0800 Subject: udp: ipv4: fix an use after free in __udp4_lib_rcv() Dave Jones reported a use after free in UDP stack : [ 5059.434216] ========================= [ 5059.434314] [ BUG: held lock freed! ] [ 5059.434420] 3.13.0-rc3+ #9 Not tainted [ 5059.434520] ------------------------- [ 5059.434620] named/863 is freeing memory ffff88005e960000-ffff88005e96061f, with a lock still held there! [ 5059.434815] (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435012] 3 locks held by named/863: [ 5059.435086] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x11d/0x940 [ 5059.435295] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x3e/0x410 [ 5059.435500] #2: (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435734] stack backtrace: [ 5059.435858] CPU: 0 PID: 863 Comm: named Not tainted 3.13.0-rc3+ #9 [loadavg: 0.21 0.06 0.06 1/115 1365] [ 5059.436052] Hardware name: /D510MO, BIOS MOPNV10J.86A.0175.2010.0308.0620 03/08/2010 [ 5059.436223] 0000000000000002 ffff88007e203ad8 ffffffff8153a372 ffff8800677130e0 [ 5059.436390] ffff88007e203b10 ffffffff8108cafa ffff88005e960000 ffff88007b00cfc0 [ 5059.436554] ffffea00017a5800 ffffffff8141c490 0000000000000246 ffff88007e203b48 [ 5059.436718] Call Trace: [ 5059.436769] [] dump_stack+0x4d/0x66 [ 5059.436904] [] debug_check_no_locks_freed+0x15a/0x160 [ 5059.437037] [] ? __sk_free+0x110/0x230 [ 5059.437147] [] kmem_cache_free+0x6a/0x150 [ 5059.437260] [] __sk_free+0x110/0x230 [ 5059.437364] [] sk_free+0x19/0x20 [ 5059.437463] [] sock_edemux+0x25/0x40 [ 5059.437567] [] sock_queue_rcv_skb+0x81/0x280 [ 5059.437685] [] ? udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.437805] [] __udp_queue_rcv_skb+0x42/0x240 [ 5059.437925] [] ? _raw_spin_lock+0x65/0x70 [ 5059.438038] [] udp_queue_rcv_skb+0x26b/0x4b0 [ 5059.438155] [] __udp4_lib_rcv+0x152/0xb00 [ 5059.438269] [] udp_rcv+0x15/0x20 [ 5059.438367] [] ip_local_deliver_finish+0x10f/0x410 [ 5059.438492] [] ? ip_local_deliver_finish+0x3e/0x410 [ 5059.438621] [] ip_local_deliver+0x43/0x80 [ 5059.438733] [] ip_rcv_finish+0x140/0x5a0 [ 5059.438843] [] ip_rcv+0x296/0x3f0 [ 5059.438945] [] __netif_receive_skb_core+0x742/0x940 [ 5059.439074] [] ? __netif_receive_skb_core+0x11d/0x940 [ 5059.442231] [] ? trace_hardirqs_on+0xd/0x10 [ 5059.442231] [] __netif_receive_skb+0x13/0x60 [ 5059.442231] [] netif_receive_skb+0x1e/0x1f0 [ 5059.442231] [] napi_gro_receive+0x70/0xa0 [ 5059.442231] [] rtl8169_poll+0x166/0x700 [r8169] [ 5059.442231] [] net_rx_action+0x129/0x1e0 [ 5059.442231] [] __do_softirq+0xed/0x240 [ 5059.442231] [] irq_exit+0x125/0x140 [ 5059.442231] [] do_IRQ+0x51/0xc0 [ 5059.442231] [] common_interrupt+0x6f/0x6f We need to keep a reference on the socket, by using skb_steal_sock() at the right place. Note that another patch is needed to fix a race in udp_sk_rx_dst_set(), as we hold no lock protecting the dst. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44f6a20fa29d..2e2aecbe22c4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -1739,15 +1735,15 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { int ret; - sk = skb->sk; if (unlikely(sk->sk_rx_dst == NULL)) udp_sk_rx_dst_set(sk, skb); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ -- cgit v1.2.3 From cc106e441a63bec3b1cb72948df82ea15945c449 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:27 +0800 Subject: net: sched: tbf: fix the calculation of max_size Current max_size is caluated from rate table. Now, the rate table has been replaced and it's wrong to caculate max_size based on this rate table. It can lead wrong calculation of max_size. The burst in kernel may be lower than user asked, because burst may gets some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s") and it seems we cannot avoid this loss. Burst's value(max_size) based on rate table may be equal user asked. If a packet's length is max_size, this packet will be stalled in tbf_dequeue() because its length is above the burst in kernel so that it cannot get enough tokens. The max_size guards against enqueuing packet sizes above q->buffer "time" in tbf_enqueue(). To make consistent with the calculation of tokens, this patch add a helper psched_ns_t2l() to calculate burst(max_size) directly to fix this problem. After this fix, we can support to using 64bit rates to calculate burst as well. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 115 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a6090051c5db..a44928c6ba24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -118,6 +118,30 @@ struct tbf_sched_data { }; +/* Time to Length, convert time in ns to length in bytes + * to determinate how many bytes can be sent in given time. + */ +static u64 psched_ns_t2l(const struct psched_ratecfg *r, + u64 time_in_ns) +{ + /* The formula is : + * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC + */ + u64 len = time_in_ns * r->rate_bytes_ps; + + do_div(len, NSEC_PER_SEC); + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + len = (len / 53) * 48; + + if (len > r->overhead) + len -= r->overhead; + else + len = 0; + + return len; +} + /* * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) @@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; - struct qdisc_rate_table *rtab = NULL; - struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; - int max_size, n; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + u64 max_size; + s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); @@ -304,38 +329,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); - rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); - if (rtab == NULL) - goto done; - - if (qopt->peakrate.rate) { - if (qopt->peakrate.rate > qopt->rate.rate) - ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); - if (ptab == NULL) - goto done; - } - - for (n = 0; n < 256; n++) - if (rtab->data[n] > qopt->buffer) - break; - max_size = (n << qopt->rate.cell_log) - 1; - if (ptab) { - int size; - - for (n = 0; n < 256; n++) - if (ptab->data[n] > qopt->mtu) - break; - size = (n << qopt->peakrate.cell_log) - 1; - if (size < max_size) - max_size = size; - } - if (max_size < 0) - goto done; + if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, + tb[TCA_TBF_RTAB])); - if (max_size < psched_mtu(qdisc_dev(sch))) - pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", - max_size, qdisc_dev(sch)->name, - psched_mtu(qdisc_dev(sch))); + if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, + tb[TCA_TBF_PTAB])); if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); @@ -349,6 +349,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) } } + buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); + mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); + + if (tb[TCA_TBF_RATE64]) + rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); + psched_ratecfg_precompute(&rate, &qopt->rate, rate64); + + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + + if (qopt->peakrate.rate) { + if (tb[TCA_TBF_PRATE64]) + prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); + psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); + if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { + pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", + peak.rate_bytes_ps, rate.rate_bytes_ps); + err = -EINVAL; + goto done; + } + + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } + + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + + if (!max_size) { + err = -EINVAL; + goto done; + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); @@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - if (tb[TCA_TBF_RATE64]) - rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); - psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); - if (ptab) { - if (tb[TCA_TBF_PRATE64]) - prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); - psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); + memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); + if (qopt->peakrate.rate) { + memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); q->peak_present = true; } else { q->peak_present = false; @@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); err = 0; done: - if (rtab) - qdisc_put_rtab(rtab); - if (ptab) - qdisc_put_rtab(ptab); return err; } -- cgit v1.2.3 From 1598f7cb4745c40467decc91ee44ec615773eb45 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:28 +0800 Subject: net: sched: htb: fix the calculation of quantum Now, 32bit rates may be not the true rate. So use rate_bytes_ps which is from max(rate32, rate64) to calcualte quantum. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0e1e38b40025..717b2108f852 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1477,11 +1477,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); } + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); + /* it used to be a nasty bug here, we have to check that node * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = hopt->rate.rate / q->rate2quantum; + u64 quantum = cl->rate.rate_bytes_ps; + + do_div(quantum, q->rate2quantum); + cl->quantum = min_t(u64, quantum, INT_MAX); + if (!hopt->quantum && cl->quantum < 1000) { pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", @@ -1500,13 +1511,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; - - psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); - cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); -- cgit v1.2.3 From 610438b74496b2986a9025f8e23c134cb638e338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 08:10:05 -0800 Subject: udp: ipv4: fix potential use after free in udp_v4_early_demux() pskb_may_pull() can reallocate skb->head, we need to move the initialization of iph and uh pointers after its call. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e2aecbe22c4..16d246a51a02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,17 +1909,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, -- cgit v1.2.3 From 975022310233fb0f0193873d79a7b8438070fa82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 14:46:51 -0800 Subject: udp: ipv4: must add synchronization in udp_sk_rx_dst_set() Unlike TCP, UDP input path does not hold the socket lock. Before messing with sk->sk_rx_dst, we must use a spinlock, otherwise multiple cpus could leak a refcount. This patch also takes care of renewing a stale dst entry. (When the sk->sk_rx_dst would not be used by IP early demux) Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d246a51a02..62c19fdd102d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1599,12 +1599,21 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use sk_dst_lock to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; - dst_hold(dst); - sk->sk_rx_dst = dst; + spin_lock(&sk->sk_dst_lock); + old = sk->sk_rx_dst; + if (likely(old != dst)) { + dst_hold(dst); + sk->sk_rx_dst = dst; + dst_release(old); + } + spin_unlock(&sk->sk_dst_lock); } /* @@ -1737,10 +1746,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = skb_steal_sock(skb); if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3 From d55d282e6af88120ad90e93a88f70e3116dc0e3d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 12 Dec 2013 10:57:22 +0800 Subject: sch_tbf: use do_div() for 64-bit divide It's doing a 64-bit divide which is not supported on 32-bit architectures in psched_ns_t2l(). The correct way to do this is to use do_div(). It's introduced by commit cc106e441a63 ("net: sched: tbf: fix the calculation of max_size") Reported-by: kbuild test robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a44928c6ba24..887e672f9d7d 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -131,8 +131,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, do_div(len, NSEC_PER_SEC); - if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) - len = (len / 53) * 48; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { + do_div(len, 53); + len = len * 48; + } if (len > r->overhead) len -= r->overhead; -- cgit v1.2.3