about | summary | refs | log | tree | commit | diff
path: root/net/packet
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet')
-rw-r--r-- net/packet/af_packet.c | 116
-rw-r--r-- net/packet/diag.c | 12
-rw-r--r-- net/packet/internal.h | 37
3 files changed, 90 insertions, 75 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d4e76e2ae153..568f8d76e3c1 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -270,8 +270,11 @@ static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
}
#endif
-static int packet_direct_xmit(struct sk_buff *skb)
+static int packet_xmit(const struct packet_sock *po, struct sk_buff *skb)
{
+ if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS))
+ return dev_queue_xmit(skb);
+
#ifdef CONFIG_NETFILTER_EGRESS
if (nf_hook_egress_active()) {
skb = nf_hook_direct_egress(skb);
@@ -305,11 +308,6 @@ static void packet_cached_dev_reset(struct packet_sock *po)
RCU_INIT_POINTER(po->cached_dev, NULL);
}
-static bool packet_use_direct_xmit(const struct packet_sock *po)
-{
- return po->xmit == packet_direct_xmit;
-}
-
static u16 packet_pick_tx_queue(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -339,14 +337,14 @@ static void __register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
- if (!po->running) {
+ if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
if (po->fanout)
__fanout_link(sk, po);
else
dev_add_pack(&po->prot_hook);
sock_hold(sk);
- po->running = 1;
+ packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1);
}
}
@@ -368,7 +366,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
lockdep_assert_held_once(&po->bind_lock);
- po->running = 0;
+ packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0);
if (po->fanout)
__fanout_unlink(sk, po);
@@ -388,7 +386,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
- if (po->running)
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
__unregister_prot_hook(sk, sync);
}
@@ -473,7 +471,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
struct timespec64 ts;
__u32 ts_status;
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+ if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp))))
return 0;
h.raw = frame;
@@ -1306,22 +1304,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po,
static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
- int pressure, ret;
+ bool pressure;
+ int ret;
ret = __packet_rcv_has_room(po, skb);
pressure = ret != ROOM_NORMAL;
- if (READ_ONCE(po->pressure) != pressure)
- WRITE_ONCE(po->pressure, pressure);
+ if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure)
+ packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure);
return ret;
}
static void packet_rcv_try_clear_pressure(struct packet_sock *po)
{
- if (READ_ONCE(po->pressure) &&
+ if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) &&
__packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
- WRITE_ONCE(po->pressure, 0);
+ packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false);
}
static void packet_sock_destruct(struct sock *sk)
@@ -1408,7 +1407,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
i = j = min_t(int, po->rollover->sock, num - 1);
do {
po_next = pkt_sk(rcu_dereference(f->arr[i]));
- if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
+ if (po_next != po_skip &&
+ !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) &&
packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
po->rollover->sock = i;
@@ -1781,7 +1781,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -EINVAL;
spin_lock(&po->bind_lock);
- if (po->running &&
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
@@ -2183,7 +2183,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_hatype = dev->type;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2308,7 +2308,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
netoff += sizeof(struct virtio_net_hdr);
do_vnet = true;
}
@@ -2402,7 +2402,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
* closer to the time of capture.
*/
ts_status = tpacket_get_timestamp(skb, &ts,
- po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+ READ_ONCE(po->tp_tstamp) |
+ SOF_TIMESTAMPING_SOFTWARE);
if (!ts_status)
ktime_get_real_ts64(&ts);
@@ -2460,7 +2461,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2621,8 +2622,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
nr_frags = skb_shinfo(skb)->nr_frags;
if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
- pr_err("Packet exceed the number of skb frags(%lu)\n",
- MAX_SKB_FRAGS);
+ pr_err("Packet exceed the number of skb frags(%u)\n",
+ (unsigned int)MAX_SKB_FRAGS);
return -EFAULT;
}
@@ -2670,7 +2671,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
return -EMSGSIZE;
}
- if (unlikely(po->tp_tx_has_off)) {
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) {
int off_min, off_max;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2778,7 +2779,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
+ if ((size_max > dev->mtu + reserve + VLAN_HLEN) &&
+ !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
size_max = dev->mtu + reserve + VLAN_HLEN;
reinit_completion(&po->skb_completion);
@@ -2807,7 +2809,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
vnet_hdr = data;
data += sizeof(*vnet_hdr);
tp_len -= sizeof(*vnet_hdr);
@@ -2835,13 +2837,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
addr, hlen, copylen, &sockc);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
- !po->has_vnet_hdr &&
+ !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
if (unlikely(tp_len < 0)) {
tpacket_error:
- if (po->tp_loss) {
+ if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) {
__packet_set_status(po, ph,
TP_STATUS_AVAILABLE);
packet_increment_head(&po->tx_ring);
@@ -2854,7 +2856,7 @@ tpacket_error:
}
}
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
tp_len = -EINVAL;
goto tpacket_error;
@@ -2867,7 +2869,7 @@ tpacket_error:
packet_inc_pending(&po->tx_ring);
status = TP_STATUS_SEND_REQUEST;
- err = po->xmit(skb);
+ err = packet_xmit(po, skb);
if (unlikely(err != 0)) {
if (err > 0)
err = net_xmit_errno(err);
@@ -2988,7 +2990,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
if (err)
goto out_unlock;
@@ -3070,7 +3072,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- err = po->xmit(skb);
+ err = packet_xmit(po, skb);
+
if (unlikely(err != 0)) {
if (err > 0)
err = net_xmit_errno(err);
@@ -3217,7 +3220,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
if (need_rehook) {
dev_hold(dev);
- if (po->running) {
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
rcu_read_unlock();
/* prevents packet_notifier() from calling
* register_prot_hook()
@@ -3230,7 +3233,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
dev->ifindex);
}
- BUG_ON(po->running);
+ BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING));
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
@@ -3352,7 +3355,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
- po->xmit = dev_queue_xmit;
err = packet_alloc_pending(po);
if (err)
@@ -3447,7 +3449,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
packet_rcv_try_clear_pressure(pkt_sk(sk));
- if (pkt_sk(sk)->has_vnet_hdr) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) {
err = packet_rcv_vnet(msg, skb, &len);
if (err)
goto out_free;
@@ -3511,7 +3513,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
- if (pkt_sk(sk)->auxdata) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
struct tpacket_auxdata aux;
aux.tp_status = TP_STATUS_USER;
@@ -3882,7 +3884,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
- po->tp_loss = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val);
ret = 0;
}
release_sock(sk);
@@ -3897,9 +3899,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->auxdata = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
return 0;
}
case PACKET_ORIGDEV:
@@ -3911,9 +3911,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->origdev = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
return 0;
}
case PACKET_VNET_HDR:
@@ -3931,7 +3929,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
- po->has_vnet_hdr = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val);
ret = 0;
}
release_sock(sk);
@@ -3946,7 +3944,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_tstamp = val;
+ WRITE_ONCE(po->tp_tstamp, val);
return 0;
}
case PACKET_FANOUT:
@@ -3993,7 +3991,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
lock_sock(sk);
if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
- po->tp_tx_has_off = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val);
release_sock(sk);
return 0;
@@ -4007,7 +4005,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
+ packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val);
return 0;
}
default:
@@ -4058,13 +4056,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
break;
case PACKET_AUXDATA:
- val = po->auxdata;
+ val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
break;
case PACKET_ORIGDEV:
- val = po->origdev;
+ val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
break;
case PACKET_VNET_HDR:
- val = po->has_vnet_hdr;
+ val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR);
break;
case PACKET_VERSION:
val = po->tp_version;
@@ -4094,10 +4092,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
val = po->tp_reserve;
break;
case PACKET_LOSS:
- val = po->tp_loss;
+ val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS);
break;
case PACKET_TIMESTAMP:
- val = po->tp_tstamp;
+ val = READ_ONCE(po->tp_tstamp);
break;
case PACKET_FANOUT:
val = (po->fanout ?
@@ -4119,10 +4117,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
- val = po->tp_tx_has_off;
+ val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF);
break;
case PACKET_QDISC_BYPASS:
- val = packet_use_direct_xmit(po);
+ val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS);
break;
default:
return -ENOPROTOOPT;
@@ -4157,7 +4155,7 @@ static int packet_notifier(struct notifier_block *this,
case NETDEV_DOWN:
if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock);
- if (po->running) {
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
__unregister_prot_hook(sk, false);
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
@@ -4468,7 +4466,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
/* Detach socket from network */
spin_lock(&po->bind_lock);
- was_running = po->running;
+ was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING);
num = po->num;
if (was_running) {
WRITE_ONCE(po->num, 0);
@@ -4679,7 +4677,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
s->sk_type,
ntohs(READ_ONCE(po->num)),
READ_ONCE(po->ifindex),
- po->running,
+ packet_sock_flag(po, PACKET_SOCK_RUNNING),
atomic_read(&s->sk_rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
sock_i_ino(s));
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 07812ae5ca07..de4ced5cf3e8 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -18,18 +18,18 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_version = po->tp_version;
pinfo.pdi_reserve = po->tp_reserve;
pinfo.pdi_copy_thresh = po->copy_thresh;
- pinfo.pdi_tstamp = po->tp_tstamp;
+ pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp);
pinfo.pdi_flags = 0;
- if (po->running)
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
pinfo.pdi_flags |= PDI_RUNNING;
- if (po->auxdata)
+ if (packet_sock_flag(po, PACKET_SOCK_AUXDATA))
pinfo.pdi_flags |= PDI_AUXDATA;
- if (po->origdev)
+ if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
pinfo.pdi_flags |= PDI_ORIGDEV;
- if (po->has_vnet_hdr)
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
pinfo.pdi_flags |= PDI_VNETHDR;
- if (po->tp_loss)
+ if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS))
pinfo.pdi_flags |= PDI_LOSS;
return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 48af35b1aed2..27930f69f368 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -116,13 +116,7 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
- unsigned int running; /* bind_lock must be held */
- unsigned int auxdata:1, /* writer must hold sock lock */
- origdev:1,
- has_vnet_hdr:1,
- tp_loss:1,
- tp_tx_has_off:1;
- int pressure;
+ unsigned long flags;
int ifindex; /* bound device */
__be16 num;
struct packet_rollover *rollover;
@@ -134,14 +128,37 @@ struct packet_sock {
unsigned int tp_tstamp;
struct completion skb_completion;
struct net_device __rcu *cached_dev;
- int (*xmit)(struct sk_buff *skb);
struct packet_type prot_hook ____cacheline_aligned_in_smp;
atomic_t tp_drops ____cacheline_aligned_in_smp;
};
-static inline struct packet_sock *pkt_sk(struct sock *sk)
+#define pkt_sk(ptr) container_of_const(ptr, struct packet_sock, sk)
+
+enum packet_sock_flags {
+ PACKET_SOCK_ORIGDEV,
+ PACKET_SOCK_AUXDATA,
+ PACKET_SOCK_TX_HAS_OFF,
+ PACKET_SOCK_TP_LOSS,
+ PACKET_SOCK_HAS_VNET_HDR,
+ PACKET_SOCK_RUNNING,
+ PACKET_SOCK_PRESSURE,
+ PACKET_SOCK_QDISC_BYPASS,
+};
+
+static inline void packet_sock_flag_set(struct packet_sock *po,
+ enum packet_sock_flags flag,
+ bool val)
+{
+ if (val)
+ set_bit(flag, &po->flags);
+ else
+ clear_bit(flag, &po->flags);
+}
+
+static inline bool packet_sock_flag(const struct packet_sock *po,
+ enum packet_sock_flags flag)
{
- return (struct packet_sock *)sk;
+ return test_bit(flag, &po->flags);
}
#endif