about | summary | refs | log | tree | commit | diff
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- include/net/sock.h 60
1 files changed, 41 insertions, 19 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 573f2bf7e0de..11d503417591 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -336,6 +336,7 @@ struct sk_filter;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -428,6 +429,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;
struct sk_filter __rcu *sk_filter;
union {
@@ -1150,8 +1152,12 @@ static inline void sock_rps_record_flow(const struct sock *sk)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
*/
- if (sk->sk_state == TCP_ESTABLISHED)
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
}
#endif
}
@@ -1160,20 +1166,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->hash))
- sk->sk_rxhash = skb->hash;
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in sock_rps_record_flow().
+ */
+ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash))
+ WRITE_ONCE(sk->sk_rxhash, skb->hash);
#endif
}
static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
- sk->sk_rxhash = 0;
+ /* Paired with READ_ONCE() in sock_rps_record_flow() */
+ WRITE_ONCE(sk->sk_rxhash, 0);
#endif
}
#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1183,6 +1194,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
@@ -1246,7 +1258,7 @@ struct proto {
bool kern);
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
int (*init)(struct sock *sk);
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
@@ -1265,8 +1277,7 @@ struct proto {
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len);
- int (*sendpage)(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
+ void (*splice_eof)(struct socket *sock);
int (*bind)(struct sock *sk,
struct sockaddr *addr, int addr_len);
int (*bind_add)(struct sock *sk,
@@ -1312,6 +1323,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1328,6 +1340,7 @@ struct proto {
struct kmem_cache *slab;
unsigned int obj_size;
+ unsigned int ipv6_pinfo_offset;
slab_flags_t slab_flags;
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
@@ -1409,6 +1422,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1418,7 +1437,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1495,7 +1514,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
@@ -1906,10 +1925,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
int sock_no_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
-ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
- size_t size, int flags);
-ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
/*
* Functions to fill in entries in struct proto_ops when a protocol
@@ -2088,6 +2103,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
}
kuid_t sock_i_uid(struct sock *sk);
+unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
@@ -2131,7 +2147,7 @@ sk_dst_get(struct sock *sk)
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ if (dst && !rcuref_get(&dst->__rcuref))
dst = NULL;
rcu_read_unlock();
return dst;
@@ -2697,7 +2713,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
else
sock_write_timestamp(sk, kt);
- if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
+ if (sock_flag(sk, SOCK_WIFI_STATUS) && skb_wifi_acked_valid(skb))
__sock_recv_wifi_status(msg, sk, skb);
}
@@ -2718,7 +2734,7 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
- else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
+ else if (unlikely(sock_read_timestamp(sk) == SK_DEFAULT_STAMP))
sock_write_timestamp(sk, 0);
}
@@ -2806,20 +2822,23 @@ sk_is_refcounted(struct sock *sk)
* skb_steal_sock - steal a socket from an sk_buff
* @skb: sk_buff to steal the socket from
* @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
*/
static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted)
+skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
{
if (skb->sk) {
struct sock *sk = skb->sk;
*refcounted = true;
- if (skb_sk_is_prefetched(skb))
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched)
*refcounted = sk_is_refcounted(sk);
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
+ *prefetched = false;
*refcounted = false;
return NULL;
}
@@ -2961,6 +2980,9 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval);
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
sockptr_t optval, int optlen, bool old_timeval);
+int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
+ void __user *arg, void *karg, size_t size);
+int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
if (sk->sk_prot->sock_is_readable)