net: lockless sock_i_ino()

Followup of commit c51da3f7a1 ("net: remove sock_i_uid()")

A recent syzbot report was the trigger for this change.

Over the years, we had many problems caused by the
read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid().

We could fix smc_diag_dump_proto() or make a more radical move:

Instead of waiting for new syzbot reports, cache the socket
inode number in sk->sk_ino, so that we no longer
need to acquire sk->sk_callback_lock in sock_i_ino().

This makes socket dumps faster (one less cache line miss,
and two atomic ops avoided).

Prior art:

commit 25a9c8a443 ("netlink: Add __sock_i_ino() for __netlink_diag_dump().")
commit 4f9bf2a2f5 ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.")
commit efc3dbc374 ("rds: Make rds_sock_lock BH rather than IRQ safe.")

Fixes: d2d6422f8b ("x86: Allow to enable PREEMPT_RT.")
Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2025-09-02 18:36:03 +00:00 committed by Jakub Kicinski
parent b4ada0618e
commit 5d6b58c932
4 changed files with 14 additions and 28 deletions

View File

@ -285,6 +285,7 @@ struct sk_filter;
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
* @sk_ino: inode number (zero if orphaned)
* @sk_prefer_busy_poll: prefer busypolling over softirq processing
* @sk_busy_poll_budget: napi processing budget when busypolling
* @sk_priority: %SO_PRIORITY setting
@ -518,6 +519,7 @@ struct sock {
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
unsigned long sk_ino;
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk)
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
sk->sk_socket = sock;
if (sock) {
WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
}
}
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk)
sk_set_socket(sk, NULL);
sk->sk_wq = NULL;
/* Note: sk_uid is unchanged. */
WRITE_ONCE(sk->sk_ino, 0);
write_unlock_bh(&sk->sk_callback_lock);
}
@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
rcu_assign_pointer(sk->sk_wq, &parent->wq);
parent->sk = sk;
sk_set_socket(sk, parent);
WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
security_sock_graft(sk, parent);
write_unlock_bh(&sk->sk_callback_lock);
}
static inline unsigned long sock_i_ino(const struct sock *sk)
{
/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
return READ_ONCE(sk->sk_ino);
}
static inline kuid_t sk_uid(const struct sock *sk)
{
/* Paired with WRITE_ONCE() in sockfs_setattr() */
return READ_ONCE(sk->sk_uid);
}
unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
{
return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);

View File

@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
unsigned long __sock_i_ino(struct sock *sk)
{
unsigned long ino;
read_lock(&sk->sk_callback_lock);
ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
read_unlock(&sk->sk_callback_lock);
return ino;
}
EXPORT_SYMBOL(__sock_i_ino);
unsigned long sock_i_ino(struct sock *sk)
{
unsigned long ino;
local_bh_disable();
ino = __sock_i_ino(sk);
local_bh_enable();
return ino;
}
EXPORT_SYMBOL(sock_i_ino);
/*
* Allocate a skb from the socket's send buffer.
*/

View File

@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
write_lock_bh(&sk->sk_callback_lock);
rcu_assign_pointer(sk->sk_wq, &parent->wq);
sk_set_socket(sk, parent);
WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
write_unlock_bh(&sk->sk_callback_lock);
}

View File

@ -168,7 +168,7 @@ mc_list:
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
__sock_i_ino(sk)) < 0) {
sock_i_ino(sk)) < 0) {
ret = 1;
break;
}