一、总述
#
- recvmsg可以像普通的recv一样接收消息,也可以接收其他进程提供的文件句柄
二、代码流程
#
1// net/socket.c: __sys_recvmsg() maps fd to its socket and hands the request to ___sys_recvmsg()
2long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
3 bool forbid_cmsg_compat)
4{
5 int fput_needed, err;
6 struct msghdr msg_sys;
7 struct socket *sock;
8
9 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) /* reject MSG_CMSG_COMPAT when the caller forbids it */
10 return -EINVAL;
11
12 sock = sockfd_lookup_light(fd, &err, &fput_needed);
13 if (!sock) /* err is returned as-is from here, so the lookup presumably sets it on failure -- confirm */
14 goto out;
15
16 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); /* the trailing 0 is the nosec argument */
17
18 fput_light(sock->file, fput_needed); /* release using the fput_needed value filled in by the lookup */
19out:
20 return err;
21}
22
23SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
24 unsigned int, flags) /* recvmsg system call: forwards its three arguments to __sys_recvmsg() */
25{
26 return __sys_recvmsg(fd, msg, flags, true); /* true = forbid_cmsg_compat, so MSG_CMSG_COMPAT yields -EINVAL */
27}
1// net/socket.c: ___sys_recvmsg() copies in the user msghdr, runs ____sys_recvmsg(), then frees iov
2static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
3 struct msghdr *msg_sys, unsigned int flags, int nosec)
4{
5 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
6 /* user mode address pointers */
7 struct sockaddr __user *uaddr;
8 ssize_t err; /* NOTE(review): ssize_t here, while the function is declared to return int */
9
10 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov); /* presumably fills msg_sys from msg; uaddr and iov are passed by address as outputs */
11 if (err < 0)
12 return err;
13
14 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
15 kfree(iov); /* iov starts out pointing at iovstack; presumably the copy helper leaves it NULL or heap-allocated -- confirm */
16 return err;
17}
1// net/socket.c: ____sys_recvmsg() does the receive, then writes address, flags and control length back to user space
2static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
3 struct user_msghdr __user *msg,
4 struct sockaddr __user *uaddr,
5 unsigned int flags, int nosec)
6{
7 struct compat_msghdr __user *msg_compat =
8 (struct compat_msghdr __user *) msg;
9 int __user *uaddr_len = COMPAT_NAMELEN(msg);
10 struct sockaddr_storage addr;
11 unsigned long cmsg_ptr;
12 int len;
13 ssize_t err;
14
15 msg_sys->msg_name = &addr; /* the source address is received into this local buffer first */
16 cmsg_ptr = (unsigned long)msg_sys->msg_control; /* remember where the control buffer started */
17 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
18
19 /* We assume all kernel code knows the size of sockaddr_storage */
20 msg_sys->msg_namelen = 0;
21
22 if (sock->file->f_flags & O_NONBLOCK)
23 flags |= MSG_DONTWAIT;
24
25 // Receive the data from the socket
26 if (unlikely(nosec)) /* nosec bypasses the security hook that sock_recvmsg() runs first */
27 err = sock_recvmsg_nosec(sock, msg_sys, flags);
28 else
29 err = sock_recvmsg(sock, msg_sys, flags);
30
31 if (err < 0)
32 goto out;
33 len = err; /* keep the received byte count; err is reused below */
34
35 if (uaddr != NULL) { /* the caller supplied a user-space address buffer */
36 err = move_addr_to_user(&addr,
37 msg_sys->msg_namelen, uaddr,
38 uaddr_len);
39 if (err < 0)
40 goto out;
41 }
42 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
43 COMPAT_FLAGS(msg)); /* MSG_CMSG_COMPAT is masked out of the flags reported to user space */
44 if (err)
45 goto out;
46 // Compat vs. native msghdr layout: store how far msg_control advanced into the user's msg_controllen
47 if (MSG_CMSG_COMPAT & flags)
48 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
49 &msg_compat->msg_controllen);
50 else
51 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
52 &msg->msg_controllen);
53 if (err)
54 goto out;
55 err = len; /* success: return the received byte count */
56out:
57 return err;
58}
59
60// net/socket.c
61/**
62 * sock_recvmsg - receive a message from @sock
63 * @sock: socket
64 * @msg: message to receive
65 * @flags: message flags
66 *
67 * Receives @msg from @sock, passing through LSM. Returns the total number
68 * of bytes received, or an error.
69 */
70int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
71{
72 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
73
74 return err ?: sock_recvmsg_nosec(sock, msg, flags); /* a nonzero hook result is returned as-is; otherwise do the receive */
75}
76EXPORT_SYMBOL(sock_recvmsg);
unix套接字
#
dgram(数据报,类似udp)的接收
#
1// net/unix/af_unix.c: unix_dgram_recvmsg() picks between prot->recvmsg and __unix_dgram_recvmsg()
2static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3 int flags)
4{
5 struct sock *sk = sock->sk;
6
7#ifdef CONFIG_BPF_SYSCALL
8 const struct proto *prot = READ_ONCE(sk->sk_prot);
9
10 if (prot != &unix_dgram_proto) /* sk_prot is not the stock unix_dgram_proto: use its recvmsg instead */
11 return prot->recvmsg(sk, msg, size, flags, NULL);
12#endif
13 return __unix_dgram_recvmsg(sk, msg, size, flags);
14}
15
16// net/unix/af_unix.c: __unix_dgram_recvmsg() dequeues one datagram, copies it into msg and delivers its scm data
17int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
18 int flags)
19{
20 struct scm_cookie scm;
21 struct socket *sock = sk->sk_socket;
22 struct unix_sock *u = unix_sk(sk);
23 struct sk_buff *skb, *last;
24 long timeo;
25 int skip;
26 int err;
27
28 err = -EOPNOTSUPP;
29 if (flags&MSG_OOB) /* MSG_OOB is refused with -EOPNOTSUPP */
30 goto out;
31
32 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
33 // Receive loop bounded by the timeout timeo
34 do {
35 mutex_lock(&u->iolock);
36
37 skip = sk_peek_offset(sk, flags);
38 // Take a message from the sock's receive queue
39 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
40 &skip, &err, &last);
41 if (skb) {
42 if (!(flags & MSG_PEEK))
43 scm_stat_del(sk, skb);
44 break; /* leaves the loop with iolock still held */
45 }
46
47 mutex_unlock(&u->iolock);
48
49 if (err != -EAGAIN) /* only -EAGAIN leads to waiting for more packets */
50 break;
51 } while (timeo &&
52 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
53 &err, &timeo, last));
54
55 if (!skb) { /* implies iolock unlocked */
56 unix_state_lock(sk);
57 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
58 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
59 (sk->sk_shutdown & RCV_SHUTDOWN))
60 err = 0;
61 unix_state_unlock(sk);
62 goto out;
63 }
64
65 if (wq_has_sleeper(&u->peer_wait))
66 wake_up_interruptible_sync_poll(&u->peer_wait,
67 EPOLLOUT | EPOLLWRNORM |
68 EPOLLWRBAND);
69
70 if (msg->msg_name)
71 unix_copy_addr(msg, skb->sk);
72
73 if (size > skb->len - skip) /* clamp the copy to what remains of the datagram after skip */
74 size = skb->len - skip;
75 else if (size < skb->len - skip) /* the buffer is smaller than the remaining data: flag truncation */
76 msg->msg_flags |= MSG_TRUNC;
77
78 err = skb_copy_datagram_msg(skb, skip, msg, size);
79 if (err)
80 goto out_free;
81
82 if (sock_flag(sk, SOCK_RCVTSTAMP))
83 __sock_recv_timestamp(msg, sk, skb);
84
85 memset(&scm, 0, sizeof(scm));
86
87 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
88 // Presumably copies the security data recorded in the skb into scm (helper not shown) -- confirm
89 unix_set_secdata(&scm, skb);
90
91 if (!(flags & MSG_PEEK)) {
92 if (UNIXCB(skb).fp)
93 unix_detach_fds(&scm, skb);
94
95 sk_peek_offset_bwd(sk, skb->len);
96 } else {
97 /* It is questionable: on PEEK we could:
98 - do not return fds - good, but too simple 8)
99 - return fds, and do not return them on read (old strategy,
100 apparently wrong)
101 - clone fds (I chose it for now, it is the most universal
102 solution)
103
104 POSIX 1003.1g does not actually define this clearly
105 at all. POSIX 1003.1g doesn't define a lot of things
106 clearly however!
107
108 */
109
110 sk_peek_offset_fwd(sk, size);
111
112 if (UNIXCB(skb).fp)
113 unix_peek_fds(&scm, skb);
114 }
115 err = (flags & MSG_TRUNC) ? skb->len - skip : size; /* with MSG_TRUNC return the full remaining length, else the copied size */
116 // Hand the scm cookie to scm_recv() for delivery into msg
117 scm_recv(sock, msg, &scm, flags);
118
119out_free:
120 skb_free_datagram(sk, skb);
121 mutex_unlock(&u->iolock); /* drops the iolock kept when the loop broke out with an skb */
122out:
123 return err;
124}
1// include/net/scm.h: scm_recv() emits what scm holds (credentials when SOCK_PASSCRED is set, then passed files) into msg
2static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
3 struct scm_cookie *scm, int flags)
4{
5 if (!msg->msg_control) { /* no control buffer: flag MSG_CTRUNC if something was pending, then drop scm */
6 if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp)
7 msg->msg_flags |= MSG_CTRUNC;
8 scm_destroy(scm);
9 return;
10 }
11
12 if (test_bit(SOCK_PASSCRED, &sock->flags)) {
13 struct user_namespace *current_ns = current_user_ns();
14 struct ucred ucreds = {
15 .pid = scm->creds.pid,
16 .uid = from_kuid_munged(current_ns, scm->creds.uid),
17 .gid = from_kgid_munged(current_ns, scm->creds.gid),
18 };
19 put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
20 }
21
22 scm_destroy_cred(scm);
23
24 scm_passec(sock, msg, scm);
25
26 if (!scm->fp) /* no files were passed, so there is nothing left to deliver */
27 return;
28 // Turn the passed files into fds for the receiver (see scm_detach_fds)
29 scm_detach_fds(msg, scm);
30}
接收其他进程发送过来的句柄的处理
#
1// net/core/scm.c: scm_detach_fds() installs the files carried in scm as fds and writes an SCM_RIGHTS cmsg for them
2void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
3{
4 struct cmsghdr __user *cm =
5 (__force struct cmsghdr __user *)msg->msg_control;
6 unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
7 int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); /* never more fds than scm carries or scm_max_fds(msg) allows */
8 int __user *cmsg_data = CMSG_USER_DATA(cm);
9 int err = 0, i;
10
11 /* no use for FD passing from kernel space callers */
12 if (WARN_ON_ONCE(!msg->msg_control_is_user))
13 return;
14
15 if (msg->msg_flags & MSG_CMSG_COMPAT) {
16 scm_detach_fds_compat(msg, scm);
17 return;
18 }
19
20 // For each file in scm, allocate an fd number and store it in the matching cmsg_data slot
21 for (i = 0; i < fdmax; i++) {
22 err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
23 if (err < 0) /* stop at the first failure; i then counts the fds already installed */
24 break;
25 }
26
27 if (i > 0) { /* at least one fd was installed: write the cmsg header for them */
28 int cmlen = CMSG_LEN(i * sizeof(int));
29
30 err = put_user(SOL_SOCKET, &cm->cmsg_level);
31 if (!err)
32 err = put_user(SCM_RIGHTS, &cm->cmsg_type);
33 if (!err)
34 err = put_user(cmlen, &cm->cmsg_len);
35 if (!err) {
36 cmlen = CMSG_SPACE(i * sizeof(int)); /* advance msg_control by this much, clamped to the remaining msg_controllen */
37 if (msg->msg_controllen < cmlen)
38 cmlen = msg->msg_controllen;
39 msg->msg_control += cmlen;
40 msg->msg_controllen -= cmlen;
41 }
42 }
43
44 if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) /* not every passed file was delivered */
45 msg->msg_flags |= MSG_CTRUNC;
46
47 /*
48 * All of the files that fit in the message have had their usage counts
49 * incremented, so we just free the list.
50 */
51 __scm_destroy(scm);
52}
53EXPORT_SYMBOL(scm_detach_fds);
1// include/linux/file.h: receive_fd_user() rejects a NULL ufd with -EFAULT, then defers to __receive_fd()
2static inline int receive_fd_user(struct file *file, int __user *ufd,
3 unsigned int o_flags)
4{
5 if (ufd == NULL) /* unlike __receive_fd(), this wrapper requires a destination for the fd number */
6 return -EFAULT;
7 return __receive_fd(file, ufd, o_flags);
8}
9
10// fs/file.c
11/**
12 * __receive_fd() - Install received file into file descriptor table
13 * @file: struct file that was received from another process
14 * @ufd: __user pointer to write new fd number to
15 * @o_flags: the O_* flags to apply to the new fd entry
16 *
17 * Installs a received file into the file descriptor table, with appropriate
18 * checks and count updates. Optionally writes the fd number to userspace, if
19 * @ufd is non-NULL.
20 *
21 * This helper handles its own reference counting of the incoming
22 * struct file.
23 *
24 * Returns newly install fd or -ve on error.
25 */
26int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
27{
28 int new_fd;
29 int error;
30
31 error = security_file_receive(file);
32 if (error) /* the security hook refused the file: nothing has been allocated yet */
33 return error;
34
35 // Reserve a new, unused fd
36 new_fd = get_unused_fd_flags(o_flags);
37 if (new_fd < 0)
38 return new_fd;
39
40 if (ufd) {
41 // Write the new fd number out to the user-space location ufd
42 error = put_user(new_fd, ufd);
43 if (error) {
44 put_unused_fd(new_fd); /* give the reserved fd back when the write to user space fails */
45 return error;
46 }
47 }
48
49 // Bind the struct file to the new fd
50 // get_file() takes an extra reference on the file (per the original author's note)
51 fd_install(new_fd, get_file(file));
52 // Extra handling for the case where the file is a socket
53 __receive_sock(file);
54 return new_fd;
55}
- socket文件类型有单独的处理:更新该socket的cgroup数据(sk_cgrp_data)中的netprioidx和classid
1// net/core/sock.c
2/*
3 * When a file is received (via SCM_RIGHTS, etc), we must bump the
4 * various sock-based usage counts.
5 */
6void __receive_sock(struct file *file)
7{
8 struct socket *sock;
9
10 sock = sock_from_file(file);
11 if (sock) { /* nothing to do unless sock_from_file() returned a socket */
12 sock_update_netprioidx(&sock->sk->sk_cgrp_data);
13 sock_update_classid(&sock->sk->sk_cgrp_data);
14 }
15}