一、总述
#
- sendmsg可以像普通的send一样发送消息,也可以进行fd的转发
- 做fd的转发需要使用Unix域套接字(AF_UNIX/PF_UNIX)
二、代码流程
#
1// net/socket.c
2long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
3 bool forbid_cmsg_compat)
4{
5 int fput_needed, err;
6 struct msghdr msg_sys;
7 struct socket *sock;
8
9 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
10 return -EINVAL;
11
12 // 根据文件句柄号找sock结构体
13 sock = sockfd_lookup_light(fd, &err, &fput_needed);
14 if (!sock)
15 goto out;
16
17 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
18
19 fput_light(sock->file, fput_needed);
20out:
21 return err;
22}
23
24SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
25{
26 return __sys_sendmsg(fd, msg, flags, true);
27}
1// net/socket.c
2/*
3 * BSD sendmsg interface
4 */
5long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
6 unsigned int flags)
7{
8 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
9}
10
11// net/socket.c
/*
 * ____sys_sendmsg - bring the control (ancillary) data of @msg_sys into a
 * kernel buffer, fix up the message flags and send the message on @sock.
 *
 * @sock:                 socket to send on
 * @msg_sys:              message header; its msg_controllen / msg_control_user
 *                        describe the control data still to be copied in
 * @flags:                send flags; bits of msg_sys->msg_flags selected by
 *                        @allowed_msghdr_flags are OR-ed into them
 * @used_address:         may be NULL; when set it caches the last destination
 *                        that was sent to successfully (see the sendmmsg()
 *                        comments in the body)
 * @allowed_msghdr_flags: mask applied to msg_sys->msg_flags
 *
 * Returns the result of sock_sendmsg() / sock_sendmsg_nosec(), or:
 *   -ENOBUFS if msg_controllen exceeds INT_MAX or sock_kmalloc() fails,
 *   -EFAULT  if copy_from_user() of the control data fails,
 *   the error from cmsghdr_from_user_compat_to_kern() on the compat path.
 */
12static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
13 unsigned int flags, struct used_address *used_address,
14 unsigned int allowed_msghdr_flags)
15{
16 unsigned char ctl[sizeof(struct cmsghdr) + 20]
17 __aligned(sizeof(__kernel_size_t));
18 /* 20 is size of ipv6_pktinfo */
/* Start with the on-stack buffer; a heap buffer replaces it only if needed. */
19 unsigned char *ctl_buf = ctl;
20 int ctl_len;
21 ssize_t err;
22
/* -ENOBUFS is the result for both early "goto out" exits below. */
23 err = -ENOBUFS;
24
25 if (msg_sys->msg_controllen > INT_MAX)
26 goto out;
27 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
28 ctl_len = msg_sys->msg_controllen;
29 // Handle both compat (32-bit) and native callers: copy the control message from user space into kernel space
30 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
31 // Convert the compat control-message headers. NOTE(review): the helper's name suggests the direction is compat (32-bit) -> kernel layout -- confirm
32 err =
33 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
34 sizeof(ctl));
35 if (err)
36 goto out;
/* Pick up buffer and length from msg_sys; presumably the helper updated them -- verify. */
37 ctl_buf = msg_sys->msg_control;
38 ctl_len = msg_sys->msg_controllen;
39 } else if (ctl_len) {
40 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
41 CMSG_ALIGN(sizeof(struct cmsghdr)));
/* Control data larger than the stack array gets a socket-accounted heap buffer. */
42 if (ctl_len > sizeof(ctl)) {
43 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
44 if (ctl_buf == NULL)
45 goto out;
46 }
47 err = -EFAULT;
48 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
49 goto out_freectl;
/* From here on msg_control points at the kernel copy, not at user memory. */
50 msg_sys->msg_control = ctl_buf;
51 msg_sys->msg_control_is_user = false;
52 }
53 msg_sys->msg_flags = flags;
54
/* A non-blocking file makes this send non-blocking as well. */
55 if (sock->file->f_flags & O_NONBLOCK)
56 msg_sys->msg_flags |= MSG_DONTWAIT;
57 /*
58 * If this is sendmmsg() and current destination address is same as
59 * previously succeeded address, omit asking LSM's decision.
60 * used_address->name_len is initialized to UINT_MAX so that the first
61 * destination address never matches.
62 */
63 if (used_address && msg_sys->msg_name &&
64 used_address->name_len == msg_sys->msg_namelen &&
65 !memcmp(&used_address->name, msg_sys->msg_name,
66 used_address->name_len)) {
67 err = sock_sendmsg_nosec(sock, msg_sys);
68 goto out_freectl;
69 }
70 // The actual sending of the data starts here
71 err = sock_sendmsg(sock, msg_sys);
72 /*
73 * If this is sendmmsg() and sending to current destination address was
74 * successful, remember it.
75 */
76 if (used_address && err >= 0) {
77 used_address->name_len = msg_sys->msg_namelen;
78 if (msg_sys->msg_name)
79 memcpy(&used_address->name, msg_sys->msg_name,
80 used_address->name_len);
81 }
82
83out_freectl:
/* Only a buffer other than the on-stack array needs to be freed. */
84 if (ctl_buf != ctl)
85 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
86out:
87 return err;
88}
// net/socket.c
/**
 * sock_sendmsg - send a message through @sock
 * @sock: socket
 * @msg: message to send
 *
 * Consults the LSM hook security_socket_sendmsg() first. A non-zero result
 * from the hook is returned as is; otherwise @msg is handed to
 * sock_sendmsg_nosec().
 * Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int denied = security_socket_sendmsg(sock, msg, msg_data_left(msg));

	if (denied)
		return denied;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
18
19// net/socket.c
20INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
21 size_t));
22INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
23 size_t));
24static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
25{
26 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
27 inet_sendmsg, sock, msg,
28 msg_data_left(msg));
29 BUG_ON(ret == -EIOCBQUEUED);
30 return ret;
31}
unix套接字发送
#
dgram(数据报,语义类似udp)的发送
#
1// net/unix/af_unix.c
/*
 * unix_dgram_sendmsg - send one datagram of @len bytes on a unix socket.
 *
 * Builds an skb holding the SCM (ancillary) data and the payload taken from
 * msg->msg_iter, locates the destination socket (from msg->msg_name if an
 * address was supplied, otherwise from the socket's peer) and appends the skb
 * to the destination's sk_receive_queue.
 *
 * Returns @len on success (and also when sk_filter() rejects the packet),
 * otherwise a negative error, among them:
 *   -EOPNOTSUPP  MSG_OOB was requested
 *   -ENOTCONN    no address supplied and the socket has no peer
 *   -EMSGSIZE    @len exceeds sk->sk_sndbuf - 32
 *   -ECONNRESET  no destination and no address to look one up with
 *   -EPERM       unix_may_send() refuses the destination
 *   -ECONNREFUSED the socket's own peer is dead
 *   -EPIPE       the destination has RCV_SHUTDOWN set
 *   -EAGAIN      destination queue full and no send timeout left
 *
 * Cleanup labels: out_unlock drops the state lock(s), out_free frees the skb,
 * out drops the reference on the destination and destroys the scm cookie.
 */
2static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
3 size_t len)
4{
5 struct sock *sk = sock->sk;
6 struct net *net = sock_net(sk);
7 struct unix_sock *u = unix_sk(sk);
8 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
9 struct sock *other = NULL;
10 int err;
11 struct sk_buff *skb;
12 long timeo;
13 struct scm_cookie scm;
14 int data_len = 0;
15 int sk_locked;
16
17 wait_for_unix_gc();
18 // Handle the SCM (ancillary data) part here; this includes the code that passes file descriptors to another process
19 err = scm_send(sock, msg, &scm, false);
20 if (err < 0)
21 return err;
22
23 err = -EOPNOTSUPP;
24 if (msg->msg_flags&MSG_OOB)
25 goto out;
26
27 // Was a destination address supplied?
28 if (msg->msg_namelen) {
29 // If so, validate the address
30 err = unix_validate_addr(sunaddr, msg->msg_namelen);
31 if (err)
32 goto out;
33 } else {
34 // No address: the socket is expected to be connected already, so take the peer from the sock structure
35 sunaddr = NULL;
36 err = -ENOTCONN;
37 other = unix_peer_get(sk);
38 if (!other)
39 goto out;
40 }
41
/* With SOCK_PASSCRED set and no address bound yet, autobind the socket first. */
42 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
43 err = unix_autobind(sk);
44 if (err)
45 goto out;
46 }
47
48 err = -EMSGSIZE;
49 if (len > sk->sk_sndbuf - 32)
50 goto out;
51
/* For payloads above SKB_MAX_ALLOC, data_len is the page-aligned part passed separately to the allocator. */
52 if (len > SKB_MAX_ALLOC) {
53 data_len = min_t(size_t,
54 len - SKB_MAX_ALLOC,
55 MAX_SKB_FRAGS * PAGE_SIZE);
56 data_len = PAGE_ALIGN(data_len);
57
58 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
59 }
60
61 // Allocate the skb
62 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
63 msg->msg_flags & MSG_DONTWAIT, &err,
64 PAGE_ALLOC_COSTLY_ORDER);
65 if (skb == NULL)
66 goto out;
67
68 // Copy the SCM data into the skb
69 err = unix_scm_to_skb(&scm, skb, true);
70 if (err < 0)
71 goto out_free;
72
73 skb_put(skb, len - data_len);
74 skb->data_len = data_len;
75 skb->len = len;
/* Copy the payload from the caller's iterator into the skb. */
76 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
77 if (err)
78 goto out_free;
79 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
80
81restart:
82 // other is NULL when no peer was taken from a connection above (or the dead-peer path below reset it): look the destination up by address
83 if (!other) {
84 err = -ECONNRESET;
85 if (sunaddr == NULL)
86 goto out_free;
87
88 other = unix_find_other(net, sunaddr, msg->msg_namelen,
89 sk->sk_type);
90 if (IS_ERR(other)) {
91 err = PTR_ERR(other);
92 other = NULL;
93 goto out_free;
94 }
95 }
96
97 if (sk_filter(other, skb) < 0) {
98 /* Toss the packet but do not return any error to the sender */
99 err = len;
100 goto out_free;
101 }
102
/* sk_locked records whether sk's state lock is held in addition to other's. */
103 sk_locked = 0;
104 unix_state_lock(other);
105restart_locked:
106 err = -EPERM;
107 if (!unix_may_send(sk, other))
108 goto out_unlock;
109
110 if (unlikely(sock_flag(other, SOCK_DEAD))) {
111 /*
112 * Check with 1003.1g - what should
113 * datagram error
114 */
115 unix_state_unlock(other);
116 sock_put(other);
117
118 if (!sk_locked)
119 unix_state_lock(sk);
120
121 err = 0;
/* If the dead socket was our own peer, disconnect from it and fail with -ECONNREFUSED. */
122 if (unix_peer(sk) == other) {
123 unix_peer(sk) = NULL;
124 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
125
126 unix_state_unlock(sk);
127
128 sk->sk_state = TCP_CLOSE;
129 unix_dgram_disconnected(sk, other);
130 sock_put(other);
131 err = -ECONNREFUSED;
132 } else {
133 unix_state_unlock(sk);
134 }
135
/* Otherwise (err == 0) forget this destination and retry the lookup from restart. */
136 other = NULL;
137 if (err)
138 goto out_free;
139 goto restart;
140 }
141
142 err = -EPIPE;
143 if (other->sk_shutdown & RCV_SHUTDOWN)
144 goto out_unlock;
145
146 if (sk->sk_type != SOCK_SEQPACKET) {
147 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
148 if (err)
149 goto out_unlock;
150 }
151
152 /* other == sk && unix_peer(other) != sk if
153 * - unix_peer(sk) == NULL, destination address bound to sk
154 * - unix_peer(sk) == sk by time of get but disconnected before lock
155 */
156 if (other != sk &&
157 unlikely(unix_peer(other) != sk &&
158 unix_recvq_full_lockless(other))) {
/* Receive queue full: with a timeout left, wait for the peer and start over. */
159 if (timeo) {
160 timeo = unix_wait_for_peer(other, timeo);
161
162 err = sock_intr_errno(timeo);
163 if (signal_pending(current))
164 goto out_free;
165
166 goto restart;
167 }
168
/* No timeout left: take both state locks before re-checking the peer. */
169 if (!sk_locked) {
170 unix_state_unlock(other);
171 unix_state_double_lock(sk, other);
172 }
173
174 if (unix_peer(sk) != other ||
175 unix_dgram_peer_wake_me(sk, other)) {
176 err = -EAGAIN;
177 sk_locked = 1;
178 goto out_unlock;
179 }
180
/* Locks were just re-taken, so redo the checks from restart_locked with both held. */
181 if (!sk_locked) {
182 sk_locked = 1;
183 goto restart_locked;
184 }
185 }
186
187 if (unlikely(sk_locked))
188 unix_state_unlock(sk);
189
190 if (sock_flag(other, SOCK_RCVTSTAMP))
191 __net_timestamp(skb);
192 maybe_add_creds(skb, sock, other);
193 scm_stat_add(other, skb);
194 // Append the skb to the destination's receive queue -- this is the actual "send"
195 skb_queue_tail(&other->sk_receive_queue, skb);
196 unix_state_unlock(other);
/* Notify the receiver through its sk_data_ready callback. */
197 other->sk_data_ready(other);
198 sock_put(other);
199 scm_destroy(&scm);
200 return len;
201
202out_unlock:
203 if (sk_locked)
204 unix_state_unlock(sk);
205 unix_state_unlock(other);
206out_free:
207 kfree_skb(skb);
208out:
209 if (other)
210 sock_put(other);
211 scm_destroy(&scm);
212 return err;
213}
发送句柄到其他进程的处理
#
1// include/net/scm.h
2static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
3 struct scm_cookie *scm, bool forcecreds)
4{
5 memset(scm, 0, sizeof(*scm));
6 scm->creds.uid = INVALID_UID;
7 scm->creds.gid = INVALID_GID;
8 if (forcecreds)
9 scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
10 unix_get_peersec_dgram(sock, scm);
11 if (msg->msg_controllen <= 0)
12 return 0;
13 return __scm_send(sock, msg, scm);
14}
15
16// net/core/scm.c
/*
 * __scm_send - walk every control message header of @msg and record the
 * SOL_SOCKET ones in the scm cookie @p.
 *
 * Headers whose cmsg_level is not SOL_SOCKET are skipped. For SOL_SOCKET:
 *   SCM_RIGHTS       only accepted on a PF_UNIX socket; the descriptors are
 *                    collected into p->fp by scm_fp_copy()
 *   SCM_CREDENTIALS  payload must be exactly one struct ucred; after
 *                    scm_check_creds() the pid, uid and gid are stored in @p
 *   anything else    rejected
 *
 * Returns 0 on success. On failure scm_destroy(@p) is called and a negative
 * error is returned: -EINVAL for a malformed or unsupported header or for
 * uid/gid values that do not map to valid ids, -ESRCH when the given pid
 * cannot be found, or the error from scm_fp_copy() / scm_check_creds().
 */
17int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
18{
19 struct cmsghdr *cmsg;
20 int err;
21
22 for_each_cmsghdr(cmsg, msg) {
/* Default error for every "goto error" below that does not set its own. */
23 err = -EINVAL;
24
25 /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
26 /* The first check was omitted in <= 2.2.5. The reasoning was
27 that parser checks cmsg_len in any case, so that
28 additional check would be work duplication.
29 But if cmsg_level is not SOL_SOCKET, we do not check
30 for too short ancillary data object at all! Oops.
31 OK, let's add it...
32 */
33 if (!CMSG_OK(msg, cmsg))
34 goto error;
35
36 if (cmsg->cmsg_level != SOL_SOCKET)
37 continue;
38
39 switch (cmsg->cmsg_type)
40 {
41 case SCM_RIGHTS:
42 // Passing file descriptors requires a unix socket
43 if (!sock->ops || sock->ops->family != PF_UNIX)
44 goto error;
45 err=scm_fp_copy(cmsg, &p->fp);
46 if (err<0)
47 goto error;
48 break;
49 case SCM_CREDENTIALS:
50 {
51 struct ucred creds;
52 kuid_t uid;
53 kgid_t gid;
54 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
55 goto error;
56 memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
57 err = scm_check_creds(&creds);
58 if (err)
59 goto error;
60
61 p->creds.pid = creds.pid;
/* Swap the struct pid held in @p when it does not match the requested pid. */
62 if (!p->pid || pid_vnr(p->pid) != creds.pid) {
63 struct pid *pid;
64 err = -ESRCH;
65 pid = find_get_pid(creds.pid);
66 if (!pid)
67 goto error;
68 put_pid(p->pid);
69 p->pid = pid;
70 }
71
72 err = -EINVAL;
/* Map the numeric ids through the current user namespace. */
73 uid = make_kuid(current_user_ns(), creds.uid);
74 gid = make_kgid(current_user_ns(), creds.gid);
75 if (!uid_valid(uid) || !gid_valid(gid))
76 goto error;
77
78 p->creds.uid = uid;
79 p->creds.gid = gid;
80 break;
81 }
82 default:
83 goto error;
84 }
85 }
86
/* A descriptor list that ended up empty is freed instead of being kept. */
87 if (p->fp && !p->fp->count)
88 {
89 kfree(p->fp);
90 p->fp = NULL;
91 }
92 return 0;
93
94error:
95 scm_destroy(p);
96 return err;
97}
98EXPORT_SYMBOL(__scm_send);
- 上面可以看到,需要设置 `cmsg->cmsg_level = SOL_SOCKET` 并且 `cmsg->cmsg_type = SCM_RIGHTS`,才会进入到句柄发送的处理里面
- 发送句柄必须使用unix套接字
- 使用下面的函数将文件句柄(fd)逐个转换成对应的file结构体,并校验句柄是否合法;只要有一个句柄非法就返回-EBADF,整个sendmsg失败
- sendmsg里面就会增加文件对应的引用计数
1// net/core/scm.c
2static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
3{
4 int *fdp = (int*)CMSG_DATA(cmsg);
5 struct scm_fp_list *fpl = *fplp;
6 struct file **fpp;
7 int i, num;
8
9 // cmsg除了header之外的按照int组装fd数组
10 num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int);
11
12 if (num <= 0)
13 return 0;
14
15 if (num > SCM_MAX_FD)
16 return -EINVAL;
17
18 if (!fpl)
19 {
20 fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
21 if (!fpl)
22 return -ENOMEM;
23 *fplp = fpl;
24 fpl->count = 0;
25 fpl->max = SCM_MAX_FD;
26 fpl->user = NULL;
27 }
28 fpp = &fpl->fp[fpl->count];
29
30 if (fpl->count + num > fpl->max)
31 return -EINVAL;
32
33 /*
34 * Verify the descriptors and increment the usage count.
35 */
36
37 for (i=0; i< num; i++)
38 {
39 int fd = fdp[i];
40 struct file *file;
41 // 这里转换fd到真正的file结构体,所以发送过去的其实是文件结构体而非fd
42 // fget_raw会增加文件的引用计数
43 if (fd < 0 || !(file = fget_raw(fd)))
44 return -EBADF;
45 *fpp++ = file;
46 fpl->count++;
47 }
48
49 if (!fpl->user)
50 fpl->user = get_uid(current_user());
51
52 return num;
53}