sendmsg

一、总述 #

  • sendmsg可以像普通的send一样发送消息,也可以进行fd的转发
  • 做fd的转发需要使用unix域套接字(AF_UNIX/PF_UNIX)

二、代码流程 #

  • 入口自然是系统调用的定义
 1// net/socket.c — __sys_sendmsg: resolve fd to its socket and pass the user msghdr on to ___sys_sendmsg(); returns that call's result, or err from the failed lookup
 2long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
 3		   bool forbid_cmsg_compat)
 4{
 5	int fput_needed, err;
 6	struct msghdr msg_sys;
 7	struct socket *sock;
 8
 9	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))	/* callers that forbid the compat flag reject it up front */
10		return -EINVAL;
11
12    // Look up the socket structure that belongs to this file descriptor number
13	sock = sockfd_lookup_light(fd, &err, &fput_needed);
14	if (!sock)	/* err was handed to the lookup by pointer; presumably filled in there on failure */
15		goto out;
16
17	err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);	/* msg_sys is the on-stack kernel msghdr passed along with the user one */
18
19	fput_light(sock->file, fput_needed);	/* pairs with sockfd_lookup_light() above */
20out:
21	return err;
22}
23
24SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)	/* syscall definition for sendmsg: thin wrapper around __sys_sendmsg() */
25{
26	return __sys_sendmsg(fd, msg, flags, true);	/* true is the forbid_cmsg_compat argument */
27}
  • 继续到发送的地方
 1// net/socket.c
 2/*
 3 *	BSD sendmsg interface: takes a struct socket and a struct msghdr directly and forwards them to ____sys_sendmsg() with no used_address and no allowed msghdr flags
 4 */
 5long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
 6			unsigned int flags)
 7{
 8	return ____sys_sendmsg(sock, msg, flags, NULL, 0);	/* NULL = used_address, 0 = allowed_msghdr_flags */
 9}
10
11// net/socket.c — ____sys_sendmsg: bring the control (ancillary) data into a kernel buffer, finalise msg_flags, then send via sock_sendmsg() or sock_sendmsg_nosec(); returns the send result or a negative errno
12static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
13			   unsigned int flags, struct used_address *used_address,
14			   unsigned int allowed_msghdr_flags)
15{
16	unsigned char ctl[sizeof(struct cmsghdr) + 20]
17				__aligned(sizeof(__kernel_size_t));
18	/* 20 is size of ipv6_pktinfo */
19	unsigned char *ctl_buf = ctl;	/* starts on the stack buffer; replaced below when the control data does not fit */
20	int ctl_len;
21	ssize_t err;
22
23	err = -ENOBUFS;	/* result for an oversized msg_controllen and for a failed sock_kmalloc() */
24
25	if (msg_sys->msg_controllen > INT_MAX)
26		goto out;
27	flags |= (msg_sys->msg_flags & allowed_msghdr_flags);	/* only the msghdr flags the caller allows are merged in */
28	ctl_len = msg_sys->msg_controllen;
29    // Handle both compat and native callers: bring the control data from user space into kernel space
30	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
31        // Compat path: convert the caller's compat-layout control messages into the kernel's layout (direction per the helper's name: user compat -> kern, i.e. not 64-bit -> 32-bit)
32		err =
33		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
34						     sizeof(ctl));
35		if (err)
36			goto out;
37		ctl_buf = msg_sys->msg_control;	/* pick up whatever buffer and length the helper left in msg_sys */
38		ctl_len = msg_sys->msg_controllen;
39	} else if (ctl_len) {
40		BUILD_BUG_ON(sizeof(struct cmsghdr) !=
41			     CMSG_ALIGN(sizeof(struct cmsghdr)));
42		if (ctl_len > sizeof(ctl)) {	/* too large for the stack buffer: allocate one charged to the socket */
43			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
44			if (ctl_buf == NULL)
45				goto out;
46		}
47		err = -EFAULT;
48		if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
49			goto out_freectl;
50		msg_sys->msg_control = ctl_buf;	/* from here on the msghdr points at the kernel copy */
51		msg_sys->msg_control_is_user = false;
52	}
53	msg_sys->msg_flags = flags;
54
55	if (sock->file->f_flags & O_NONBLOCK)	/* a non-blocking file implies MSG_DONTWAIT for this send */
56		msg_sys->msg_flags |= MSG_DONTWAIT;
57	/*
58	 * If this is sendmmsg() and current destination address is same as
59	 * previously succeeded address, omit asking LSM's decision.
60	 * used_address->name_len is initialized to UINT_MAX so that the first
61	 * destination address never matches.
62	 */
63	if (used_address && msg_sys->msg_name &&
64	    used_address->name_len == msg_sys->msg_namelen &&
65	    !memcmp(&used_address->name, msg_sys->msg_name,
66		    used_address->name_len)) {
67		err = sock_sendmsg_nosec(sock, msg_sys);
68		goto out_freectl;
69	}
70    // The data is actually sent from here on
71	err = sock_sendmsg(sock, msg_sys);
72	/*
73	 * If this is sendmmsg() and sending to current destination address was
74	 * successful, remember it.
75	 */
76	if (used_address && err >= 0) {
77		used_address->name_len = msg_sys->msg_namelen;
78		if (msg_sys->msg_name)
79			memcpy(&used_address->name, msg_sys->msg_name,
80			       used_address->name_len);
81	}
82
83out_freectl:
84	if (ctl_buf != ctl)	/* free only when ctl_buf is not the on-stack ctl array */
85		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
86out:
87	return err;
88}
  • 最终会调用到对应套接字的发送函数(sock->ops->sendmsg)上
 1// net/socket.c
 2/**
 3 *	sock_sendmsg - send a message through @sock
 4 *	@sock: socket
 5 *	@msg: message to send
 6 *
 7 *	Sends @msg through @sock, passing through LSM.
 8 *	Returns the number of bytes sent, or an error code.
 9 */
10int sock_sendmsg(struct socket *sock, struct msghdr *msg)
11{
12	int err = security_socket_sendmsg(sock, msg,
13					  msg_data_left(msg));
14
15	return err ?: sock_sendmsg_nosec(sock, msg);	/* a non-zero security hook result is returned as-is; zero falls through to the real send */
16}
17EXPORT_SYMBOL(sock_sendmsg);
18
19// net/socket.c — sock_sendmsg_nosec: call the socket's sendmsg op directly, with no security hook in this function; returns the op's result
20INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
21					   size_t));
22INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
23					    size_t));
24static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
25{
26	int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,	/* dispatches to sock->ops->sendmsg; NOTE(review): the macro presumably short-cuts to the two inet candidates — confirm against its definition */
27				     inet_sendmsg, sock, msg,
28				     msg_data_left(msg));
29	BUG_ON(ret == -EIOCBQUEUED);	/* the op is not allowed to report -EIOCBQUEUED on this path */
30	return ret;
31}

unix套接字发送 #

dgram(数据报,类似udp)的发送 #

  1// net/unix/af_unix.c — unix_dgram_sendmsg: build one skb from msg and queue it on the destination socket's receive queue; returns len when the datagram is queued (or dropped by sk_filter), otherwise a negative errno
  2static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
  3			      size_t len)
  4{
  5	struct sock *sk = sock->sk;
  6	struct net *net = sock_net(sk);
  7	struct unix_sock *u = unix_sk(sk);
  8	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
  9	struct sock *other = NULL;
 10	int err;
 11	struct sk_buff *skb;
 12	long timeo;
 13	struct scm_cookie scm;
 14	int data_len = 0;
 15	int sk_locked;
 16
 17	wait_for_unix_gc();
 18    // Parse the SCM (ancillary) data; this is also where fds being passed to another process are handled
 19	err = scm_send(sock, msg, &scm, false);
 20	if (err < 0)
 21		return err;
 22
 23	err = -EOPNOTSUPP;
 24	if (msg->msg_flags&MSG_OOB)
 25		goto out;
 26
 27    // Was a destination address supplied?
 28	if (msg->msg_namelen) {
 29        // Yes: validate the supplied address
 30		err = unix_validate_addr(sunaddr, msg->msg_namelen);
 31		if (err)
 32			goto out;
 33	} else {
 34        // No: the socket must already have a peer; fetch it from the sock, otherwise fail with -ENOTCONN
 35		sunaddr = NULL;
 36		err = -ENOTCONN;
 37		other = unix_peer_get(sk);
 38		if (!other)
 39			goto out;
 40	}
 41
 42	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {	/* SOCK_PASSCRED set but no address yet: autobind first */
 43		err = unix_autobind(sk);
 44		if (err)
 45			goto out;
 46	}
 47
 48	err = -EMSGSIZE;
 49	if (len > sk->sk_sndbuf - 32)
 50		goto out;
 51
 52	if (len > SKB_MAX_ALLOC) {	/* the part beyond SKB_MAX_ALLOC goes into paged data, page-aligned and capped at MAX_SKB_FRAGS pages */
 53		data_len = min_t(size_t,
 54				 len - SKB_MAX_ALLOC,
 55				 MAX_SKB_FRAGS * PAGE_SIZE);
 56		data_len = PAGE_ALIGN(data_len);
 57
 58		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
 59	}
 60
 61    // Allocate the skb
 62	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
 63				   msg->msg_flags & MSG_DONTWAIT, &err,
 64				   PAGE_ALLOC_COSTLY_ORDER);
 65	if (skb == NULL)
 66		goto out;
 67
 68    // Copy the SCM data into the skb
 69	err = unix_scm_to_skb(&scm, skb, true);
 70	if (err < 0)
 71		goto out_free;
 72
 73	skb_put(skb, len - data_len);
 74	skb->data_len = data_len;
 75	skb->len = len;
 76	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
 77	if (err)
 78		goto out_free;
 79	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 80
 81restart:
 82    // other == NULL here means no peer is held (none was fetched above, or it was dropped before a restart): look the destination up by address
 83	if (!other) {
 84		err = -ECONNRESET;
 85		if (sunaddr == NULL)
 86			goto out_free;
 87
 88		other = unix_find_other(net, sunaddr, msg->msg_namelen,
 89					sk->sk_type);
 90		if (IS_ERR(other)) {
 91			err = PTR_ERR(other);
 92			other = NULL;
 93			goto out_free;
 94		}
 95	}
 96
 97	if (sk_filter(other, skb) < 0) {
 98		/* Toss the packet but do not return any error to the sender */
 99		err = len;
100		goto out_free;
101	}
102
103	sk_locked = 0;
104	unix_state_lock(other);
105restart_locked:
106	err = -EPERM;
107	if (!unix_may_send(sk, other))
108		goto out_unlock;
109
110	if (unlikely(sock_flag(other, SOCK_DEAD))) {
111		/*
112		 *	Check with 1003.1g - what should
113		 *	datagram error
114		 */
115		unix_state_unlock(other);
116		sock_put(other);
117
118		if (!sk_locked)
119			unix_state_lock(sk);
120
121		err = 0;
122		if (unix_peer(sk) == other) {	/* the dead socket was our recorded peer: clear it and fail with -ECONNREFUSED */
123			unix_peer(sk) = NULL;
124			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
125
126			unix_state_unlock(sk);
127
128			sk->sk_state = TCP_CLOSE;
129			unix_dgram_disconnected(sk, other);
130			sock_put(other);
131			err = -ECONNREFUSED;
132		} else {
133			unix_state_unlock(sk);
134		}
135
136		other = NULL;
137		if (err)
138			goto out_free;
139		goto restart;	/* err == 0: it was not our peer, so retry the lookup by address */
140	}
141
142	err = -EPIPE;
143	if (other->sk_shutdown & RCV_SHUTDOWN)
144		goto out_unlock;
145
146	if (sk->sk_type != SOCK_SEQPACKET) {
147		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
148		if (err)
149			goto out_unlock;
150	}
151
152	/* other == sk && unix_peer(other) != sk if
153	 * - unix_peer(sk) == NULL, destination address bound to sk
154	 * - unix_peer(sk) == sk by time of get but disconnected before lock
155	 */
156	if (other != sk &&
157	    unlikely(unix_peer(other) != sk &&
158	    unix_recvq_full_lockless(other))) {
159		if (timeo) {	/* a send timeout remains: wait for the peer, then start over from restart */
160			timeo = unix_wait_for_peer(other, timeo);
161
162			err = sock_intr_errno(timeo);
163			if (signal_pending(current))
164				goto out_free;
165
166			goto restart;
167		}
168
169		if (!sk_locked) {	/* swap the single lock on other for a double lock on sk and other */
170			unix_state_unlock(other);
171			unix_state_double_lock(sk, other);
172		}
173
174		if (unix_peer(sk) != other ||
175		    unix_dgram_peer_wake_me(sk, other)) {
176			err = -EAGAIN;
177			sk_locked = 1;
178			goto out_unlock;
179		}
180
181		if (!sk_locked) {	/* both locks were just taken: redo the checks from restart_locked */
182			sk_locked = 1;
183			goto restart_locked;
184		}
185	}
186
187	if (unlikely(sk_locked))
188		unix_state_unlock(sk);
189
190	if (sock_flag(other, SOCK_RCVTSTAMP))
191		__net_timestamp(skb);
192	maybe_add_creds(skb, sock, other);
193	scm_stat_add(other, skb);
194    // Append the skb to the destination socket's receive queue: this is the actual "send"
195	skb_queue_tail(&other->sk_receive_queue, skb);
196	unix_state_unlock(other);
197	other->sk_data_ready(other);
198	sock_put(other);
199	scm_destroy(&scm);
200	return len;
201
202out_unlock:
203	if (sk_locked)
204		unix_state_unlock(sk);
205	unix_state_unlock(other);
206out_free:
207	kfree_skb(skb);
208out:
209	if (other)
210		sock_put(other);
211	scm_destroy(&scm);
212	return err;
213}

发送句柄到其他进程的处理 #

 1// include/net/scm.h — scm_send: reset the cookie, optionally stamp the current task's credentials, then parse the ancillary data if there is any; returns 0 or the result of __scm_send()
 2static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 3			       struct scm_cookie *scm, bool forcecreds)
 4{
 5	memset(scm, 0, sizeof(*scm));
 6	scm->creds.uid = INVALID_UID;	/* credentials start out invalid until something sets them */
 7	scm->creds.gid = INVALID_GID;
 8	if (forcecreds)
 9		scm_set_cred(scm, task_tgid(current), current_uid(), current_gid());
10	unix_get_peersec_dgram(sock, scm);
11	if (msg->msg_controllen <= 0)	/* no control data: nothing to parse */
12		return 0;
13	return __scm_send(sock, msg, scm);
14}
15
16// net/core/scm.c — __scm_send: walk every control message header in msg and record SOL_SOCKET-level SCM_RIGHTS / SCM_CREDENTIALS entries in *p; returns 0, or a negative errno after scm_destroy(p)
17int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
18{
19	struct cmsghdr *cmsg;
20	int err;
21
22	for_each_cmsghdr(cmsg, msg) {
23		err = -EINVAL;
24
25		/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
26		/* The first check was omitted in <= 2.2.5. The reasoning was
27		   that parser checks cmsg_len in any case, so that
28		   additional check would be work duplication.
29		   But if cmsg_level is not SOL_SOCKET, we do not check
30		   for too short ancillary data object at all! Oops.
31		   OK, let's add it...
32		 */
33		if (!CMSG_OK(msg, cmsg))
34			goto error;
35
36		if (cmsg->cmsg_level != SOL_SOCKET)	/* other levels are skipped here, not rejected */
37			continue;
38
39		switch (cmsg->cmsg_type)
40		{
41		case SCM_RIGHTS:
42            // Passing file descriptors is only allowed on a PF_UNIX socket
43			if (!sock->ops || sock->ops->family != PF_UNIX)
44				goto error;
45			err=scm_fp_copy(cmsg, &p->fp);
46			if (err<0)
47				goto error;
48			break;
49		case SCM_CREDENTIALS:
50		{
51			struct ucred creds;
52			kuid_t uid;
53			kgid_t gid;
54			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))	/* payload must be exactly one struct ucred */
55				goto error;
56			memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
57			err = scm_check_creds(&creds);
58			if (err)
59				goto error;
60
61			p->creds.pid = creds.pid;
62			if (!p->pid || pid_vnr(p->pid) != creds.pid) {	/* cookie holds no pid, or a different one: look up and swap in the requested pid */
63				struct pid *pid;
64				err = -ESRCH;
65				pid = find_get_pid(creds.pid);
66				if (!pid)
67					goto error;
68				put_pid(p->pid);
69				p->pid = pid;
70			}
71
72			err = -EINVAL;
73			uid = make_kuid(current_user_ns(), creds.uid);
74			gid = make_kgid(current_user_ns(), creds.gid);
75			if (!uid_valid(uid) || !gid_valid(gid))
76				goto error;
77
78			p->creds.uid = uid;
79			p->creds.gid = gid;
80			break;
81		}
82		default:	/* any other SOL_SOCKET type fails with the -EINVAL set at the top of the loop */
83			goto error;
84		}
85	}
86
87	if (p->fp && !p->fp->count)	/* an fd list was allocated but ended up empty: drop it */
88	{
89		kfree(p->fp);
90		p->fp = NULL;
91	}
92	return 0;
93
94error:
95	scm_destroy(p);
96	return err;
97}
98EXPORT_SYMBOL(__scm_send);
  • 上面可以看到,只要设置cmsg->cmsg_level = SOL_SOCKET并且cmsg->cmsg_type = SCM_RIGHTS,就会进入到句柄发送的处理里面
  • 发送句柄必须使用unix套接字
  • 使用下面的函数将文件句柄(fd)转换成对应的file结构体并保存下来,同时校验句柄是否合法;遇到非法的句柄会直接返回-EBADF,整个发送失败
  • sendmsg里面就会增加文件对应的引用计数
 1// net/core/scm.c — scm_fp_copy: resolve the fds carried in one SCM_RIGHTS cmsg to struct file pointers and append them to *fplp (allocated on first use); returns the number of fds handled, 0 if none, or a negative errno
 2static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 3{
 4	int *fdp = (int*)CMSG_DATA(cmsg);
 5	struct scm_fp_list *fpl = *fplp;
 6	struct file **fpp;
 7	int i, num;
 8
 9    // Everything in the cmsg after the header is treated as an array of int fds
10	num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int);
11
12	if (num <= 0)
13		return 0;
14
15	if (num > SCM_MAX_FD)
16		return -EINVAL;
17
18	if (!fpl)
19	{
20		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
21		if (!fpl)
22			return -ENOMEM;
23		*fplp = fpl;	/* published to the caller right away, so the list stays reachable on later error returns */
24		fpl->count = 0;
25		fpl->max = SCM_MAX_FD;
26		fpl->user = NULL;
27	}
28	fpp = &fpl->fp[fpl->count];	/* append after any entries already in the list */
29
30	if (fpl->count + num > fpl->max)
31		return -EINVAL;
32
33	/*
34	 *	Verify the descriptors and increment the usage count.
35	 */
36
37	for (i=0; i< num; i++)
38	{
39		int fd = fdp[i];
40		struct file *file;
41        // Convert the fd into the real struct file here, so what is sent is the file structure rather than the fd number
42        // fget_raw() increments the file's reference count
43		if (fd < 0 || !(file = fget_raw(fd)))
44			return -EBADF;
45		*fpp++ = file;
46		fpl->count++;
47	}
48
49	if (!fpl->user)
50		fpl->user = get_uid(current_user());
51
52	return num;
53}