Inet6

一、总述 #

1. 关键结构体关系 #

@startuml xxx

class socket {
    struct sock *sk;
}

class sock {}
class inet_sock implements sock {
    struct sock sk;
}
note left of inet_sock
inet_sock在sock的基础上做了扩展(struct sock是它的第一个成员)
创建时实际申请的是inet_sock,但赋值给socket的是struct sock指针
在inet相关的操作里再把该指针强转回inet_sock使用
end note
class inet_connection_sock implements inet_sock {
    struct inet_sock icsk_inet;
}
note left of inet_connection_sock
inet_connection_sock在inet_sock的基础上做了扩展
同样以struct sock指针的形式传递,使用时再强转
end note

socket --> sock : sk

@enduml

二、socket创建过程 inet6_create #

  • 模块初始化时通过sock_register注册inet6_family_ops
 1// net/ipv6/af_inet6.c
 2
 3static const struct net_proto_family inet6_family_ops = {
 4	.family = PF_INET6,
 5	.create = inet6_create,
 6	.owner	= THIS_MODULE,
 7};
 8
 9static int __init inet6_init(void)
10{
11    ...
12	/* Register the family here so that the init calls below will
13	 * be able to create sockets. (?? is this dangerous ??)
14	 */
15	err = sock_register(&inet6_family_ops);
16	if (err)
17		goto out_sock_register_fail;
18    ...
19}
20module_init(inet6_init);
  • 创建socket时调用pf->create,对PF_INET6而言就是inet6_create
  1// net/ipv6/af_inet6.c
  2static int inet6_create(struct net *net, struct socket *sock, int protocol,
  3			int kern)
  4{
  5	struct inet_sock *inet;
  6	struct ipv6_pinfo *np;
  7	struct sock *sk;
  8	struct inet_protosw *answer;
  9	struct proto *answer_prot;
 10	unsigned char answer_flags;
 11	int try_loading_module = 0;
 12	int err;
 13
 14	if (protocol < 0 || protocol >= IPPROTO_MAX)
 15		return -EINVAL;
 16
 17	/* Look for the requested type/protocol pair. */
 18lookup_protocol:
 19	err = -ESOCKTNOSUPPORT;
 20	rcu_read_lock();
 21    // 遍历inetsw6[sock->type]链表,找到与protocol匹配的inet_protosw,赋值给answer变量
 22	list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
 23
 24		err = 0;
 25		/* Check the non-wild match. */
 26		if (protocol == answer->protocol) {
 27			if (protocol != IPPROTO_IP)
 28				break;
 29		} else {
 30			/* Check for the two wild cases. */
 31			if (IPPROTO_IP == protocol) {
 32				protocol = answer->protocol;
 33				break;
 34			}
 35			if (IPPROTO_IP == answer->protocol)
 36				break;
 37		}
 38		err = -EPROTONOSUPPORT;
 39	}
 40
 41	if (err) {
 42		if (try_loading_module < 2) {
 43			rcu_read_unlock();
 44			/*
 45			 * Be more specific, e.g. net-pf-10-proto-132-type-1
 46			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
 47			 */
 48			if (++try_loading_module == 1)
 49				request_module("net-pf-%d-proto-%d-type-%d",
 50						PF_INET6, protocol, sock->type);
 51			/*
 52			 * Fall back to generic, e.g. net-pf-10-proto-132
 53			 * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
 54			 */
 55			else
 56				request_module("net-pf-%d-proto-%d",
 57						PF_INET6, protocol);
 58			goto lookup_protocol;
 59		} else
 60			goto out_rcu_unlock;
 61	}
 62
 63	err = -EPERM;
 64	if (sock->type == SOCK_RAW && !kern &&
 65	    !ns_capable(net->user_ns, CAP_NET_RAW))
 66		goto out_rcu_unlock;
 67
 68    // 将对应协议的操作放到sock里面
 69	sock->ops = answer->ops;
 70	answer_prot = answer->prot;
 71	answer_flags = answer->flags;
 72	rcu_read_unlock();
 73
 74	WARN_ON(!answer_prot->slab);
 75
 76	err = -ENOBUFS;
 77    // 给struct sock *sk申请内存
 78    // 同时设置sk->sk_prot = answer_prot,也就是inetsw6[sock->type]链表中匹配项的prot
 79	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
 80	if (!sk)
 81		goto out;
 82
 83	sock_init_data(sock, sk);
 84
 85	err = 0;
 86	if (INET_PROTOSW_REUSE & answer_flags)
 87		sk->sk_reuse = SK_CAN_REUSE;
 88
 89	inet = inet_sk(sk);
 90	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 91
 92	if (SOCK_RAW == sock->type) {
 93		inet->inet_num = protocol;
 94		if (IPPROTO_RAW == protocol)
 95			inet->hdrincl = 1;
 96	}
 97
 98	sk->sk_destruct		= inet_sock_destruct;   // sk设置析构函数
 99	sk->sk_family		= PF_INET6;
100	sk->sk_protocol		= protocol;
101
102	sk->sk_backlog_rcv	= answer->prot->backlog_rcv;
103
104	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
105	np->hop_limit	= -1;
106	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
107	np->mc_loop	= 1;
108	np->mc_all	= 1;
109	np->pmtudisc	= IPV6_PMTUDISC_WANT;
110	np->repflow	= net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
111	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
112
113	/* Init the ipv4 part of the socket since we can have sockets
114	 * using v6 API for ipv4.
115	 */
116	inet->uc_ttl	= -1;
117
118	inet->mc_loop	= 1;
119	inet->mc_ttl	= 1;
120	inet->mc_index	= 0;
121	RCU_INIT_POINTER(inet->mc_list, NULL);
122	inet->rcv_tos	= 0;
123
124	if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
125		inet->pmtudisc = IP_PMTUDISC_DONT;
126	else
127		inet->pmtudisc = IP_PMTUDISC_WANT;
128	/*
129	 * Increment only the relevant sk_prot->socks debug field, this changes
130	 * the previous behaviour of incrementing both the equivalent to
131	 * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
132	 *
133	 * This allows better debug granularity as we'll know exactly how many
134	 * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
135	 * transport protocol socks. -acme
136	 */
137	sk_refcnt_debug_inc(sk);
138
139	if (inet->inet_num) {
140		/* It assumes that any protocol which allows
141		 * the user to assign a number at socket
142		 * creation time automatically shares.
143		 */
144		inet->inet_sport = htons(inet->inet_num);
145		err = sk->sk_prot->hash(sk);
146		if (err) {
147			sk_common_release(sk);
148			goto out;
149		}
150	}
151    // 如果对应的传输层协议提供了init,则调用它完成协议相关的初始化
152	if (sk->sk_prot->init) {
153		err = sk->sk_prot->init(sk);
154		if (err) {
155			sk_common_release(sk);
156			goto out;
157		}
158	}
159
160	if (!kern) {
161		err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
162		if (err) {
163			sk_common_release(sk);
164			goto out;
165		}
166	}
167out:
168	return err;
169out_rcu_unlock:
170	rcu_read_unlock();
171	goto out;
172}
  • inetsw6的定义
 1// net/ipv6/af_inet6.c
 2/* The inetsw6 table contains everything that inet6_create needs to
 3 * build a new socket.
 4 */
 5static struct list_head inetsw6[SOCK_MAX];
 6static DEFINE_SPINLOCK(inetsw6_lock);
 7// 通过下面的函数向inetsw6注册协议(inet_protosw)
 8int inet6_register_protosw(struct inet_protosw *p)
 9{
10	struct list_head *lh;
11	struct inet_protosw *answer;
12	struct list_head *last_perm;
13	int protocol = p->protocol;
14	int ret;
15
16	spin_lock_bh(&inetsw6_lock);
17
18	ret = -EINVAL;
19	if (p->type >= SOCK_MAX)
20		goto out_illegal;
21
22	/* If we are trying to override a permanent protocol, bail. */
23	answer = NULL;
24	ret = -EPERM;
25	last_perm = &inetsw6[p->type];
26	list_for_each(lh, &inetsw6[p->type]) {
27		answer = list_entry(lh, struct inet_protosw, list);
28
29		/* Check only the non-wild match. */
30		if (INET_PROTOSW_PERMANENT & answer->flags) {
31			if (protocol == answer->protocol)
32				break;
33			last_perm = lh;
34		}
35
36		answer = NULL;
37	}
38	if (answer)
39		goto out_permanent;
40
41	/* Add the new entry after the last permanent entry if any, so that
42	 * the new entry does not override a permanent entry when matched with
43	 * a wild-card protocol. But it is allowed to override any existing
44	 * non-permanent entry.  This means that when we remove this entry, the
45	 * system automatically returns to the old behavior.
46	 */
47	list_add_rcu(&p->list, last_perm);
48	ret = 0;
49out:
50	spin_unlock_bh(&inetsw6_lock);
51	return ret;
52
53out_permanent:
54	pr_err("Attempt to override permanent protocol %d\n", protocol);
55	goto out;
56
57out_illegal:
58	pr_err("Ignoring attempt to register invalid socket type %d\n",
59	       p->type);
60	goto out;
61}
62EXPORT_SYMBOL(inet6_register_protosw);
  • 调用inet6_register_protosw进行注册的位置
 1// net/ipv6/tcp_ipv6.c
 2int __init tcpv6_init(void)
 3{
 4    ...
 5	/* register inet6 protocol */
 6	ret = inet6_register_protosw(&tcpv6_protosw);
 7	if (ret)
 8		goto out_tcpv6_protocol;
 9    ...
10}
11
12// net/ipv6/udp.c
13int __init udpv6_init(void)
14{
15    ...
16	ret = inet6_register_protosw(&udpv6_protosw);
17	if (ret)
18		goto out_udpv6_protocol;
19    ...
20}