一、总述
#
1. 关键结构体关系
#
@startuml xxx
class socket {
struct sock *sk;
}
class sock {}
class inet_sock implements sock {
struct sock sk;
}
note left of inet_sock
inet_sock在sock的基础上做了扩展(内嵌struct sock sk成员)
创建时实际申请的是inet_sock,但以struct sock *指针的形式保存到socket->sk
inet层的代码再通过inet_sk(sk)把该指针强转回inet_sock使用
end note
class inet_connection_sock implements inet_sock {
struct inet_sock icsk_inet;
}
note left of inet_connection_sock
inet_connection_sock在inet_sock的基础上进一步扩展(内嵌struct inet_sock icsk_inet成员)
同样以struct sock *指针传递,使用时再强转回inet_connection_sock
end note
socket --> sock : sk
@enduml
二、socket创建过程 inet6_create
#
1// net/ipv6/af_inet6.c
2
3static const struct net_proto_family inet6_family_ops = {
4 .family = PF_INET6,
5 .create = inet6_create,
6 .owner = THIS_MODULE,
7};
8
9static int __init inet6_init(void)
10{
11 ...
12 /* Register the family here so that the init calls below will
13 * be able to create sockets. (?? is this dangerous ??)
14 */
15 err = sock_register(&inet6_family_ops);
16 if (err)
17 goto out_sock_register_fail;
18 ...
19}
20module_init(inet6_init);
- 创建调用:创建socket时,内核会调用对应协议族注册的pf->create回调;
对PF_INET6来说,就是上面inet6_family_ops中通过sock_register注册的.create,
也就是inet6_create
1// net/ipv6/af_inet6.c
2static int inet6_create(struct net *net, struct socket *sock, int protocol,
3 int kern)
4{
5 struct inet_sock *inet;
6 struct ipv6_pinfo *np;
7 struct sock *sk;
8 struct inet_protosw *answer;
9 struct proto *answer_prot;
10 unsigned char answer_flags;
11 int try_loading_module = 0;
12 int err;
13
14 if (protocol < 0 || protocol >= IPPROTO_MAX)
15 return -EINVAL;
16
17 /* Look for the requested type/protocol pair. */
18lookup_protocol:
19 err = -ESOCKTNOSUPPORT;
20 rcu_read_lock();
21 // 在inetsw6[sock->type]链表中查找与protocol匹配的inet_protosw,结果保存在answer变量中
22 list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
23
24 err = 0;
25 /* Check the non-wild match. */
26 if (protocol == answer->protocol) {
27 if (protocol != IPPROTO_IP)
28 break;
29 } else {
30 /* Check for the two wild cases. */
31 if (IPPROTO_IP == protocol) {
32 protocol = answer->protocol;
33 break;
34 }
35 if (IPPROTO_IP == answer->protocol)
36 break;
37 }
38 err = -EPROTONOSUPPORT;
39 }
40
41 if (err) {
42 if (try_loading_module < 2) {
43 rcu_read_unlock();
44 /*
45 * Be more specific, e.g. net-pf-10-proto-132-type-1
46 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM)
47 */
48 if (++try_loading_module == 1)
49 request_module("net-pf-%d-proto-%d-type-%d",
50 PF_INET6, protocol, sock->type);
51 /*
52 * Fall back to generic, e.g. net-pf-10-proto-132
53 * (net-pf-PF_INET6-proto-IPPROTO_SCTP)
54 */
55 else
56 request_module("net-pf-%d-proto-%d",
57 PF_INET6, protocol);
58 goto lookup_protocol;
59 } else
60 goto out_rcu_unlock;
61 }
62
63 err = -EPERM;
64 if (sock->type == SOCK_RAW && !kern &&
65 !ns_capable(net->user_ns, CAP_NET_RAW))
66 goto out_rcu_unlock;
67
68 // 将对应协议的操作放到sock里面
69 sock->ops = answer->ops;
70 answer_prot = answer->prot;
71 answer_flags = answer->flags;
72 rcu_read_unlock();
73
74 WARN_ON(!answer_prot->slab);
75
76 err = -ENOBUFS;
77 // 给struct sock *sk申请内存
78 // 同时设置sk->sk_prot = answer_prot,也就是在inetsw6[sock->type]链表中匹配到的answer->prot
79 sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
80 if (!sk)
81 goto out;
82
83 sock_init_data(sock, sk);
84
85 err = 0;
86 if (INET_PROTOSW_REUSE & answer_flags)
87 sk->sk_reuse = SK_CAN_REUSE;
88
89 inet = inet_sk(sk);
90 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
91
92 if (SOCK_RAW == sock->type) {
93 inet->inet_num = protocol;
94 if (IPPROTO_RAW == protocol)
95 inet->hdrincl = 1;
96 }
97
98 sk->sk_destruct = inet_sock_destruct; // sk设置析构函数
99 sk->sk_family = PF_INET6;
100 sk->sk_protocol = protocol;
101
102 sk->sk_backlog_rcv = answer->prot->backlog_rcv;
103
104 inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
105 np->hop_limit = -1;
106 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
107 np->mc_loop = 1;
108 np->mc_all = 1;
109 np->pmtudisc = IPV6_PMTUDISC_WANT;
110 np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
111 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
112
113 /* Init the ipv4 part of the socket since we can have sockets
114 * using v6 API for ipv4.
115 */
116 inet->uc_ttl = -1;
117
118 inet->mc_loop = 1;
119 inet->mc_ttl = 1;
120 inet->mc_index = 0;
121 RCU_INIT_POINTER(inet->mc_list, NULL);
122 inet->rcv_tos = 0;
123
124 if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
125 inet->pmtudisc = IP_PMTUDISC_DONT;
126 else
127 inet->pmtudisc = IP_PMTUDISC_WANT;
128 /*
129 * Increment only the relevant sk_prot->socks debug field, this changes
130 * the previous behaviour of incrementing both the equivalent to
131 * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
132 *
133 * This allows better debug granularity as we'll know exactly how many
134 * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
135 * transport protocol socks. -acme
136 */
137 sk_refcnt_debug_inc(sk);
138
139 if (inet->inet_num) {
140 /* It assumes that any protocol which allows
141 * the user to assign a number at socket
142 * creation time automatically shares.
143 */
144 inet->inet_sport = htons(inet->inet_num);
145 err = sk->sk_prot->hash(sk);
146 if (err) {
147 sk_common_release(sk);
148 goto out;
149 }
150 }
151 // 如果对应的传输层协议(sk->sk_prot)提供了init回调,则调用它完成协议相关的初始化
152 if (sk->sk_prot->init) {
153 err = sk->sk_prot->init(sk);
154 if (err) {
155 sk_common_release(sk);
156 goto out;
157 }
158 }
159
160 if (!kern) {
161 err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
162 if (err) {
163 sk_common_release(sk);
164 goto out;
165 }
166 }
167out:
168 return err;
169out_rcu_unlock:
170 rcu_read_unlock();
171 goto out;
172}
1// net/ipv6/af_inet6.c
2/* The inetsw6 table contains everything that inet6_create needs to
3 * build a new socket.
4 */
5static struct list_head inetsw6[SOCK_MAX];
6static DEFINE_SPINLOCK(inetsw6_lock);
7// 各协议通过下面的函数把自己的inet_protosw注册到inetsw6[p->type]链表中
8int inet6_register_protosw(struct inet_protosw *p)
9{
10 struct list_head *lh;
11 struct inet_protosw *answer;
12 struct list_head *last_perm;
13 int protocol = p->protocol;
14 int ret;
15
16 spin_lock_bh(&inetsw6_lock);
17
18 ret = -EINVAL;
19 if (p->type >= SOCK_MAX)
20 goto out_illegal;
21
22 /* If we are trying to override a permanent protocol, bail. */
23 answer = NULL;
24 ret = -EPERM;
25 last_perm = &inetsw6[p->type];
26 list_for_each(lh, &inetsw6[p->type]) {
27 answer = list_entry(lh, struct inet_protosw, list);
28
29 /* Check only the non-wild match. */
30 if (INET_PROTOSW_PERMANENT & answer->flags) {
31 if (protocol == answer->protocol)
32 break;
33 last_perm = lh;
34 }
35
36 answer = NULL;
37 }
38 if (answer)
39 goto out_permanent;
40
41 /* Add the new entry after the last permanent entry if any, so that
42 * the new entry does not override a permanent entry when matched with
43 * a wild-card protocol. But it is allowed to override any existing
44 * non-permanent entry. This means that when we remove this entry, the
45 * system automatically returns to the old behavior.
46 */
47 list_add_rcu(&p->list, last_perm);
48 ret = 0;
49out:
50 spin_unlock_bh(&inetsw6_lock);
51 return ret;
52
53out_permanent:
54 pr_err("Attempt to override permanent protocol %d\n", protocol);
55 goto out;
56
57out_illegal:
58 pr_err("Ignoring attempt to register invalid socket type %d\n",
59 p->type);
60 goto out;
61}
62EXPORT_SYMBOL(inet6_register_protosw);
1// net/ipv6/tcp_ipv6.c
2int __init tcpv6_init(void)
3{
4 ...
5 /* register inet6 protocol */
6 ret = inet6_register_protosw(&tcpv6_protosw);
7 if (ret)
8 goto out_tcpv6_protocol;
9 ...
10}
11
12// net/ipv6/udp.c
13int __init udpv6_init(void)
14{
15 ...
16 ret = inet6_register_protosw(&udpv6_protosw);
17 if (ret)
18 goto out_udpv6_protocol;
19 ...
20}