sk_buff 内核中数据包结构体

一、结构体定义 #

  1// include/linux/skbuff.h
  2/**
  3 * DOC: Basic sk_buff geometry
  4 *
  5 * struct sk_buff itself is a metadata structure and does not hold any packet
  6 * data. All the data is held in associated buffers.
  7 *
  8 * &sk_buff.head points to the main "head" buffer. The head buffer is divided
  9 * into two parts:
 10 *
 11 *  - data buffer, containing headers and sometimes payload;
 12 *    this is the part of the skb operated on by the common helpers
 13 *    such as skb_put() or skb_pull();
 14 *  - shared info (struct skb_shared_info) which holds an array of pointers
 15 *    to read-only data in the (page, offset, length) format.
 16 *
 17 * Optionally &skb_shared_info.frag_list may point to another skb.
 18 *
 19 * Basic diagram may look like this::
 20 *
 21 *                                  ---------------
 22 *                                 | sk_buff       |
 23 *                                  ---------------
 24 *     ,---------------------------  + head
 25 *    /          ,-----------------  + data
 26 *   /          /      ,-----------  + tail
 27 *  |          |      |            , + end
 28 *  |          |      |           |
 29 *  v          v      v           v
 30 *   -----------------------------------------------
 31 *  | headroom | data |  tailroom | skb_shared_info |
 32 *   -----------------------------------------------
 33 *                                 + [page frag]
 34 *                                 + [page frag]
 35 *                                 + [page frag]
 36 *                                 + [page frag]       ---------
 37 *                                 + frag_list    --> | sk_buff |
 38 *                                                     ---------
 39 *
 40 */
 41
 42/**
 43 *	struct sk_buff - socket buffer
 44 *	@next: Next buffer in list
 45 *	@prev: Previous buffer in list
 46 *	@tstamp: Time we arrived/left
 47 *	@skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
 48 *		for retransmit timer
 49 *	@rbnode: RB tree node, alternative to next/prev for netem/tcp
 50 *	@list: queue head
 51 *	@ll_node: anchor in an llist (eg socket defer_list)
 52 *	@sk: Socket we are owned by
 53 *	@ip_defrag_offset: (aka @sk) alternate use of @sk, used in
 54 *		fragmentation management
 55 *	@dev: Device we arrived on/are leaving by
 56 *	@dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
 57 *	@cb: Control buffer. Free for use by every layer. Put private vars here
 58 *	@_skb_refdst: destination entry (with norefcount bit)
 59 *	@sp: the security path, used for xfrm
 60 *	@len: Length of actual data
 61 *	@data_len: Data length
 62 *	@mac_len: Length of link layer header
 63 *	@hdr_len: writable header length of cloned skb
 64 *	@csum: Checksum (must include start/offset pair)
 65 *	@csum_start: Offset from skb->head where checksumming should start
 66 *	@csum_offset: Offset from csum_start where checksum should be stored
 67 *	@priority: Packet queueing priority
 68 *	@ignore_df: allow local fragmentation
 69 *	@cloned: Head may be cloned (check refcnt to be sure)
 70 *	@ip_summed: Driver fed us an IP checksum
 71 *	@nohdr: Payload reference only, must not modify header
 72 *	@pkt_type: Packet class
 73 *	@fclone: skbuff clone status
 74 *	@ipvs_property: skbuff is owned by ipvs
 75 *	@inner_protocol_type: whether the inner protocol is
 76 *		ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
 77 *	@remcsum_offload: remote checksum offload is enabled
 78 *	@offload_fwd_mark: Packet was L2-forwarded in hardware
 79 *	@offload_l3_fwd_mark: Packet was L3-forwarded in hardware
 80 *	@tc_skip_classify: do not classify packet. set by IFB device
 81 *	@tc_at_ingress: used within tc_classify to distinguish in/egress
 82 *	@redirected: packet was redirected by packet classifier
 83 *	@from_ingress: packet was redirected from the ingress path
 84 *	@nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
 85 *	@peeked: this packet has been seen already, so stats have been
 86 *		done for it, don't do them again
 87 *	@nf_trace: netfilter packet trace flag
 88 *	@protocol: Packet protocol from driver
 89 *	@destructor: Destruct function
 90 *	@tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
 91 *	@_sk_redir: socket redirection information for skmsg
 92 *	@_nfct: Associated connection, if any (with nfctinfo bits)
 93 *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 94 *	@skb_iif: ifindex of device we arrived on
 95 *	@tc_index: Traffic control index
 96 *	@hash: the packet hash
 97 *	@queue_mapping: Queue mapping for multiqueue devices
 98 *	@head_frag: skb was allocated from page fragments,
 99 *		not allocated by kmalloc() or vmalloc().
100 *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
101 *	@pp_recycle: mark the packet for recycling instead of freeing (implies
102 *		page_pool support on driver)
103 *	@active_extensions: active extensions (skb_ext_id types)
104 *	@ndisc_nodetype: router type (from link layer)
105 *	@ooo_okay: allow the mapping of a socket to a queue to be changed
106 *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
107 *		ports.
108 *	@sw_hash: indicates hash was computed in software stack
109 *	@wifi_acked_valid: wifi_acked was set
110 *	@wifi_acked: whether frame was acked on wifi or not
111 *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
112 *	@encapsulation: indicates the inner headers in the skbuff are valid
113 *	@encap_hdr_csum: software checksum is needed
114 *	@csum_valid: checksum is already valid
115 *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
116 *	@csum_complete_sw: checksum was completed by software
117 *	@csum_level: indicates the number of consecutive checksums found in
118 *		the packet minus one that have been verified as
119 *		CHECKSUM_UNNECESSARY (max 3)
120 *	@dst_pending_confirm: need to confirm neighbour
121 *	@decrypted: Decrypted SKB
122 *	@slow_gro: state present at GRO time, slower prepare step required
123 *	@mono_delivery_time: When set, skb->tstamp has the
124 *		delivery_time in mono clock base (i.e. EDT).  Otherwise, the
125 *		skb->tstamp has the (rcv) timestamp at ingress and
126 *		delivery_time at egress.
127 *	@napi_id: id of the NAPI struct this skb came from
128 *	@sender_cpu: (aka @napi_id) source CPU in XPS
129 *	@alloc_cpu: CPU which did the skb allocation.
130 *	@secmark: security marking
131 *	@mark: Generic packet mark
132 *	@reserved_tailroom: (aka @mark) number of bytes of free space available
133 *		at the tail of an sk_buff
134 *	@vlan_present: VLAN tag is present
135 *	@vlan_proto: vlan encapsulation protocol
136 *	@vlan_tci: vlan tag control information
137 *	@inner_protocol: Protocol (encapsulation)
138 *	@inner_ipproto: (aka @inner_protocol) stores ipproto when
139 *		skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
140 *	@inner_transport_header: Inner transport layer header (encapsulation)
141 *	@inner_network_header: Network layer header (encapsulation)
142 *	@inner_mac_header: Link layer header (encapsulation)
143 *	@transport_header: Transport layer header
144 *	@network_header: Network layer header
145 *	@mac_header: Link layer header
146 *	@kcov_handle: KCOV remote handle for remote coverage collection
147 *	@tail: Tail pointer
148 *	@end: End pointer
149 *	@head: Head of buffer
150 *	@data: Data head pointer
151 *	@truesize: Buffer size
152 *	@users: User count - see {datagram,tcp}.c
153 *	@extensions: allocated extensions, valid if active_extensions is nonzero
154 */
155
 156struct sk_buff {
	/* The first union overlays the next/prev/dev list linkage with
	 * rbnode, list and ll_node: all four views share the same storage.
	 */
 157	union {
 158		struct {
 159			/* These two members must be first to match sk_buff_head. */
 160			struct sk_buff		*next;
 161			struct sk_buff		*prev;
 162
 163			union {
 164				struct net_device	*dev;
 165				/* Some protocols might use this space to store information,
 166				 * while device pointer would be NULL.
 167				 * UDP receive path is one user.
 168				 */
 169				unsigned long		dev_scratch;
 170			};
 171		};
 172		struct rb_node		rbnode; /* used in netem, ip4 defrag, and tcp stack */
 173		struct list_head	list;
 174		struct llist_node	ll_node;
 175	};
 176
	/* sk and ip_defrag_offset share storage; the kernel-doc above describes
	 * ip_defrag_offset as the alternate use of sk in fragmentation management.
	 */
 177	union {
 178		struct sock		*sk;
 179		int			ip_defrag_offset;
 180	};
 181
	/* tstamp and skb_mstamp_ns are two names for the same storage. */
 182	union {
 183		ktime_t		tstamp;
 184		u64		skb_mstamp_ns; /* earliest departure time */
 185	};
 186	/*
 187	 * This is the control buffer. It is free to use for every
 188	 * layer. Please put your private variables there. If you
 189	 * want to keep them across layers you have to do a skb_clone()
 190	 * first. This is owned by whoever has the skb queued ATM.
 191	 */
 192	char			cb[48] __aligned(8);
 193
	/* _skb_refdst + destructor, tcp_tsorted_anchor and (when configured)
	 * _sk_redir all occupy the same storage.
	 */
 194	union {
 195		struct {
 196			unsigned long	_skb_refdst;
 197			void		(*destructor)(struct sk_buff *skb);
 198		};
 199		struct list_head	tcp_tsorted_anchor;
 200#ifdef CONFIG_NET_SOCK_MSG
 201		unsigned long		_sk_redir;
 202#endif
 203	};
 204
 205#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 206	unsigned long		 _nfct;
 207#endif
 208	unsigned int		len,
 209				data_len;
 210	__u16			mac_len,
 211				hdr_len;
 212
 213	/* Following fields are _not_ copied in __copy_skb_header()
 214	 * Note that queue_mapping is here mostly to fill a hole.
 215	 */
 216	__u16			queue_mapping;
 217
 218/* if you move cloned around you also must adapt those constants */
 219#ifdef __BIG_ENDIAN_BITFIELD
 220#define CLONED_MASK	(1 << 7)
 221#else
 222#define CLONED_MASK	1
 223#endif
 224#define CLONED_OFFSET		offsetof(struct sk_buff, __cloned_offset)
 225
	/* Zero-length marker: CLONED_OFFSET takes offsetof() of this member,
	 * which sits immediately before the bitfield that starts with cloned.
	 */
 226	/* private: */
 227	__u8			__cloned_offset[0];
 228	/* public: */
 229	__u8			cloned:1,
 230				nohdr:1,
 231				fclone:2,
 232				peeked:1,
 233				head_frag:1,
 234				pfmemalloc:1,
 235				pp_recycle:1; /* page_pool recycle indicator */
 236#ifdef CONFIG_SKB_EXTENSIONS
 237	__u8			active_extensions;
 238#endif
 239
 240	/* Fields enclosed in headers group are copied
 241	 * using a single memcpy() in __copy_skb_header()
 242	 */
 243	struct_group(headers,
 244
 245	/* private: */
 246	__u8			__pkt_type_offset[0];
 247	/* public: */
 248	__u8			pkt_type:3; /* see PKT_TYPE_MAX */
 249	__u8			ignore_df:1;
 250	__u8			nf_trace:1;
 251	__u8			ip_summed:2;
 252	__u8			ooo_okay:1;
 253
 254	__u8			l4_hash:1;
 255	__u8			sw_hash:1;
 256	__u8			wifi_acked_valid:1;
 257	__u8			wifi_acked:1;
 258	__u8			no_fcs:1;
 259	/* Indicates the inner headers are valid in the skbuff. */
 260	__u8			encapsulation:1;
 261	__u8			encap_hdr_csum:1;
 262	__u8			csum_valid:1;
 263
 264	/* private: */
 265	__u8			__pkt_vlan_present_offset[0];
 266	/* public: */
 267	__u8			vlan_present:1;	/* See PKT_VLAN_PRESENT_BIT */
 268	__u8			csum_complete_sw:1;
 269	__u8			csum_level:2;
 270	__u8			dst_pending_confirm:1;
 271	__u8			mono_delivery_time:1;	/* See SKB_MONO_DELIVERY_TIME_MASK */
 272#ifdef CONFIG_NET_CLS_ACT
 273	__u8			tc_skip_classify:1;
 274	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 275#endif
 276#ifdef CONFIG_IPV6_NDISC_NODETYPE
 277	__u8			ndisc_nodetype:2;
 278#endif
 279
 280	__u8			ipvs_property:1;
 281	__u8			inner_protocol_type:1;
 282	__u8			remcsum_offload:1;
 283#ifdef CONFIG_NET_SWITCHDEV
 284	__u8			offload_fwd_mark:1;
 285	__u8			offload_l3_fwd_mark:1;
 286#endif
 287	__u8			redirected:1;
 288#ifdef CONFIG_NET_REDIRECT
 289	__u8			from_ingress:1;
 290#endif
 291#ifdef CONFIG_NETFILTER_SKIP_EGRESS
 292	__u8			nf_skip_egress:1;
 293#endif
 294#ifdef CONFIG_TLS_DEVICE
 295	__u8			decrypted:1;
 296#endif
 297	__u8			slow_gro:1;
 298	__u8			csum_not_inet:1;
 299
 300#ifdef CONFIG_NET_SCHED
 301	__u16			tc_index;	/* traffic control index */
 302#endif
 303
	/* csum overlays the csum_start/csum_offset pair. */
 304	union {
 305		__wsum		csum;
 306		struct {
 307			__u16	csum_start;
 308			__u16	csum_offset;
 309		};
 310	};
 311	__u32			priority;
 312	int			skb_iif;
 313	__u32			hash;
 314	__be16			vlan_proto;
 315	__u16			vlan_tci;
 316#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
	/* napi_id and sender_cpu share storage; the kernel-doc above describes
	 * sender_cpu as the source CPU in XPS.
	 */
 317	union {
 318		unsigned int	napi_id;
 319		unsigned int	sender_cpu;
 320	};
 321#endif
 322	u16			alloc_cpu;
 323#ifdef CONFIG_NETWORK_SECMARK
 324	__u32		secmark;
 325#endif
 326
	/* mark and reserved_tailroom share storage. */
 327	union {
 328		__u32		mark;
 329		__u32		reserved_tailroom;
 330	};
 331
	/* inner_ipproto reuses the inner_protocol slot; per the kernel-doc above
	 * it holds the ipproto when inner_protocol_type == ENCAP_TYPE_IPPROTO.
	 */
 332	union {
 333		__be16		inner_protocol;
 334		__u8		inner_ipproto;
 335	};
 336
 337	__u16			inner_transport_header;
 338	__u16			inner_network_header;
 339	__u16			inner_mac_header;
 340
 341	__be16			protocol;
 342	__u16			transport_header;
 343	__u16			network_header;
 344	__u16			mac_header;
 345
 346#ifdef CONFIG_KCOV
 347	u64			kcov_handle;
 348#endif
 349
 350	); /* end headers group */
 351
 352	/* These elements must be at the end, see alloc_skb() for details.  */
 353	sk_buff_data_t		tail;
 354	sk_buff_data_t		end;
 355	unsigned char		*head,
 356				*data;
 357	unsigned int		truesize;
 358	refcount_t		users;
 359
 360#ifdef CONFIG_SKB_EXTENSIONS
 361	/* only useable after checking ->active_extensions != 0 */
 362	struct skb_ext		*extensions;
 363#endif
 364};

1. 几个数据的含义和计算 #

字段 类型 含义
len unsigned int 实际包长度
data unsigned char * 实际包起始地址
head unsigned char * 空间的起始地址,预分配空间的首地址
transport_header __u16 传输层头部到head地址的偏移量
network_header __u16 网络层头部到head地址的偏移量
mac_header __u16 数据链路层头部到head地址的偏移量
protocol __be16 sk_buff的协议,Internet Protocol packet就是htons(ETH_P_IP),宏定义在include/uapi/linux/if_ether.h

二、方法定义 #

1. 传输层 #

/*
 * Record the current skb->data position as the transport header.
 * The stored value is the byte offset of skb->data from skb->head, so the
 * result is only meaningful when skb->data currently points at the
 * transport header.
 */
1static inline void skb_reset_transport_header(struct sk_buff *skb)
2{
3	skb->transport_header = skb->data - skb->head;
4}
  • 只能在data恰好指向传输层头部时调用(即处于传输层处理阶段),它会把当前data相对head的偏移量记录为传输层头部的位置
/*
 * Set the transport header to @offset bytes past the current skb->data.
 * It first stores (skb->data - skb->head) via skb_reset_transport_header()
 * and then adds @offset, so @offset is relative to skb->data, not skb->head.
 */
1static inline void skb_set_transport_header(struct sk_buff *skb,
2					    const int offset)
3{
4	skb_reset_transport_header(skb);
5	skb->transport_header += offset;
6}
  • 若是知道传输层头部相对当前data的offset(注意不是相对head),可以调用这个;例如data指向ip头时,传入ip头长度即可

2. 网络层 #

3. 数据包扩容 #

  • skb_push是在数据包的前面新增一段空间,实现上就是data地址前移len字节,同时skb->len增加len;只要data不越过head(即headroom足够)就没有问题,否则内核会panic
  • 一般每封装一层协议,就会进行一次push,如应用层到tcp层,push一个tcp头部长度,tcp层到ip层,push一个ip头长度
 1// net/core/skbuff.c
 2/**
 3 *	skb_push - add data to the start of a buffer
 4 *	@skb: buffer to use
 5 *	@len: amount of data to add
 6 *
 7 *	This function extends the used data area of the buffer at the buffer
 8 *	start. If this would exceed the total buffer headroom the kernel will
 9 *	panic. A pointer to the first byte of the extra data is returned.
10 */
11void *skb_push(struct sk_buff *skb, unsigned int len)
12{
	/* Grow the data area toward the front: move data back by len bytes
	 * and account for the new bytes in skb->len.
	 */
13	skb->data -= len;
14	skb->len  += len;
	/* data is now in front of head, i.e. the headroom was smaller than
	 * len; this is the panic case described in the header comment.
	 */
15	if (unlikely(skb->data < skb->head))
16		skb_under_panic(skb, len, __builtin_return_address(0));
17	return skb->data;
18}
19EXPORT_SYMBOL(skb_push);