1// include/linux/skbuff.h
2/**
3 * DOC: Basic sk_buff geometry
4 *
5 * struct sk_buff itself is a metadata structure and does not hold any packet
6 * data. All the data is held in associated buffers.
7 *
8 * &sk_buff.head points to the main "head" buffer. The head buffer is divided
9 * into two parts:
10 *
11 * - data buffer, containing headers and sometimes payload;
12 * this is the part of the skb operated on by the common helpers
13 * such as skb_put() or skb_pull();
14 * - shared info (struct skb_shared_info) which holds an array of pointers
15 * to read-only data in the (page, offset, length) format.
16 *
17 * Optionally &skb_shared_info.frag_list may point to another skb.
18 *
19 * Basic diagram may look like this::
20 *
21 *                                  ---------------
22 *                                 | sk_buff       |
23 *                                  ---------------
24 *     ,---------------------------  + head
25 *    /          ,-----------------  + data
26 *   /          /      ,-----------  + tail
27 *  |          |      |            , + end
28 *  |          |      |           |
29 *  v          v      v           v
30 *   -----------------------------------------------
31 *  | headroom | data |  tailroom | skb_shared_info |
32 *   -----------------------------------------------
33 *                                 + [page frag]
34 *                                 + [page frag]
35 *                                 + [page frag]
36 *                                 + [page frag]       ---------
37 *                                 + frag_list    --> | sk_buff |
38 *                                                     ---------
39 *
40 */
41
42/**
43 * struct sk_buff - socket buffer
44 * @next: Next buffer in list
45 * @prev: Previous buffer in list
46 * @tstamp: Time we arrived/left
47 * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
48 * for retransmit timer
49 * @rbnode: RB tree node, alternative to next/prev for netem/tcp
50 * @list: queue head
51 * @ll_node: anchor in an llist (eg socket defer_list)
52 * @sk: Socket we are owned by
53 * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
54 * fragmentation management
55 * @dev: Device we arrived on/are leaving by
56 * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
57 * @cb: Control buffer. Free for use by every layer. Put private vars here
58 * @_skb_refdst: destination entry (with norefcount bit)
59 * @sp: the security path, used for xfrm
60 * @len: Length of actual data
61 * @data_len: Data length
62 * @mac_len: Length of link layer header
63 * @hdr_len: writable header length of cloned skb
64 * @csum: Checksum (must include start/offset pair)
65 * @csum_start: Offset from skb->head where checksumming should start
66 * @csum_offset: Offset from csum_start where checksum should be stored
67 * @priority: Packet queueing priority
68 * @ignore_df: allow local fragmentation
69 * @cloned: Head may be cloned (check refcnt to be sure)
70 * @ip_summed: Driver fed us an IP checksum
71 * @nohdr: Payload reference only, must not modify header
72 * @pkt_type: Packet class
73 * @fclone: skbuff clone status
74 * @ipvs_property: skbuff is owned by ipvs
75 * @inner_protocol_type: whether the inner protocol is
76 * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
77 * @remcsum_offload: remote checksum offload is enabled
78 * @offload_fwd_mark: Packet was L2-forwarded in hardware
79 * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
80 * @tc_skip_classify: do not classify packet. set by IFB device
81 * @tc_at_ingress: used within tc_classify to distinguish in/egress
82 * @redirected: packet was redirected by packet classifier
83 * @from_ingress: packet was redirected from the ingress path
84 * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
85 * @peeked: this packet has been seen already, so stats have been
86 * done for it, don't do them again
87 * @nf_trace: netfilter packet trace flag
88 * @protocol: Packet protocol from driver
89 * @destructor: Destruct function
90 * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
91 * @_sk_redir: socket redirection information for skmsg
92 * @_nfct: Associated connection, if any (with nfctinfo bits)
93 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
94 * @skb_iif: ifindex of device we arrived on
95 * @tc_index: Traffic control index
96 * @hash: the packet hash
97 * @queue_mapping: Queue mapping for multiqueue devices
98 * @head_frag: skb was allocated from page fragments,
99 * not allocated by kmalloc() or vmalloc().
100 * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
101 * @pp_recycle: mark the packet for recycling instead of freeing (implies
102 * page_pool support on driver)
103 * @active_extensions: active extensions (skb_ext_id types)
104 * @ndisc_nodetype: router type (from link layer)
105 * @ooo_okay: allow the mapping of a socket to a queue to be changed
106 * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
107 * ports.
108 * @sw_hash: indicates hash was computed in software stack
109 * @wifi_acked_valid: wifi_acked was set
110 * @wifi_acked: whether frame was acked on wifi or not
111 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
112 * @encapsulation: indicates the inner headers in the skbuff are valid
113 * @encap_hdr_csum: software checksum is needed
114 * @csum_valid: checksum is already valid
115 * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
116 * @csum_complete_sw: checksum was completed by software
117 * @csum_level: indicates the number of consecutive checksums found in
118 * the packet minus one that have been verified as
119 * CHECKSUM_UNNECESSARY (max 3)
120 * @dst_pending_confirm: need to confirm neighbour
121 * @decrypted: Decrypted SKB
122 * @slow_gro: state present at GRO time, slower prepare step required
123 * @mono_delivery_time: When set, skb->tstamp has the
124 * delivery_time in mono clock base (i.e. EDT). Otherwise, the
125 * skb->tstamp has the (rcv) timestamp at ingress and
126 * delivery_time at egress.
127 * @napi_id: id of the NAPI struct this skb came from
128 * @sender_cpu: (aka @napi_id) source CPU in XPS
129 * @alloc_cpu: CPU which did the skb allocation.
130 * @secmark: security marking
131 * @mark: Generic packet mark
132 * @reserved_tailroom: (aka @mark) number of bytes of free space available
133 * at the tail of an sk_buff
134 * @vlan_present: VLAN tag is present
135 * @vlan_proto: vlan encapsulation protocol
136 * @vlan_tci: vlan tag control information
137 * @inner_protocol: Protocol (encapsulation)
138 * @inner_ipproto: (aka @inner_protocol) stores ipproto when
139 * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
140 * @inner_transport_header: Inner transport layer header (encapsulation)
141 * @inner_network_header: Inner network layer header (encapsulation)
142 * @inner_mac_header: Inner link layer header (encapsulation)
143 * @transport_header: Transport layer header
144 * @network_header: Network layer header
145 * @mac_header: Link layer header
146 * @kcov_handle: KCOV remote handle for remote coverage collection
147 * @tail: Tail pointer
148 * @end: End pointer
149 * @head: Head of buffer
150 * @data: Data head pointer
151 * @truesize: Buffer size
152 * @users: User count - see {datagram,tcp}.c
153 * @extensions: allocated extensions, valid if active_extensions is nonzero
154 */
155
156struct sk_buff {
157 union { /* linkage: the next/prev/dev group, rbnode, list and ll_node share this storage */
158 struct {
159 /* These two members must be first to match sk_buff_head. */
160 struct sk_buff *next; /* next buffer in list */
161 struct sk_buff *prev; /* previous buffer in list */
162
163 union {
164 struct net_device *dev; /* device we arrived on/are leaving by */
165 /* Some protocols might use this space to store information,
166 * while device pointer would be NULL.
167 * UDP receive path is one user.
168 */
169 unsigned long dev_scratch;
170 };
171 };
172 struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
173 struct list_head list; /* queue head */
174 struct llist_node ll_node; /* anchor in an llist (eg socket defer_list) */
175 };
176
177 union {
178 struct sock *sk; /* socket we are owned by */
179 int ip_defrag_offset; /* alternate use of sk, used in fragmentation management */
180 };
181
182 union {
183 ktime_t tstamp; /* time we arrived/left */
184 u64 skb_mstamp_ns; /* earliest departure time */
185 };
186 /*
187 * This is the control buffer. It is free to use for every
188 * layer. Please put your private variables there. If you
189 * want to keep them across layers you have to do a skb_clone()
190 * first. This is owned by whoever has the skb queued ATM.
191 */
192 char cb[48] __aligned(8);
193
194 union {
195 struct {
196 unsigned long _skb_refdst; /* destination entry (with norefcount bit) */
197 void (*destructor)(struct sk_buff *skb); /* destruct function */
198 };
199 struct list_head tcp_tsorted_anchor; /* list structure for TCP (tp->tsorted_sent_queue) */
200#ifdef CONFIG_NET_SOCK_MSG
201 unsigned long _sk_redir; /* socket redirection information for skmsg */
202#endif
203 };
204
205#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
206 unsigned long _nfct; /* associated connection, if any (with nfctinfo bits) */
207#endif
208 unsigned int len, /* length of actual data */
209 data_len;
210 __u16 mac_len, /* length of link layer header */
211 hdr_len; /* writable header length of cloned skb */
212
213 /* Following fields are _not_ copied in __copy_skb_header()
214 * Note that queue_mapping is here mostly to fill a hole.
215 */
216 __u16 queue_mapping; /* queue mapping for multiqueue devices */
217
218/* if you move cloned around you also must adapt those constants */
219#ifdef __BIG_ENDIAN_BITFIELD
220#define CLONED_MASK (1 << 7)
221#else
222#define CLONED_MASK 1
223#endif
224#define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset)
225
226 /* private: */
227 __u8 __cloned_offset[0]; /* zero-size marker; CLONED_OFFSET is offsetof() this member */
228 /* public: */
229 __u8 cloned:1, /* head may be cloned (check refcnt to be sure) */
230 nohdr:1, /* payload reference only, must not modify header */
231 fclone:2, /* skbuff clone status */
232 peeked:1, /* already seen; stats were done for it, don't do them again */
233 head_frag:1, /* allocated from page fragments, not kmalloc() or vmalloc() */
234 pfmemalloc:1, /* allocated from PFMEMALLOC reserves */
235 pp_recycle:1; /* page_pool recycle indicator */
236#ifdef CONFIG_SKB_EXTENSIONS
237 __u8 active_extensions; /* active extensions (skb_ext_id types) */
238#endif
239
240 /* Fields enclosed in headers group are copied
241 * using a single memcpy() in __copy_skb_header()
242 */
243 struct_group(headers,
244
245 /* private: */
246 __u8 __pkt_type_offset[0]; /* zero-size marker declared immediately before pkt_type */
247 /* public: */
248 __u8 pkt_type:3; /* see PKT_TYPE_MAX */
249 __u8 ignore_df:1; /* allow local fragmentation */
250 __u8 nf_trace:1; /* netfilter packet trace flag */
251 __u8 ip_summed:2; /* driver fed us an IP checksum */
252 __u8 ooo_okay:1; /* allow the mapping of a socket to a queue to be changed */
253
254 __u8 l4_hash:1; /* hash is a canonical 4-tuple hash over transport ports */
255 __u8 sw_hash:1; /* hash was computed in software stack */
256 __u8 wifi_acked_valid:1; /* wifi_acked was set */
257 __u8 wifi_acked:1; /* whether frame was acked on wifi or not */
258 __u8 no_fcs:1; /* request NIC to treat last 4 bytes as Ethernet FCS */
259 /* Indicates the inner headers are valid in the skbuff. */
260 __u8 encapsulation:1;
261 __u8 encap_hdr_csum:1; /* software checksum is needed */
262 __u8 csum_valid:1; /* checksum is already valid */
263
264 /* private: */
265 __u8 __pkt_vlan_present_offset[0]; /* zero-size marker declared immediately before vlan_present */
266 /* public: */
267 __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */
268 __u8 csum_complete_sw:1; /* checksum was completed by software */
269 __u8 csum_level:2; /* consecutive verified CHECKSUM_UNNECESSARY checksums, minus one (max 3) */
270 __u8 dst_pending_confirm:1; /* need to confirm neighbour */
271 __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
272#ifdef CONFIG_NET_CLS_ACT
273 __u8 tc_skip_classify:1; /* do not classify packet; set by IFB device */
274 __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
275#endif
276#ifdef CONFIG_IPV6_NDISC_NODETYPE
277 __u8 ndisc_nodetype:2; /* router type (from link layer) */
278#endif
279
280 __u8 ipvs_property:1; /* skbuff is owned by ipvs */
281 __u8 inner_protocol_type:1; /* ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO */
282 __u8 remcsum_offload:1; /* remote checksum offload is enabled */
283#ifdef CONFIG_NET_SWITCHDEV
284 __u8 offload_fwd_mark:1; /* packet was L2-forwarded in hardware */
285 __u8 offload_l3_fwd_mark:1; /* packet was L3-forwarded in hardware */
286#endif
287 __u8 redirected:1; /* packet was redirected by packet classifier */
288#ifdef CONFIG_NET_REDIRECT
289 __u8 from_ingress:1; /* packet was redirected from the ingress path */
290#endif
291#ifdef CONFIG_NETFILTER_SKIP_EGRESS
292 __u8 nf_skip_egress:1; /* packet shall skip nf egress - see netfilter_netdev.h */
293#endif
294#ifdef CONFIG_TLS_DEVICE
295 __u8 decrypted:1; /* decrypted SKB */
296#endif
297 __u8 slow_gro:1; /* state present at GRO time, slower prepare step required */
298 __u8 csum_not_inet:1; /* use CRC32c to resolve CHECKSUM_PARTIAL */
299
300#ifdef CONFIG_NET_SCHED
301 __u16 tc_index; /* traffic control index */
302#endif
303
304 union {
305 __wsum csum; /* checksum (must include start/offset pair) */
306 struct {
307 __u16 csum_start; /* offset from skb->head where checksumming should start */
308 __u16 csum_offset; /* offset from csum_start where checksum should be stored */
309 };
310 };
311 __u32 priority; /* packet queueing priority */
312 int skb_iif; /* ifindex of device we arrived on */
313 __u32 hash; /* the packet hash */
314 __be16 vlan_proto; /* vlan encapsulation protocol */
315 __u16 vlan_tci; /* vlan tag control information */
316#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
317 union {
318 unsigned int napi_id; /* id of the NAPI struct this skb came from */
319 unsigned int sender_cpu; /* source CPU in XPS */
320 };
321#endif
322 u16 alloc_cpu; /* CPU which did the skb allocation */
323#ifdef CONFIG_NETWORK_SECMARK
324 __u32 secmark; /* security marking */
325#endif
326
327 union {
328 __u32 mark; /* generic packet mark */
329 __u32 reserved_tailroom; /* bytes of free space available at the tail of an sk_buff */
330 };
331
332 union {
333 __be16 inner_protocol; /* protocol (encapsulation) */
334 __u8 inner_ipproto; /* ipproto when inner_protocol_type == ENCAP_TYPE_IPPROTO */
335 };
336
337 __u16 inner_transport_header; /* inner transport layer header (encapsulation) */
338 __u16 inner_network_header; /* inner network layer header (encapsulation) */
339 __u16 inner_mac_header; /* inner link layer header (encapsulation) */
340
341 __be16 protocol; /* packet protocol from driver */
342 __u16 transport_header; /* transport layer header */
343 __u16 network_header; /* network layer header */
344 __u16 mac_header; /* link layer header */
345
346#ifdef CONFIG_KCOV
347 u64 kcov_handle; /* KCOV remote handle for remote coverage collection */
348#endif
349
350 ); /* end headers group */
351
352 /* These elements must be at the end, see alloc_skb() for details. */
353 sk_buff_data_t tail; /* tail pointer */
354 sk_buff_data_t end; /* end pointer */
355 unsigned char *head, /* head of buffer */
356 *data; /* data head pointer */
357 unsigned int truesize; /* buffer size */
358 refcount_t users; /* user count - see {datagram,tcp}.c */
359
360#ifdef CONFIG_SKB_EXTENSIONS
361 /* only useable after checking ->active_extensions != 0 */
362 struct skb_ext *extensions;
363#endif
364};
1// net/core/skbuff.c
2/**
3 * skb_push - add data to the start of a buffer
4 * @skb: buffer to use
5 * @len: amount of data to add
6 *
7 * This function extends the used data area of the buffer at the buffer
8 * start. If this would exceed the total buffer headroom the kernel will
9 * panic. A pointer to the first byte of the extra data is returned.
10 */
11void *skb_push(struct sk_buff *skb, unsigned int len)
12{
13 skb->data -= len;
14 skb->len += len;
15 if (unlikely(skb->data < skb->head))
16 skb_under_panic(skb, len, __builtin_return_address(0));
17 return skb->data;
18}
19EXPORT_SYMBOL(skb_push);