## 一、igb 操作结构体
1// drivers/net/ethernet/intel/igb/igb_main.c
/* net_device_ops table: binds the kernel's generic netdev callbacks
 * (open/stop/xmit/ioctl/...) to the igb driver's own handlers.
 */
2static const struct net_device_ops igb_netdev_ops = {
3 .ndo_open = igb_open,
4 .ndo_stop = igb_close,
5 .ndo_start_xmit = igb_xmit_frame,
6 .ndo_get_stats64 = igb_get_stats64,
7 .ndo_set_rx_mode = igb_set_rx_mode,
8 .ndo_set_mac_address = igb_set_mac,
9 .ndo_change_mtu = igb_change_mtu,
10 .ndo_eth_ioctl = igb_ioctl,
11 .ndo_tx_timeout = igb_tx_timeout,
12 .ndo_validate_addr = eth_validate_addr,
13 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
14 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
15 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
16 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
17 .ndo_set_vf_rate = igb_ndo_set_vf_bw,
18 .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk,
19 .ndo_set_vf_trust = igb_ndo_set_vf_trust,
20 .ndo_get_vf_config = igb_ndo_get_vf_config,
21 .ndo_fix_features = igb_fix_features,
22 .ndo_set_features = igb_set_features,
23 .ndo_fdb_add = igb_ndo_fdb_add,
24 .ndo_features_check = igb_features_check,
25 .ndo_setup_tc = igb_setup_tc,
26 .ndo_bpf = igb_xdp,
27 .ndo_xdp_xmit = igb_xdp_xmit,
28};
## 二、启动过程

### 1. .ndo_open = igb_open
1// drivers/net/ethernet/intel/igb/igb_main.c
/* .ndo_open callback: thin wrapper; "resuming = false" marks a normal
 * ifup as opposed to a power-management resume path.
 */
2int igb_open(struct net_device *netdev)
3{
4 return __igb_open(netdev, false);
5}
1// drivers/net/ethernet/intel/igb/igb_main.c
2/**
3 * __igb_open - Called when a network interface is made active
4 * @netdev: network interface device structure
5 * @resuming: indicates whether we are in a resume call
6 *
7 * Returns 0 on success, negative value on failure
8 *
9 * The open entry point is called when a network interface is made
10 * active by the system (IFF_UP). At this point all resources needed
11 * for transmit and receive operations are allocated, the interrupt
12 * handler is registered with the OS, the watchdog timer is started,
13 * and the stack is notified that the interface is ready.
14 **/
15static int __igb_open(struct net_device *netdev, bool resuming)
16{
17 ...
18 // register the interrupt handler(s) with the kernel
19 err = igb_request_irq(adapter);
20 if (err)
21 goto err_req_irq;
22 ...
23}
1// drivers/net/ethernet/intel/igb/igb_main.c
2/**
3 * igb_request_irq - initialize interrupts
4 * @adapter: board private structure to initialize
5 *
6 * Attempts to configure interrupts using the best available
7 * capabilities of the hardware and kernel.
8 **/
9static int igb_request_irq(struct igb_adapter *adapter)
10{
11 ...
 /* preferred path: MSI-X, one vector per queue vector
  * (MSI / legacy-INTx fallbacks are elided in this excerpt)
  */
12 if (adapter->flags & IGB_FLAG_HAS_MSIX) {
13 err = igb_request_msix(adapter);
14 ...
15 }
16 ...
17}
1// drivers/net/ethernet/intel/igb/igb_main.c
2/**
3 * igb_request_msix - Initialize MSI-X interrupts
4 * @adapter: board private structure to initialize
5 *
6 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
7 * kernel.
8 **/
9static int igb_request_msix(struct igb_adapter *adapter)
10{
11 ...
 /* one IRQ per queue vector; igb_msix_ring is installed as the
  * hardirq handler, with the q_vector as its private data
  */
12 for (i = 0; i < num_q_vectors; i++) {
13 ...
14 err = request_irq(adapter->msix_entries[vector].vector,
15 igb_msix_ring, 0, q_vector->name,
16 q_vector);
17 if (err)
18 goto err_free;
19 }
20 ...
21}
- 在 `igb_msix_ring` 中调用 `napi_schedule()`,其最终在 `____napi_schedule()` 里触发软中断 NET_RX_SOFTIRQ
1// drivers/net/ethernet/intel/igb/igb_main.c
/* hardirq handler for one MSI-X queue vector: minimal work here,
 * the actual packet processing is deferred to NAPI.
 */
2static irqreturn_t igb_msix_ring(int irq, void *data)
3{
4 struct igb_q_vector *q_vector = data;
5
6 /* Write the ITR value calculated from the previous interrupt. */
7 igb_write_itr(q_vector);
8
 /* defer the real work: schedule this vector's NAPI instance */
9 napi_schedule(&q_vector->napi);
10
11 return IRQ_HANDLED;
12}
13
14// include/linux/netdevice.h
15/**
16 * napi_schedule - schedule NAPI poll
17 * @n: NAPI context
18 *
19 * Schedule NAPI poll routine to be called if it is not already
20 * running.
21 */
22static inline void napi_schedule(struct napi_struct *n)
23{
 /* napi_schedule_prep() returns false if this NAPI instance is
  * already scheduled, avoiding a double schedule
  */
24 if (napi_schedule_prep(n))
25 __napi_schedule(n);
26}
27
28// net/core/dev.c
29/**
30 * __napi_schedule - schedule for receive
31 * @n: entry to schedule
32 *
33 * The entry's receive function will be scheduled to run.
34 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
35 */
36void __napi_schedule(struct napi_struct *n)
37{
38 unsigned long flags;
39
 /* ____napi_schedule() requires hard irqs disabled: it touches
  * this CPU's softnet_data
  */
40 local_irq_save(flags);
41 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
42 local_irq_restore(flags);
43}
44EXPORT_SYMBOL(__napi_schedule);
45
46// net/core/dev.c
47/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
49 struct napi_struct *napi)
50{
51 struct task_struct *thread;
52
53 lockdep_assert_irqs_disabled();
54
 /* threaded-NAPI mode: wake the per-NAPI kernel thread instead of
  * raising a softirq
  */
55 if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
56 /* Paired with smp_mb__before_atomic() in
57 * napi_enable()/dev_set_threaded().
58 * Use READ_ONCE() to guarantee a complete
59 * read on napi->thread. Only call
60 * wake_up_process() when it's not NULL.
61 */
62 thread = READ_ONCE(napi->thread);
63 if (thread) {
64 /* Avoid doing set_bit() if the thread is in
65 * INTERRUPTIBLE state, cause napi_thread_wait()
66 * makes sure to proceed with napi polling
67 * if the thread is explicitly woken from here.
68 */
69 if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
70 set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
71 wake_up_process(thread);
72 return;
73 }
74 }
75
 /* non-threaded path: queue on this CPU's poll list, then ... */
76 list_add_tail(&napi->poll_list, &sd->poll_list);
77 // ... this is where the NET_RX_SOFTIRQ softirq is raised
78 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
79}
### 2. 注册 poll 到 napi
1/**
2 * igb_alloc_q_vector - Allocate memory for a single interrupt vector
3 * @adapter: board private structure to initialize
4 * @v_count: q_vectors allocated on adapter, used for ring interleaving
5 * @v_idx: index of vector in adapter struct
6 * @txr_count: total number of Tx rings to allocate
7 * @txr_idx: index of first Tx ring to allocate
8 * @rxr_count: total number of Rx rings to allocate
9 * @rxr_idx: index of first Rx ring to allocate
10 *
11 * We allocate one q_vector. If allocation fails we return -ENOMEM.
12 **/
13static int igb_alloc_q_vector(struct igb_adapter *adapter,
14 int v_count, int v_idx,
15 int txr_count, int txr_idx,
16 int rxr_count, int rxr_idx)
17{
18 ...
19 /* initialize NAPI */
 /* igb_poll becomes this vector's NAPI poll callback; 64 is the NAPI
  * weight (this is the older 4-argument netif_napi_add() signature)
  */
20 netif_napi_add(adapter->netdev, &q_vector->napi,
21 igb_poll, 64);
22 ...
23}
## 三、收包处理
- 内核软中断处理收包时会调用注册的 poll 函数,也就是这里的 `igb_poll`
1// drivers/net/ethernet/intel/igb/igb_main.c
2/**
3 * igb_poll - NAPI Rx polling callback
4 * @napi: napi polling structure
5 * @budget: count of how many packets we should handle
6 **/
7static int igb_poll(struct napi_struct *napi, int budget)
8{
 /* recover the q_vector that embeds this napi_struct */
9 struct igb_q_vector *q_vector = container_of(napi,
10 struct igb_q_vector,
11 napi);
12 bool clean_complete = true;
13 int work_done = 0;
14
15#ifdef CONFIG_IGB_DCA
16 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
17 igb_update_dca(q_vector);
18#endif
 /* Tx completions first; they only affect clean_complete, and are
  * not counted into work_done below
  */
19 if (q_vector->tx.ring)
20 clean_complete = igb_clean_tx_irq(q_vector, budget);
21
22 if (q_vector->rx.ring) {
23 int cleaned = igb_clean_rx_irq(q_vector, budget);
24
25 work_done += cleaned;
26 if (cleaned >= budget)
27 clean_complete = false;
28 }
29
30 /* If all work not completed, return budget and keep polling */
31 if (!clean_complete)
32 return budget;
33
34 /* Exit the polling mode, but don't re-enable interrupts if stack might
35 * poll us due to busy-polling
36 */
37 if (likely(napi_complete_done(napi, work_done)))
38 igb_ring_irq_enable(q_vector);
39
40 return work_done;
41}
- 在 `igb_clean_rx_irq` 函数里面会调用 `napi_gro_receive`,把收到的 skb 交给协议栈
1// drivers/net/ethernet/intel/igb/igb_main.c
/* Rx-ring cleanup: walks received descriptors (up to @budget packets)
 * and hands completed skbs to the network stack; returns the number of
 * packets processed.
 */
2static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
3{
4 struct igb_adapter *adapter = q_vector->adapter;
5 struct igb_ring *rx_ring = q_vector->rx.ring;
6 struct sk_buff *skb = rx_ring->skb;
7 unsigned int total_bytes = 0, total_packets = 0;
8 u16 cleaned_count = igb_desc_unused(rx_ring);
9 unsigned int xdp_xmit = 0;
10 struct xdp_buff xdp;
11 u32 frame_sz = 0;
12 int rx_buf_pgcnt;
13
14 /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
15#if (PAGE_SIZE < 8192)
16 frame_sz = igb_rx_frame_truesize(rx_ring, 0);
17#endif
18 xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
19
 /* budget caps how many packets one poll invocation may process */
20 while (likely(total_packets < budget)) {
21 ...
 /* hand the completed skb up the stack via GRO */
22 napi_gro_receive(&q_vector->napi, skb);
23
24 /* reset skb pointer */
25 skb = NULL;
26
27 /* update budget accounting */
28 total_packets++;
29 }
30 ...
31}