Igb

一、igb操作结构体 #

 1// drivers/net/ethernet/intel/igb/igb_main.c
 2static const struct net_device_ops igb_netdev_ops = {
 3	.ndo_open		= igb_open,
 4	.ndo_stop		= igb_close,
 5	.ndo_start_xmit		= igb_xmit_frame,
 6	.ndo_get_stats64	= igb_get_stats64,
 7	.ndo_set_rx_mode	= igb_set_rx_mode,
 8	.ndo_set_mac_address	= igb_set_mac,
 9	.ndo_change_mtu		= igb_change_mtu,
10	.ndo_eth_ioctl		= igb_ioctl,
11	.ndo_tx_timeout		= igb_tx_timeout,
12	.ndo_validate_addr	= eth_validate_addr,
13	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
14	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
15	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
16	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
17	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
18	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
19	.ndo_set_vf_trust	= igb_ndo_set_vf_trust,
20	.ndo_get_vf_config	= igb_ndo_get_vf_config,
21	.ndo_fix_features	= igb_fix_features,
22	.ndo_set_features	= igb_set_features,
23	.ndo_fdb_add		= igb_ndo_fdb_add,
24	.ndo_features_check	= igb_features_check,
25	.ndo_setup_tc		= igb_setup_tc,
26	.ndo_bpf		= igb_xdp,
27	.ndo_xdp_xmit		= igb_xdp_xmit,
28};

二、启动过程 #

1. .ndo_open = igb_open #

1// drivers/net/ethernet/intel/igb/igb_main.c
2int igb_open(struct net_device *netdev)
3{
4	return __igb_open(netdev, false);
5}
  • igb_open直接调用__igb_open,其中最重要的一步就是注册硬中断处理函数
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2/**
 3 *  __igb_open - Called when a network interface is made active
 4 *  @netdev: network interface device structure
 5 *  @resuming: indicates whether we are in a resume call
 6 *
 7 *  Returns 0 on success, negative value on failure
 8 *
 9 *  The open entry point is called when a network interface is made
10 *  active by the system (IFF_UP).  At this point all resources needed
11 *  for transmit and receive operations are allocated, the interrupt
12 *  handler is registered with the OS, the watchdog timer is started,
13 *  and the stack is notified that the interface is ready.
14 **/
15static int __igb_open(struct net_device *netdev, bool resuming)
16{
17    ...
18    // 注册中断
19	err = igb_request_irq(adapter);
20	if (err)
21		goto err_req_irq;
22    ...
23}
  • igb_request_irq在网卡支持MSI-X(设置了IGB_FLAG_HAS_MSIX标志)时,调用igb_request_msix注册中断
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2/**
 3 *  igb_request_irq - initialize interrupts
 4 *  @adapter: board private structure to initialize
 5 *
 6 *  Attempts to configure interrupts using the best available
 7 *  capabilities of the hardware and kernel.
 8 **/
 9static int igb_request_irq(struct igb_adapter *adapter)
10{
11    ...
12    if (adapter->flags & IGB_FLAG_HAS_MSIX) {
13		err = igb_request_msix(adapter);
14    ...
15    }
16    ...
17}
  • igb_request_msix通过request_irq为每个队列向量(q_vector)注册igb_msix_ring作为硬中断处理函数
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2/**
 3 *  igb_request_msix - Initialize MSI-X interrupts
 4 *  @adapter: board private structure to initialize
 5 *
 6 *  igb_request_msix allocates MSI-X vectors and requests interrupts from the
 7 *  kernel.
 8 **/
 9static int igb_request_msix(struct igb_adapter *adapter)
10{
11    ...
12	for (i = 0; i < num_q_vectors; i++) {
13        ...
14		err = request_irq(adapter->msix_entries[vector].vector,
15				  igb_msix_ring, 0, q_vector->name,
16				  q_vector);
17		if (err)
18			goto err_free;
19	}
20    ...
21}
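  • igb_msix_ring中调用napi_schedule,经过__napi_schedule最终在____napi_schedule里把napi挂到poll_list并触发软中断NET_RX_SOFTIRQ(如果NAPI处于线程模式,则改为唤醒对应的内核线程后直接返回)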
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2static irqreturn_t igb_msix_ring(int irq, void *data)
 3{
 4	struct igb_q_vector *q_vector = data;
 5
 6	/* Write the ITR value calculated from the previous interrupt. */
 7	igb_write_itr(q_vector);
 8
 9	napi_schedule(&q_vector->napi);
10
11	return IRQ_HANDLED;
12}
13
14// include/linux/netdevice.h
15/**
16 *	napi_schedule - schedule NAPI poll
17 *	@n: NAPI context
18 *
19 * Schedule NAPI poll routine to be called if it is not already
20 * running.
21 */
22static inline void napi_schedule(struct napi_struct *n)
23{
24	if (napi_schedule_prep(n))
25		__napi_schedule(n);
26}
27
28// net/core/dev.c
29/**
30 * __napi_schedule - schedule for receive
31 * @n: entry to schedule
32 *
33 * The entry's receive function will be scheduled to run.
34 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
35 */
36void __napi_schedule(struct napi_struct *n)
37{
38	unsigned long flags;
39
40	local_irq_save(flags);
41	____napi_schedule(this_cpu_ptr(&softnet_data), n);
42	local_irq_restore(flags);
43}
44EXPORT_SYMBOL(__napi_schedule);
45
46// net/core/dev.c
47/* Called with irq disabled */
48static inline void ____napi_schedule(struct softnet_data *sd,
49				     struct napi_struct *napi)
50{
51	struct task_struct *thread;
52
53	lockdep_assert_irqs_disabled();
54
55	if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
56		/* Paired with smp_mb__before_atomic() in
57		 * napi_enable()/dev_set_threaded().
58		 * Use READ_ONCE() to guarantee a complete
59		 * read on napi->thread. Only call
60		 * wake_up_process() when it's not NULL.
61		 */
62		thread = READ_ONCE(napi->thread);
63		if (thread) {
64			/* Avoid doing set_bit() if the thread is in
65			 * INTERRUPTIBLE state, cause napi_thread_wait()
66			 * makes sure to proceed with napi polling
67			 * if the thread is explicitly woken from here.
68			 */
69			if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
70				set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
71			wake_up_process(thread);
72			return;
73		}
74	}
75
76	list_add_tail(&napi->poll_list, &sd->poll_list);
77    // 这里设置软中断NET_RX_SOFTIRQ
78	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
79}

2. 注册poll到napi #

 1/**
 2 *  igb_alloc_q_vector - Allocate memory for a single interrupt vector
 3 *  @adapter: board private structure to initialize
 4 *  @v_count: q_vectors allocated on adapter, used for ring interleaving
 5 *  @v_idx: index of vector in adapter struct
 6 *  @txr_count: total number of Tx rings to allocate
 7 *  @txr_idx: index of first Tx ring to allocate
 8 *  @rxr_count: total number of Rx rings to allocate
 9 *  @rxr_idx: index of first Rx ring to allocate
10 *
11 *  We allocate one q_vector.  If allocation fails we return -ENOMEM.
12 **/
13static int igb_alloc_q_vector(struct igb_adapter *adapter,
14			      int v_count, int v_idx,
15			      int txr_count, int txr_idx,
16			      int rxr_count, int rxr_idx)
17{
18	...
19	/* initialize NAPI */
20	netif_napi_add(adapter->netdev, &q_vector->napi,
21		       igb_poll, 64);
22	...
23}

三、收包处理 #

  • 内核在处理NET_RX_SOFTIRQ软中断时,会调用之前通过netif_napi_add注册的poll函数,也就是这里的igb_poll
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2/**
 3 *  igb_poll - NAPI Rx polling callback
 4 *  @napi: napi polling structure
 5 *  @budget: count of how many packets we should handle
 6 **/
 7static int igb_poll(struct napi_struct *napi, int budget)
 8{
 9	struct igb_q_vector *q_vector = container_of(napi,
10						     struct igb_q_vector,
11						     napi);
12	bool clean_complete = true;
13	int work_done = 0;
14
15#ifdef CONFIG_IGB_DCA
16	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
17		igb_update_dca(q_vector);
18#endif
19	if (q_vector->tx.ring)
20		clean_complete = igb_clean_tx_irq(q_vector, budget);
21
22	if (q_vector->rx.ring) {
23		int cleaned = igb_clean_rx_irq(q_vector, budget);
24
25		work_done += cleaned;
26		if (cleaned >= budget)
27			clean_complete = false;
28	}
29
30	/* If all work not completed, return budget and keep polling */
31	if (!clean_complete)
32		return budget;
33
34	/* Exit the polling mode, but don't re-enable interrupts if stack might
35	 * poll us due to busy-polling
36	 */
37	if (likely(napi_complete_done(napi, work_done)))
38		igb_ring_irq_enable(q_vector);
39
40	return work_done;
41}
  • igb_clean_rx_irq函数在循环中对每个收到的数据包调用napi_gro_receive,把skb交给GRO/协议栈继续处理,一次最多处理budget个包
 1// drivers/net/ethernet/intel/igb/igb_main.c
 2static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 3{
 4	struct igb_adapter *adapter = q_vector->adapter;
 5	struct igb_ring *rx_ring = q_vector->rx.ring;
 6	struct sk_buff *skb = rx_ring->skb;
 7	unsigned int total_bytes = 0, total_packets = 0;
 8	u16 cleaned_count = igb_desc_unused(rx_ring);
 9	unsigned int xdp_xmit = 0;
10	struct xdp_buff xdp;
11	u32 frame_sz = 0;
12	int rx_buf_pgcnt;
13
14	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
15#if (PAGE_SIZE < 8192)
16	frame_sz = igb_rx_frame_truesize(rx_ring, 0);
17#endif
18	xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
19
20	while (likely(total_packets < budget)) {
21		...
22		napi_gro_receive(&q_vector->napi, skb);
23
24		/* reset skb pointer */
25		skb = NULL;
26
27		/* update budget accounting */
28		total_packets++;
29	}
30	...
31}