1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2018 Solarflare Communications Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11 #include "net_driver.h"
12 #include <linux/module.h>
13 #include "efx_channels.h"
14 #include "efx.h"
15 #include "efx_common.h"
16 #include "tx_common.h"
17 #include "rx_common.h"
18 #include "nic.h"
19 #include "sriov.h"
20 #include "workarounds.h"
21
22 /* This is the first interrupt mode to try out of:
23 * 0 => MSI-X
24 * 1 => MSI
25 * 2 => legacy
26 */
27 unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;
28
29 /* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
30 * i.e. the number of CPUs among which we may distribute simultaneous
31 * interrupt handling.
32 *
33 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
34 * The default (0) means to assign an interrupt to each core.
35 */
36 unsigned int rss_cpus;
37
38 static unsigned int irq_adapt_low_thresh = 8000;
39 module_param(irq_adapt_low_thresh, uint, 0644);
40 MODULE_PARM_DESC(irq_adapt_low_thresh,
41 "Threshold score for reducing IRQ moderation");
42
43 static unsigned int irq_adapt_high_thresh = 16000;
44 module_param(irq_adapt_high_thresh, uint, 0644);
45 MODULE_PARM_DESC(irq_adapt_high_thresh,
46 "Threshold score for increasing IRQ moderation");
47
48 /* This is the weight assigned to each of the (per-channel) virtual
49 * NAPI devices.
50 */
51 static int napi_weight = 64;
52
53 /***************
54 * Housekeeping
55 ***************/
56
/* No-op channel hook for operations that return a status code.
 * @channel is ignored; the result is always 0.
 */
int efx_channel_dummy_op_int(struct efx_channel *channel)
{
	int rc = 0;

	return rc;
}
61
/* No-op channel hook for operations that return nothing; @channel is ignored */
void efx_channel_dummy_op_void(struct efx_channel *channel)
{
	return;
}
65
66 static const struct efx_channel_type efx_default_channel_type = {
67 .pre_probe = efx_channel_dummy_op_int,
68 .post_remove = efx_channel_dummy_op_void,
69 .get_name = efx_get_channel_name,
70 .copy = efx_copy_channel,
71 .want_txqs = efx_default_channel_want_txqs,
72 .keep_eventq = false,
73 .want_pio = true,
74 };
75
76 /*************
77 * INTERRUPTS
78 *************/
79
efx_wanted_parallelism(struct efx_nic * efx)80 static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
81 {
82 cpumask_var_t thread_mask;
83 unsigned int count;
84 int cpu;
85
86 if (rss_cpus) {
87 count = rss_cpus;
88 } else {
89 if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
90 netif_warn(efx, probe, efx->net_dev,
91 "RSS disabled due to allocation failure\n");
92 return 1;
93 }
94
95 count = 0;
96 for_each_online_cpu(cpu) {
97 if (!cpumask_test_cpu(cpu, thread_mask)) {
98 ++count;
99 cpumask_or(thread_mask, thread_mask,
100 topology_sibling_cpumask(cpu));
101 }
102 }
103
104 free_cpumask_var(thread_mask);
105 }
106
107 if (count > EFX_MAX_RX_QUEUES) {
108 netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
109 "Reducing number of rx queues from %u to %u.\n",
110 count, EFX_MAX_RX_QUEUES);
111 count = EFX_MAX_RX_QUEUES;
112 }
113
114 /* If RSS is requested for the PF *and* VFs then we can't write RSS
115 * table entries that are inaccessible to VFs
116 */
117 #ifdef CONFIG_SFC_SRIOV
118 if (efx->type->sriov_wanted) {
119 if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
120 count > efx_vf_size(efx)) {
121 netif_warn(efx, probe, efx->net_dev,
122 "Reducing number of RSS channels from %u to %u for "
123 "VF support. Increase vf-msix-limit to use more "
124 "channels on the PF.\n",
125 count, efx_vf_size(efx));
126 count = efx_vf_size(efx);
127 }
128 }
129 #endif
130
131 return count;
132 }
133
efx_allocate_msix_channels(struct efx_nic * efx,unsigned int max_channels,unsigned int extra_channels,unsigned int parallelism)134 static int efx_allocate_msix_channels(struct efx_nic *efx,
135 unsigned int max_channels,
136 unsigned int extra_channels,
137 unsigned int parallelism)
138 {
139 unsigned int n_channels = parallelism;
140 int vec_count;
141 int tx_per_ev;
142 int n_xdp_tx;
143 int n_xdp_ev;
144
145 if (efx_separate_tx_channels)
146 n_channels *= 2;
147 n_channels += extra_channels;
148
149 /* To allow XDP transmit to happen from arbitrary NAPI contexts
150 * we allocate a TX queue per CPU. We share event queues across
151 * multiple tx queues, assuming tx and ev queues are both
152 * maximum size.
153 */
154 tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
155 tx_per_ev = min(tx_per_ev, EFX_MAX_TXQ_PER_CHANNEL);
156 n_xdp_tx = num_possible_cpus();
157 n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
158
159 vec_count = pci_msix_vec_count(efx->pci_dev);
160 if (vec_count < 0)
161 return vec_count;
162
163 max_channels = min_t(unsigned int, vec_count, max_channels);
164
165 /* Check resources.
166 * We need a channel per event queue, plus a VI per tx queue.
167 * This may be more pessimistic than it needs to be.
168 */
169 if (n_channels >= max_channels) {
170 efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
171 netif_warn(efx, drv, efx->net_dev,
172 "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
173 n_xdp_ev, n_channels, max_channels);
174 netif_warn(efx, drv, efx->net_dev,
175 "XDP_TX and XDP_REDIRECT might decrease device's performance\n");
176 } else if (n_channels + n_xdp_tx > efx->max_vis) {
177 efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
178 netif_warn(efx, drv, efx->net_dev,
179 "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
180 n_xdp_tx, n_channels, efx->max_vis);
181 netif_warn(efx, drv, efx->net_dev,
182 "XDP_TX and XDP_REDIRECT might decrease device's performance\n");
183 } else if (n_channels + n_xdp_ev > max_channels) {
184 efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_SHARED;
185 netif_warn(efx, drv, efx->net_dev,
186 "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
187 n_xdp_ev, n_channels, max_channels);
188
189 n_xdp_ev = max_channels - n_channels;
190 netif_warn(efx, drv, efx->net_dev,
191 "XDP_TX and XDP_REDIRECT will work with reduced performance (%d cpus/tx_queue)\n",
192 DIV_ROUND_UP(n_xdp_tx, tx_per_ev * n_xdp_ev));
193 } else {
194 efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DEDICATED;
195 }
196
197 if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_BORROWED) {
198 efx->n_xdp_channels = n_xdp_ev;
199 efx->xdp_tx_per_channel = tx_per_ev;
200 efx->xdp_tx_queue_count = n_xdp_tx;
201 n_channels += n_xdp_ev;
202 netif_dbg(efx, drv, efx->net_dev,
203 "Allocating %d TX and %d event queues for XDP\n",
204 n_xdp_ev * tx_per_ev, n_xdp_ev);
205 } else {
206 efx->n_xdp_channels = 0;
207 efx->xdp_tx_per_channel = 0;
208 efx->xdp_tx_queue_count = n_xdp_tx;
209 }
210
211 if (vec_count < n_channels) {
212 netif_err(efx, drv, efx->net_dev,
213 "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
214 vec_count, n_channels);
215 netif_err(efx, drv, efx->net_dev,
216 "WARNING: Performance may be reduced.\n");
217 n_channels = vec_count;
218 }
219
220 n_channels = min(n_channels, max_channels);
221
222 efx->n_channels = n_channels;
223
224 /* Ignore XDP tx channels when creating rx channels. */
225 n_channels -= efx->n_xdp_channels;
226
227 if (efx_separate_tx_channels) {
228 efx->n_tx_channels =
229 min(max(n_channels / 2, 1U),
230 efx->max_tx_channels);
231 efx->tx_channel_offset =
232 n_channels - efx->n_tx_channels;
233 efx->n_rx_channels =
234 max(n_channels -
235 efx->n_tx_channels, 1U);
236 } else {
237 efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
238 efx->tx_channel_offset = 0;
239 efx->n_rx_channels = n_channels;
240 }
241
242 efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
243 efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
244
245 efx->xdp_channel_offset = n_channels;
246
247 netif_dbg(efx, drv, efx->net_dev,
248 "Allocating %u RX channels\n",
249 efx->n_rx_channels);
250
251 return efx->n_channels;
252 }
253
254 /* Probe the number and type of interrupts we are able to obtain, and
255 * the resulting numbers of channels and RX queues.
256 */
efx_probe_interrupts(struct efx_nic * efx)257 int efx_probe_interrupts(struct efx_nic *efx)
258 {
259 unsigned int extra_channels = 0;
260 unsigned int rss_spread;
261 unsigned int i, j;
262 int rc;
263
264 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
265 if (efx->extra_channel_type[i])
266 ++extra_channels;
267
268 if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
269 unsigned int parallelism = efx_wanted_parallelism(efx);
270 struct msix_entry xentries[EFX_MAX_CHANNELS];
271 unsigned int n_channels;
272
273 rc = efx_allocate_msix_channels(efx, efx->max_channels,
274 extra_channels, parallelism);
275 if (rc >= 0) {
276 n_channels = rc;
277 for (i = 0; i < n_channels; i++)
278 xentries[i].entry = i;
279 rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
280 n_channels);
281 }
282 if (rc < 0) {
283 /* Fall back to single channel MSI */
284 netif_err(efx, drv, efx->net_dev,
285 "could not enable MSI-X\n");
286 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
287 efx->interrupt_mode = EFX_INT_MODE_MSI;
288 else
289 return rc;
290 } else if (rc < n_channels) {
291 netif_err(efx, drv, efx->net_dev,
292 "WARNING: Insufficient MSI-X vectors"
293 " available (%d < %u).\n", rc, n_channels);
294 netif_err(efx, drv, efx->net_dev,
295 "WARNING: Performance may be reduced.\n");
296 n_channels = rc;
297 }
298
299 if (rc > 0) {
300 for (i = 0; i < efx->n_channels; i++)
301 efx_get_channel(efx, i)->irq =
302 xentries[i].vector;
303 }
304 }
305
306 /* Try single interrupt MSI */
307 if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
308 efx->n_channels = 1;
309 efx->n_rx_channels = 1;
310 efx->n_tx_channels = 1;
311 efx->n_xdp_channels = 0;
312 efx->xdp_channel_offset = efx->n_channels;
313 rc = pci_enable_msi(efx->pci_dev);
314 if (rc == 0) {
315 efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
316 } else {
317 netif_err(efx, drv, efx->net_dev,
318 "could not enable MSI\n");
319 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
320 efx->interrupt_mode = EFX_INT_MODE_LEGACY;
321 else
322 return rc;
323 }
324 }
325
326 /* Assume legacy interrupts */
327 if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
328 efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
329 efx->n_rx_channels = 1;
330 efx->n_tx_channels = 1;
331 efx->n_xdp_channels = 0;
332 efx->xdp_channel_offset = efx->n_channels;
333 efx->legacy_irq = efx->pci_dev->irq;
334 }
335
336 /* Assign extra channels if possible, before XDP channels */
337 efx->n_extra_tx_channels = 0;
338 j = efx->xdp_channel_offset;
339 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
340 if (!efx->extra_channel_type[i])
341 continue;
342 if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
343 efx->extra_channel_type[i]->handle_no_channel(efx);
344 } else {
345 --j;
346 efx_get_channel(efx, j)->type =
347 efx->extra_channel_type[i];
348 if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
349 efx->n_extra_tx_channels++;
350 }
351 }
352
353 rss_spread = efx->n_rx_channels;
354 /* RSS might be usable on VFs even if it is disabled on the PF */
355 #ifdef CONFIG_SFC_SRIOV
356 if (efx->type->sriov_wanted) {
357 efx->rss_spread = ((rss_spread > 1 ||
358 !efx->type->sriov_wanted(efx)) ?
359 rss_spread : efx_vf_size(efx));
360 return 0;
361 }
362 #endif
363 efx->rss_spread = rss_spread;
364
365 return 0;
366 }
367
368 #if defined(CONFIG_SMP)
efx_set_interrupt_affinity(struct efx_nic * efx)369 void efx_set_interrupt_affinity(struct efx_nic *efx)
370 {
371 struct efx_channel *channel;
372 unsigned int cpu;
373
374 efx_for_each_channel(channel, efx) {
375 cpu = cpumask_local_spread(channel->channel,
376 pcibus_to_node(efx->pci_dev->bus));
377 irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
378 }
379 }
380
efx_clear_interrupt_affinity(struct efx_nic * efx)381 void efx_clear_interrupt_affinity(struct efx_nic *efx)
382 {
383 struct efx_channel *channel;
384
385 efx_for_each_channel(channel, efx)
386 irq_set_affinity_hint(channel->irq, NULL);
387 }
388 #else
/* Non-SMP build: there is nothing to spread, so this is a no-op */
void efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
	return;
}
393
/* Non-SMP build: no hints were ever set, so this is a no-op */
void efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
	return;
}
398 #endif /* CONFIG_SMP */
399
efx_remove_interrupts(struct efx_nic * efx)400 void efx_remove_interrupts(struct efx_nic *efx)
401 {
402 struct efx_channel *channel;
403
404 /* Remove MSI/MSI-X interrupts */
405 efx_for_each_channel(channel, efx)
406 channel->irq = 0;
407 pci_disable_msi(efx->pci_dev);
408 pci_disable_msix(efx->pci_dev);
409
410 /* Remove legacy interrupt */
411 efx->legacy_irq = 0;
412 }
413
414 /***************
415 * EVENT QUEUES
416 ***************/
417
418 /* Create event queue
419 * Event queue memory allocations are done only once. If the channel
420 * is reset, the memory buffer will be reused; this guards against
421 * errors during channel reset and also simplifies interrupt handling.
422 */
efx_probe_eventq(struct efx_channel * channel)423 int efx_probe_eventq(struct efx_channel *channel)
424 {
425 struct efx_nic *efx = channel->efx;
426 unsigned long entries;
427
428 netif_dbg(efx, probe, efx->net_dev,
429 "chan %d create event queue\n", channel->channel);
430
431 /* Build an event queue with room for one event per tx and rx buffer,
432 * plus some extra for link state events and MCDI completions.
433 */
434 entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
435 EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
436 channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
437
438 return efx_nic_probe_eventq(channel);
439 }
440
441 /* Prepare channel's event queue */
efx_init_eventq(struct efx_channel * channel)442 int efx_init_eventq(struct efx_channel *channel)
443 {
444 struct efx_nic *efx = channel->efx;
445 int rc;
446
447 EFX_WARN_ON_PARANOID(channel->eventq_init);
448
449 netif_dbg(efx, drv, efx->net_dev,
450 "chan %d init event queue\n", channel->channel);
451
452 rc = efx_nic_init_eventq(channel);
453 if (rc == 0) {
454 efx->type->push_irq_moderation(channel);
455 channel->eventq_read_ptr = 0;
456 channel->eventq_init = true;
457 }
458 return rc;
459 }
460
461 /* Enable event queue processing and NAPI */
efx_start_eventq(struct efx_channel * channel)462 void efx_start_eventq(struct efx_channel *channel)
463 {
464 netif_dbg(channel->efx, ifup, channel->efx->net_dev,
465 "chan %d start event queue\n", channel->channel);
466
467 /* Make sure the NAPI handler sees the enabled flag set */
468 channel->enabled = true;
469 smp_wmb();
470
471 napi_enable(&channel->napi_str);
472 efx_nic_eventq_read_ack(channel);
473 }
474
475 /* Disable event queue processing and NAPI */
efx_stop_eventq(struct efx_channel * channel)476 void efx_stop_eventq(struct efx_channel *channel)
477 {
478 if (!channel->enabled)
479 return;
480
481 napi_disable(&channel->napi_str);
482 channel->enabled = false;
483 }
484
efx_fini_eventq(struct efx_channel * channel)485 void efx_fini_eventq(struct efx_channel *channel)
486 {
487 if (!channel->eventq_init)
488 return;
489
490 netif_dbg(channel->efx, drv, channel->efx->net_dev,
491 "chan %d fini event queue\n", channel->channel);
492
493 efx_nic_fini_eventq(channel);
494 channel->eventq_init = false;
495 }
496
efx_remove_eventq(struct efx_channel * channel)497 void efx_remove_eventq(struct efx_channel *channel)
498 {
499 netif_dbg(channel->efx, drv, channel->efx->net_dev,
500 "chan %d remove event queue\n", channel->channel);
501
502 efx_nic_remove_eventq(channel);
503 }
504
505 /**************************************************************************
506 *
507 * Channel handling
508 *
509 *************************************************************************/
510
511 #ifdef CONFIG_RFS_ACCEL
efx_filter_rfs_expire(struct work_struct * data)512 static void efx_filter_rfs_expire(struct work_struct *data)
513 {
514 struct delayed_work *dwork = to_delayed_work(data);
515 struct efx_channel *channel;
516 unsigned int time, quota;
517
518 channel = container_of(dwork, struct efx_channel, filter_work);
519 time = jiffies - channel->rfs_last_expiry;
520 quota = channel->rfs_filter_count * time / (30 * HZ);
521 if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
522 channel->rfs_last_expiry += time;
523 /* Ensure we do more work eventually even if NAPI poll is not happening */
524 schedule_delayed_work(dwork, 30 * HZ);
525 }
526 #endif
527
528 /* Allocate and initialise a channel structure. */
efx_alloc_channel(struct efx_nic * efx,int i)529 static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
530 {
531 struct efx_rx_queue *rx_queue;
532 struct efx_tx_queue *tx_queue;
533 struct efx_channel *channel;
534 int j;
535
536 channel = kzalloc(sizeof(*channel), GFP_KERNEL);
537 if (!channel)
538 return NULL;
539
540 channel->efx = efx;
541 channel->channel = i;
542 channel->type = &efx_default_channel_type;
543
544 for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
545 tx_queue = &channel->tx_queue[j];
546 tx_queue->efx = efx;
547 tx_queue->queue = -1;
548 tx_queue->label = j;
549 tx_queue->channel = channel;
550 }
551
552 #ifdef CONFIG_RFS_ACCEL
553 INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
554 #endif
555
556 rx_queue = &channel->rx_queue;
557 rx_queue->efx = efx;
558 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
559
560 return channel;
561 }
562
efx_init_channels(struct efx_nic * efx)563 int efx_init_channels(struct efx_nic *efx)
564 {
565 unsigned int i;
566
567 for (i = 0; i < EFX_MAX_CHANNELS; i++) {
568 efx->channel[i] = efx_alloc_channel(efx, i);
569 if (!efx->channel[i])
570 return -ENOMEM;
571 efx->msi_context[i].efx = efx;
572 efx->msi_context[i].index = i;
573 }
574
575 /* Higher numbered interrupt modes are less capable! */
576 efx->interrupt_mode = min(efx->type->min_interrupt_mode,
577 efx_interrupt_mode);
578
579 efx->max_channels = EFX_MAX_CHANNELS;
580 efx->max_tx_channels = EFX_MAX_CHANNELS;
581
582 return 0;
583 }
584
efx_fini_channels(struct efx_nic * efx)585 void efx_fini_channels(struct efx_nic *efx)
586 {
587 unsigned int i;
588
589 for (i = 0; i < EFX_MAX_CHANNELS; i++)
590 if (efx->channel[i]) {
591 kfree(efx->channel[i]);
592 efx->channel[i] = NULL;
593 }
594 }
595
596 /* Allocate and initialise a channel structure, copying parameters
597 * (but not resources) from an old channel structure.
598 */
efx_copy_channel(const struct efx_channel * old_channel)599 struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
600 {
601 struct efx_rx_queue *rx_queue;
602 struct efx_tx_queue *tx_queue;
603 struct efx_channel *channel;
604 int j;
605
606 channel = kmalloc(sizeof(*channel), GFP_KERNEL);
607 if (!channel)
608 return NULL;
609
610 *channel = *old_channel;
611
612 channel->napi_dev = NULL;
613 INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
614 channel->napi_str.napi_id = 0;
615 channel->napi_str.state = 0;
616 memset(&channel->eventq, 0, sizeof(channel->eventq));
617
618 for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
619 tx_queue = &channel->tx_queue[j];
620 if (tx_queue->channel)
621 tx_queue->channel = channel;
622 tx_queue->buffer = NULL;
623 tx_queue->cb_page = NULL;
624 memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
625 }
626
627 rx_queue = &channel->rx_queue;
628 rx_queue->buffer = NULL;
629 memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
630 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
631 #ifdef CONFIG_RFS_ACCEL
632 INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
633 #endif
634
635 return channel;
636 }
637
efx_probe_channel(struct efx_channel * channel)638 static int efx_probe_channel(struct efx_channel *channel)
639 {
640 struct efx_tx_queue *tx_queue;
641 struct efx_rx_queue *rx_queue;
642 int rc;
643
644 netif_dbg(channel->efx, probe, channel->efx->net_dev,
645 "creating channel %d\n", channel->channel);
646
647 rc = channel->type->pre_probe(channel);
648 if (rc)
649 goto fail;
650
651 rc = efx_probe_eventq(channel);
652 if (rc)
653 goto fail;
654
655 efx_for_each_channel_tx_queue(tx_queue, channel) {
656 rc = efx_probe_tx_queue(tx_queue);
657 if (rc)
658 goto fail;
659 }
660
661 efx_for_each_channel_rx_queue(rx_queue, channel) {
662 rc = efx_probe_rx_queue(rx_queue);
663 if (rc)
664 goto fail;
665 }
666
667 channel->rx_list = NULL;
668
669 return 0;
670
671 fail:
672 efx_remove_channel(channel);
673 return rc;
674 }
675
efx_get_channel_name(struct efx_channel * channel,char * buf,size_t len)676 void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
677 {
678 struct efx_nic *efx = channel->efx;
679 const char *type;
680 int number;
681
682 number = channel->channel;
683
684 if (number >= efx->xdp_channel_offset &&
685 !WARN_ON_ONCE(!efx->n_xdp_channels)) {
686 type = "-xdp";
687 number -= efx->xdp_channel_offset;
688 } else if (efx->tx_channel_offset == 0) {
689 type = "";
690 } else if (number < efx->tx_channel_offset) {
691 type = "-rx";
692 } else {
693 type = "-tx";
694 number -= efx->tx_channel_offset;
695 }
696 snprintf(buf, len, "%s%s-%d", efx->name, type, number);
697 }
698
efx_set_channel_names(struct efx_nic * efx)699 void efx_set_channel_names(struct efx_nic *efx)
700 {
701 struct efx_channel *channel;
702
703 efx_for_each_channel(channel, efx)
704 channel->type->get_name(channel,
705 efx->msi_context[channel->channel].name,
706 sizeof(efx->msi_context[0].name));
707 }
708
efx_probe_channels(struct efx_nic * efx)709 int efx_probe_channels(struct efx_nic *efx)
710 {
711 struct efx_channel *channel;
712 int rc;
713
714 /* Restart special buffer allocation */
715 efx->next_buffer_table = 0;
716
717 /* Probe channels in reverse, so that any 'extra' channels
718 * use the start of the buffer table. This allows the traffic
719 * channels to be resized without moving them or wasting the
720 * entries before them.
721 */
722 efx_for_each_channel_rev(channel, efx) {
723 rc = efx_probe_channel(channel);
724 if (rc) {
725 netif_err(efx, probe, efx->net_dev,
726 "failed to create channel %d\n",
727 channel->channel);
728 goto fail;
729 }
730 }
731 efx_set_channel_names(efx);
732
733 return 0;
734
735 fail:
736 efx_remove_channels(efx);
737 return rc;
738 }
739
efx_remove_channel(struct efx_channel * channel)740 void efx_remove_channel(struct efx_channel *channel)
741 {
742 struct efx_tx_queue *tx_queue;
743 struct efx_rx_queue *rx_queue;
744
745 netif_dbg(channel->efx, drv, channel->efx->net_dev,
746 "destroy chan %d\n", channel->channel);
747
748 efx_for_each_channel_rx_queue(rx_queue, channel)
749 efx_remove_rx_queue(rx_queue);
750 efx_for_each_channel_tx_queue(tx_queue, channel)
751 efx_remove_tx_queue(tx_queue);
752 efx_remove_eventq(channel);
753 channel->type->post_remove(channel);
754 }
755
efx_remove_channels(struct efx_nic * efx)756 void efx_remove_channels(struct efx_nic *efx)
757 {
758 struct efx_channel *channel;
759
760 efx_for_each_channel(channel, efx)
761 efx_remove_channel(channel);
762
763 kfree(efx->xdp_tx_queues);
764 }
765
efx_realloc_channels(struct efx_nic * efx,u32 rxq_entries,u32 txq_entries)766 int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
767 {
768 struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
769 unsigned int i, next_buffer_table = 0;
770 u32 old_rxq_entries, old_txq_entries;
771 int rc, rc2;
772
773 rc = efx_check_disabled(efx);
774 if (rc)
775 return rc;
776
777 /* Not all channels should be reallocated. We must avoid
778 * reallocating their buffer table entries.
779 */
780 efx_for_each_channel(channel, efx) {
781 struct efx_rx_queue *rx_queue;
782 struct efx_tx_queue *tx_queue;
783
784 if (channel->type->copy)
785 continue;
786 next_buffer_table = max(next_buffer_table,
787 channel->eventq.index +
788 channel->eventq.entries);
789 efx_for_each_channel_rx_queue(rx_queue, channel)
790 next_buffer_table = max(next_buffer_table,
791 rx_queue->rxd.index +
792 rx_queue->rxd.entries);
793 efx_for_each_channel_tx_queue(tx_queue, channel)
794 next_buffer_table = max(next_buffer_table,
795 tx_queue->txd.index +
796 tx_queue->txd.entries);
797 }
798
799 efx_device_detach_sync(efx);
800 efx_stop_all(efx);
801 efx_soft_disable_interrupts(efx);
802
803 /* Clone channels (where possible) */
804 memset(other_channel, 0, sizeof(other_channel));
805 for (i = 0; i < efx->n_channels; i++) {
806 channel = efx->channel[i];
807 if (channel->type->copy)
808 channel = channel->type->copy(channel);
809 if (!channel) {
810 rc = -ENOMEM;
811 goto out;
812 }
813 other_channel[i] = channel;
814 }
815
816 /* Swap entry counts and channel pointers */
817 old_rxq_entries = efx->rxq_entries;
818 old_txq_entries = efx->txq_entries;
819 efx->rxq_entries = rxq_entries;
820 efx->txq_entries = txq_entries;
821 for (i = 0; i < efx->n_channels; i++) {
822 channel = efx->channel[i];
823 efx->channel[i] = other_channel[i];
824 other_channel[i] = channel;
825 }
826
827 /* Restart buffer table allocation */
828 efx->next_buffer_table = next_buffer_table;
829
830 for (i = 0; i < efx->n_channels; i++) {
831 channel = efx->channel[i];
832 if (!channel->type->copy)
833 continue;
834 rc = efx_probe_channel(channel);
835 if (rc)
836 goto rollback;
837 efx_init_napi_channel(efx->channel[i]);
838 }
839
840 out:
841 /* Destroy unused channel structures */
842 for (i = 0; i < efx->n_channels; i++) {
843 channel = other_channel[i];
844 if (channel && channel->type->copy) {
845 efx_fini_napi_channel(channel);
846 efx_remove_channel(channel);
847 kfree(channel);
848 }
849 }
850
851 rc2 = efx_soft_enable_interrupts(efx);
852 if (rc2) {
853 rc = rc ? rc : rc2;
854 netif_err(efx, drv, efx->net_dev,
855 "unable to restart interrupts on channel reallocation\n");
856 efx_schedule_reset(efx, RESET_TYPE_DISABLE);
857 } else {
858 efx_start_all(efx);
859 efx_device_attach_if_not_resetting(efx);
860 }
861 return rc;
862
863 rollback:
864 /* Swap back */
865 efx->rxq_entries = old_rxq_entries;
866 efx->txq_entries = old_txq_entries;
867 for (i = 0; i < efx->n_channels; i++) {
868 channel = efx->channel[i];
869 efx->channel[i] = other_channel[i];
870 other_channel[i] = channel;
871 }
872 goto out;
873 }
874
875 static inline int
efx_set_xdp_tx_queue(struct efx_nic * efx,int xdp_queue_number,struct efx_tx_queue * tx_queue)876 efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number,
877 struct efx_tx_queue *tx_queue)
878 {
879 if (xdp_queue_number >= efx->xdp_tx_queue_count)
880 return -EINVAL;
881
882 netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
883 tx_queue->channel->channel, tx_queue->label,
884 xdp_queue_number, tx_queue->queue);
885 efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
886 return 0;
887 }
888
efx_set_channels(struct efx_nic * efx)889 int efx_set_channels(struct efx_nic *efx)
890 {
891 struct efx_tx_queue *tx_queue;
892 struct efx_channel *channel;
893 unsigned int next_queue = 0;
894 int xdp_queue_number;
895 int rc;
896
897 efx->tx_channel_offset =
898 efx_separate_tx_channels ?
899 efx->n_channels - efx->n_tx_channels : 0;
900
901 if (efx->xdp_tx_queue_count) {
902 EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
903
904 /* Allocate array for XDP TX queue lookup. */
905 efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
906 sizeof(*efx->xdp_tx_queues),
907 GFP_KERNEL);
908 if (!efx->xdp_tx_queues)
909 return -ENOMEM;
910 }
911
912 /* We need to mark which channels really have RX and TX
913 * queues, and adjust the TX queue numbers if we have separate
914 * RX-only and TX-only channels.
915 */
916 xdp_queue_number = 0;
917 efx_for_each_channel(channel, efx) {
918 if (channel->channel < efx->n_rx_channels)
919 channel->rx_queue.core_index = channel->channel;
920 else
921 channel->rx_queue.core_index = -1;
922
923 if (channel->channel >= efx->tx_channel_offset) {
924 if (efx_channel_is_xdp_tx(channel)) {
925 efx_for_each_channel_tx_queue(tx_queue, channel) {
926 tx_queue->queue = next_queue++;
927 rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
928 if (rc == 0)
929 xdp_queue_number++;
930 }
931 } else {
932 efx_for_each_channel_tx_queue(tx_queue, channel) {
933 tx_queue->queue = next_queue++;
934 netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
935 channel->channel, tx_queue->label,
936 tx_queue->queue);
937 }
938
939 /* If XDP is borrowing queues from net stack, it must use the queue
940 * with no csum offload, which is the first one of the channel
941 * (note: channel->tx_queue_by_type is not initialized yet)
942 */
943 if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) {
944 tx_queue = &channel->tx_queue[0];
945 rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
946 if (rc == 0)
947 xdp_queue_number++;
948 }
949 }
950 }
951 }
952 WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED &&
953 xdp_queue_number != efx->xdp_tx_queue_count);
954 WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED &&
955 xdp_queue_number > efx->xdp_tx_queue_count);
956
957 /* If we have more CPUs than assigned XDP TX queues, assign the already
958 * existing queues to the exceeding CPUs
959 */
960 next_queue = 0;
961 while (xdp_queue_number < efx->xdp_tx_queue_count) {
962 tx_queue = efx->xdp_tx_queues[next_queue++];
963 rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue);
964 if (rc == 0)
965 xdp_queue_number++;
966 }
967
968 rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
969 if (rc)
970 return rc;
971 return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
972 }
973
efx_default_channel_want_txqs(struct efx_channel * channel)974 bool efx_default_channel_want_txqs(struct efx_channel *channel)
975 {
976 return channel->channel - channel->efx->tx_channel_offset <
977 channel->efx->n_tx_channels;
978 }
979
980 /*************
981 * START/STOP
982 *************/
983
efx_soft_enable_interrupts(struct efx_nic * efx)984 int efx_soft_enable_interrupts(struct efx_nic *efx)
985 {
986 struct efx_channel *channel, *end_channel;
987 int rc;
988
989 BUG_ON(efx->state == STATE_DISABLED);
990
991 efx->irq_soft_enabled = true;
992 smp_wmb();
993
994 efx_for_each_channel(channel, efx) {
995 if (!channel->type->keep_eventq) {
996 rc = efx_init_eventq(channel);
997 if (rc)
998 goto fail;
999 }
1000 efx_start_eventq(channel);
1001 }
1002
1003 efx_mcdi_mode_event(efx);
1004
1005 return 0;
1006 fail:
1007 end_channel = channel;
1008 efx_for_each_channel(channel, efx) {
1009 if (channel == end_channel)
1010 break;
1011 efx_stop_eventq(channel);
1012 if (!channel->type->keep_eventq)
1013 efx_fini_eventq(channel);
1014 }
1015
1016 return rc;
1017 }
1018
efx_soft_disable_interrupts(struct efx_nic * efx)1019 void efx_soft_disable_interrupts(struct efx_nic *efx)
1020 {
1021 struct efx_channel *channel;
1022
1023 if (efx->state == STATE_DISABLED)
1024 return;
1025
1026 efx_mcdi_mode_poll(efx);
1027
1028 efx->irq_soft_enabled = false;
1029 smp_wmb();
1030
1031 if (efx->legacy_irq)
1032 synchronize_irq(efx->legacy_irq);
1033
1034 efx_for_each_channel(channel, efx) {
1035 if (channel->irq)
1036 synchronize_irq(channel->irq);
1037
1038 efx_stop_eventq(channel);
1039 if (!channel->type->keep_eventq)
1040 efx_fini_eventq(channel);
1041 }
1042
1043 /* Flush the asynchronous MCDI request queue */
1044 efx_mcdi_flush_async(efx);
1045 }
1046
efx_enable_interrupts(struct efx_nic * efx)1047 int efx_enable_interrupts(struct efx_nic *efx)
1048 {
1049 struct efx_channel *channel, *end_channel;
1050 int rc;
1051
1052 /* TODO: Is this really a bug? */
1053 BUG_ON(efx->state == STATE_DISABLED);
1054
1055 if (efx->eeh_disabled_legacy_irq) {
1056 enable_irq(efx->legacy_irq);
1057 efx->eeh_disabled_legacy_irq = false;
1058 }
1059
1060 efx->type->irq_enable_master(efx);
1061
1062 efx_for_each_channel(channel, efx) {
1063 if (channel->type->keep_eventq) {
1064 rc = efx_init_eventq(channel);
1065 if (rc)
1066 goto fail;
1067 }
1068 }
1069
1070 rc = efx_soft_enable_interrupts(efx);
1071 if (rc)
1072 goto fail;
1073
1074 return 0;
1075
1076 fail:
1077 end_channel = channel;
1078 efx_for_each_channel(channel, efx) {
1079 if (channel == end_channel)
1080 break;
1081 if (channel->type->keep_eventq)
1082 efx_fini_eventq(channel);
1083 }
1084
1085 efx->type->irq_disable_non_ev(efx);
1086
1087 return rc;
1088 }
1089
efx_disable_interrupts(struct efx_nic * efx)1090 void efx_disable_interrupts(struct efx_nic *efx)
1091 {
1092 struct efx_channel *channel;
1093
1094 efx_soft_disable_interrupts(efx);
1095
1096 efx_for_each_channel(channel, efx) {
1097 if (channel->type->keep_eventq)
1098 efx_fini_eventq(channel);
1099 }
1100
1101 efx->type->irq_disable_non_ev(efx);
1102 }
1103
efx_start_channels(struct efx_nic * efx)1104 void efx_start_channels(struct efx_nic *efx)
1105 {
1106 struct efx_tx_queue *tx_queue;
1107 struct efx_rx_queue *rx_queue;
1108 struct efx_channel *channel;
1109
1110 efx_for_each_channel(channel, efx) {
1111 efx_for_each_channel_tx_queue(tx_queue, channel) {
1112 efx_init_tx_queue(tx_queue);
1113 atomic_inc(&efx->active_queues);
1114 }
1115
1116 efx_for_each_channel_rx_queue(rx_queue, channel) {
1117 efx_init_rx_queue(rx_queue);
1118 atomic_inc(&efx->active_queues);
1119 efx_stop_eventq(channel);
1120 efx_fast_push_rx_descriptors(rx_queue, false);
1121 efx_start_eventq(channel);
1122 }
1123
1124 WARN_ON(channel->rx_pkt_n_frags);
1125 }
1126 }
1127
efx_stop_channels(struct efx_nic * efx)1128 void efx_stop_channels(struct efx_nic *efx)
1129 {
1130 struct efx_tx_queue *tx_queue;
1131 struct efx_rx_queue *rx_queue;
1132 struct efx_channel *channel;
1133 int rc = 0;
1134
1135 /* Stop RX refill */
1136 efx_for_each_channel(channel, efx) {
1137 efx_for_each_channel_rx_queue(rx_queue, channel)
1138 rx_queue->refill_enabled = false;
1139 }
1140
1141 efx_for_each_channel(channel, efx) {
1142 /* RX packet processing is pipelined, so wait for the
1143 * NAPI handler to complete. At least event queue 0
1144 * might be kept active by non-data events, so don't
1145 * use napi_synchronize() but actually disable NAPI
1146 * temporarily.
1147 */
1148 if (efx_channel_has_rx_queue(channel)) {
1149 efx_stop_eventq(channel);
1150 efx_start_eventq(channel);
1151 }
1152 }
1153
1154 if (efx->type->fini_dmaq)
1155 rc = efx->type->fini_dmaq(efx);
1156
1157 if (rc) {
1158 netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
1159 } else {
1160 netif_dbg(efx, drv, efx->net_dev,
1161 "successfully flushed all queues\n");
1162 }
1163
1164 efx_for_each_channel(channel, efx) {
1165 efx_for_each_channel_rx_queue(rx_queue, channel)
1166 efx_fini_rx_queue(rx_queue);
1167 efx_for_each_channel_tx_queue(tx_queue, channel)
1168 efx_fini_tx_queue(tx_queue);
1169 }
1170 }
1171
1172 /**************************************************************************
1173 *
1174 * NAPI interface
1175 *
1176 *************************************************************************/
1177
1178 /* Process channel's event queue
1179 *
1180 * This function is responsible for processing the event queue of a
1181 * single channel. The caller must guarantee that this function will
1182 * never be concurrently called more than once on the same channel,
1183 * though different channels may be being processed concurrently.
1184 */
efx_process_channel(struct efx_channel * channel,int budget)1185 static int efx_process_channel(struct efx_channel *channel, int budget)
1186 {
1187 struct efx_tx_queue *tx_queue;
1188 struct list_head rx_list;
1189 int spent;
1190
1191 if (unlikely(!channel->enabled))
1192 return 0;
1193
1194 /* Prepare the batch receive list */
1195 EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
1196 INIT_LIST_HEAD(&rx_list);
1197 channel->rx_list = &rx_list;
1198
1199 efx_for_each_channel_tx_queue(tx_queue, channel) {
1200 tx_queue->pkts_compl = 0;
1201 tx_queue->bytes_compl = 0;
1202 }
1203
1204 spent = efx_nic_process_eventq(channel, budget);
1205 if (spent && efx_channel_has_rx_queue(channel)) {
1206 struct efx_rx_queue *rx_queue =
1207 efx_channel_get_rx_queue(channel);
1208
1209 efx_rx_flush_packet(channel);
1210 efx_fast_push_rx_descriptors(rx_queue, true);
1211 }
1212
1213 /* Update BQL */
1214 efx_for_each_channel_tx_queue(tx_queue, channel) {
1215 if (tx_queue->bytes_compl) {
1216 netdev_tx_completed_queue(tx_queue->core_txq,
1217 tx_queue->pkts_compl,
1218 tx_queue->bytes_compl);
1219 }
1220 }
1221
1222 /* Receive any packets we queued up */
1223 netif_receive_skb_list(channel->rx_list);
1224 channel->rx_list = NULL;
1225
1226 return spent;
1227 }
1228
efx_update_irq_mod(struct efx_nic * efx,struct efx_channel * channel)1229 static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
1230 {
1231 int step = efx->irq_mod_step_us;
1232
1233 if (channel->irq_mod_score < irq_adapt_low_thresh) {
1234 if (channel->irq_moderation_us > step) {
1235 channel->irq_moderation_us -= step;
1236 efx->type->push_irq_moderation(channel);
1237 }
1238 } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
1239 if (channel->irq_moderation_us <
1240 efx->irq_rx_moderation_us) {
1241 channel->irq_moderation_us += step;
1242 efx->type->push_irq_moderation(channel);
1243 }
1244 }
1245
1246 channel->irq_count = 0;
1247 channel->irq_mod_score = 0;
1248 }
1249
1250 /* NAPI poll handler
1251 *
1252 * NAPI guarantees serialisation of polls of the same device, which
1253 * provides the guarantee required by efx_process_channel().
1254 */
efx_poll(struct napi_struct * napi,int budget)1255 static int efx_poll(struct napi_struct *napi, int budget)
1256 {
1257 struct efx_channel *channel =
1258 container_of(napi, struct efx_channel, napi_str);
1259 struct efx_nic *efx = channel->efx;
1260 #ifdef CONFIG_RFS_ACCEL
1261 unsigned int time;
1262 #endif
1263 int spent;
1264
1265 netif_vdbg(efx, intr, efx->net_dev,
1266 "channel %d NAPI poll executing on CPU %d\n",
1267 channel->channel, raw_smp_processor_id());
1268
1269 spent = efx_process_channel(channel, budget);
1270
1271 xdp_do_flush_map();
1272
1273 if (spent < budget) {
1274 if (efx_channel_has_rx_queue(channel) &&
1275 efx->irq_rx_adaptive &&
1276 unlikely(++channel->irq_count == 1000)) {
1277 efx_update_irq_mod(efx, channel);
1278 }
1279
1280 #ifdef CONFIG_RFS_ACCEL
1281 /* Perhaps expire some ARFS filters */
1282 time = jiffies - channel->rfs_last_expiry;
1283 /* Would our quota be >= 20? */
1284 if (channel->rfs_filter_count * time >= 600 * HZ)
1285 mod_delayed_work(system_wq, &channel->filter_work, 0);
1286 #endif
1287
1288 /* There is no race here; although napi_disable() will
1289 * only wait for napi_complete(), this isn't a problem
1290 * since efx_nic_eventq_read_ack() will have no effect if
1291 * interrupts have already been disabled.
1292 */
1293 if (napi_complete_done(napi, spent))
1294 efx_nic_eventq_read_ack(channel);
1295 }
1296
1297 return spent;
1298 }
1299
efx_init_napi_channel(struct efx_channel * channel)1300 void efx_init_napi_channel(struct efx_channel *channel)
1301 {
1302 struct efx_nic *efx = channel->efx;
1303
1304 channel->napi_dev = efx->net_dev;
1305 netif_napi_add(channel->napi_dev, &channel->napi_str,
1306 efx_poll, napi_weight);
1307 }
1308
efx_init_napi(struct efx_nic * efx)1309 void efx_init_napi(struct efx_nic *efx)
1310 {
1311 struct efx_channel *channel;
1312
1313 efx_for_each_channel(channel, efx)
1314 efx_init_napi_channel(channel);
1315 }
1316
efx_fini_napi_channel(struct efx_channel * channel)1317 void efx_fini_napi_channel(struct efx_channel *channel)
1318 {
1319 if (channel->napi_dev)
1320 netif_napi_del(&channel->napi_str);
1321
1322 channel->napi_dev = NULL;
1323 }
1324
efx_fini_napi(struct efx_nic * efx)1325 void efx_fini_napi(struct efx_nic *efx)
1326 {
1327 struct efx_channel *channel;
1328
1329 efx_for_each_channel(channel, efx)
1330 efx_fini_napi_channel(channel);
1331 }
1332