1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2021 Google, Inc.
5 */
6
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <linux/utsname.h>
16 #include <linux/version.h>
17 #include <net/sch_generic.h>
18 #include "gve.h"
19 #include "gve_dqo.h"
20 #include "gve_adminq.h"
21 #include "gve_register.h"
22
23 #define GVE_DEFAULT_RX_COPYBREAK (256)
24
25 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
26 #define GVE_VERSION "1.0.0"
27 #define GVE_VERSION_PREFIX "GVE-"
28
29 // Minimum amount of time between queue kicks in msec (10 seconds)
30 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
31
32 const char gve_version_str[] = GVE_VERSION;
33 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
34
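/* Share the driver's identity (OS type, kernel version strings and
 * capability flags) with the device through a DMA-able gve_driver_info
 * block. Devices that predate this admin command return -EOPNOTSUPP,
 * which is treated as success.
 */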
35 static int gve_verify_driver_compatibility(struct gve_priv *priv)
36 {
37 int err;
38 struct gve_driver_info *driver_info;
39 dma_addr_t driver_info_bus;
40
41 driver_info = dma_alloc_coherent(&priv->pdev->dev,
42 sizeof(struct gve_driver_info),
43 &driver_info_bus, GFP_KERNEL);
44 if (!driver_info)
45 return -ENOMEM;
46
47 *driver_info = (struct gve_driver_info) {
48 .os_type = 1, /* Linux */
49 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
50 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
51 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
52 .driver_capability_flags = {
53 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
54 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
55 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
56 cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
57 },
58 };
59 strscpy(driver_info->os_version_str1, utsname()->release,
60 sizeof(driver_info->os_version_str1));
61 strscpy(driver_info->os_version_str2, utsname()->version,
62 sizeof(driver_info->os_version_str2));
63
64 err = gve_adminq_verify_driver_compatibility(priv,
65 sizeof(struct gve_driver_info),
66 driver_info_bus);
67
68 /* It's ok if the device doesn't support this */
69 if (err == -EOPNOTSUPP)
70 err = 0;
71
72 dma_free_coherent(&priv->pdev->dev,
73 sizeof(struct gve_driver_info),
74 driver_info, driver_info_bus);
75 return err;
76 }
77
78 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
79 {
80 struct gve_priv *priv = netdev_priv(dev);
81
82 if (gve_is_gqi(priv))
83 return gve_tx(skb, dev);
84 else
85 return gve_tx_dqo(skb, dev);
86 }
87
88 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
89 {
90 struct gve_priv *priv = netdev_priv(dev);
91 unsigned int start;
92 u64 packets, bytes;
93 int ring;
94
95 if (priv->rx) {
96 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
97 do {
98 start =
99 u64_stats_fetch_begin(&priv->rx[ring].statss);
100 packets = priv->rx[ring].rpackets;
101 bytes = priv->rx[ring].rbytes;
102 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
103 start));
104 s->rx_packets += packets;
105 s->rx_bytes += bytes;
106 }
107 }
108 if (priv->tx) {
109 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
110 do {
111 start =
112 u64_stats_fetch_begin(&priv->tx[ring].statss);
113 packets = priv->tx[ring].pkt_done;
114 bytes = priv->tx[ring].bytes_done;
115 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
116 start));
117 s->tx_packets += packets;
118 s->tx_bytes += bytes;
119 }
120 }
121 }
122
123 static int gve_alloc_counter_array(struct gve_priv *priv)
124 {
125 priv->counter_array =
126 dma_alloc_coherent(&priv->pdev->dev,
127 priv->num_event_counters *
128 sizeof(*priv->counter_array),
129 &priv->counter_array_bus, GFP_KERNEL);
130 if (!priv->counter_array)
131 return -ENOMEM;
132
133 return 0;
134 }
135
136 static void gve_free_counter_array(struct gve_priv *priv)
137 {
138 if (!priv->counter_array)
139 return;
140
141 dma_free_coherent(&priv->pdev->dev,
142 priv->num_event_counters *
143 sizeof(*priv->counter_array),
144 priv->counter_array, priv->counter_array_bus);
145 priv->counter_array = NULL;
146 }
147
148 /* NIC requests to report stats */
149 static void gve_stats_report_task(struct work_struct *work)
150 {
151 struct gve_priv *priv = container_of(work, struct gve_priv,
152 stats_report_task);
153 if (gve_get_do_report_stats(priv)) {
154 gve_handle_report_stats(priv);
155 gve_clear_do_report_stats(priv);
156 }
157 }
158
159 static void gve_stats_report_schedule(struct gve_priv *priv)
160 {
161 if (!gve_get_probe_in_progress(priv) &&
162 !gve_get_reset_in_progress(priv)) {
163 gve_set_do_report_stats(priv);
164 queue_work(priv->gve_wq, &priv->stats_report_task);
165 }
166 }
167
168 static void gve_stats_report_timer(struct timer_list *t)
169 {
170 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
171
172 mod_timer(&priv->stats_report_timer,
173 round_jiffies(jiffies +
174 msecs_to_jiffies(priv->stats_report_timer_period)));
175 gve_stats_report_schedule(priv);
176 }
177
178 static int gve_alloc_stats_report(struct gve_priv *priv)
179 {
180 int tx_stats_num, rx_stats_num;
181
182 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
183 priv->tx_cfg.num_queues;
184 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
185 priv->rx_cfg.num_queues;
186 priv->stats_report_len = struct_size(priv->stats_report, stats,
187 tx_stats_num + rx_stats_num);
188 priv->stats_report =
189 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
190 &priv->stats_report_bus, GFP_KERNEL);
191 if (!priv->stats_report)
192 return -ENOMEM;
193 /* Set up timer for the report-stats task */
194 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
195 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
196 return 0;
197 }
198
199 static void gve_free_stats_report(struct gve_priv *priv)
200 {
201 if (!priv->stats_report)
202 return;
203
204 del_timer_sync(&priv->stats_report_timer);
205 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
206 priv->stats_report, priv->stats_report_bus);
207 priv->stats_report = NULL;
208 }
209
210 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
211 {
212 struct gve_priv *priv = arg;
213
214 queue_work(priv->gve_wq, &priv->service_task);
215 return IRQ_HANDLED;
216 }
217
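/* Per-queue interrupt handler for the GQI format: mask the vector via its
 * IRQ doorbell and defer all work to NAPI; gve_napi_poll() re-arms the
 * vector once processing completes.
 */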
218 static irqreturn_t gve_intr(int irq, void *arg)
219 {
220 struct gve_notify_block *block = arg;
221 struct gve_priv *priv = block->priv;
222
223 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
224 napi_schedule_irqoff(&block->napi);
225 return IRQ_HANDLED;
226 }
227
228 static irqreturn_t gve_intr_dqo(int irq, void *arg)
229 {
230 struct gve_notify_block *block = arg;
231
232 /* Interrupts are automatically masked */
233 napi_schedule_irqoff(&block->napi);
234 return IRQ_HANDLED;
235 }
236
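/* GQI NAPI poll: clean TX and RX work up to @budget, re-arm the IRQ via
 * the doorbell, then re-check for work that raced with the doorbell write
 * so events are not lost while the vector was masked.
 */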
237 static int gve_napi_poll(struct napi_struct *napi, int budget)
238 {
239 struct gve_notify_block *block;
240 __be32 __iomem *irq_doorbell;
241 bool reschedule = false;
242 struct gve_priv *priv;
243 int work_done = 0;
244
245 block = container_of(napi, struct gve_notify_block, napi);
246 priv = block->priv;
247
248 if (block->tx)
249 reschedule |= gve_tx_poll(block, budget);
250 if (block->rx) {
251 work_done = gve_rx_poll(block, budget);
252 reschedule |= work_done == budget;
253 }
254
255 if (reschedule)
256 return budget;
257
258 /* Complete processing - don't unmask irq if busy polling is enabled */
259 if (likely(napi_complete_done(napi, work_done))) {
260 irq_doorbell = gve_irq_doorbell(priv, block);
261 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
262
263 /* Ensure IRQ ACK is visible before we check pending work.
264 * If the queue had issued updates, they will be visible by now.
265 */
266 mb();
267
268 if (block->tx)
269 reschedule |= gve_tx_clean_pending(priv, block->tx);
270 if (block->rx)
271 reschedule |= gve_rx_work_pending(block->rx);
272
273 if (reschedule && napi_reschedule(napi))
274 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
275 }
276 return work_done;
277 }
278
279 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
280 {
281 struct gve_notify_block *block =
282 container_of(napi, struct gve_notify_block, napi);
283 struct gve_priv *priv = block->priv;
284 bool reschedule = false;
285 int work_done = 0;
286
287 /* Clear PCI MSI-X Pending Bit Array (PBA)
288 *
289 * This bit is set if an interrupt event occurs while the vector is
290 * masked. If this bit is set and we reenable the interrupt, it will
291 * fire again. Since we're just about to poll the queue state, we don't
292 * need it to fire again.
293 *
294 * Under high softirq load, it's possible that the interrupt condition
295 * is triggered twice before we got the chance to process it.
296 */
297 gve_write_irq_doorbell_dqo(priv, block,
298 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
299
300 if (block->tx)
301 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
302
303 if (block->rx) {
304 work_done = gve_rx_poll_dqo(block, budget);
305 reschedule |= work_done == budget;
306 }
307
308 if (reschedule)
309 return budget;
310
311 if (likely(napi_complete_done(napi, work_done))) {
312 /* Enable interrupts again.
313 *
314 * We don't need to repoll afterwards because HW supports the
315 * PCI MSI-X PBA feature.
316 *
317 * Another interrupt would be triggered if a new event came in
318 * since the last one.
319 */
320 gve_write_irq_doorbell_dqo(priv, block,
321 GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
322 }
323
324 return work_done;
325 }
326
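/* Request one MSI-X vector per notification block plus one management
 * vector, and wire up the per-block IRQ handlers, affinity hints and
 * doorbell index array.
 */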
327 static int gve_alloc_notify_blocks(struct gve_priv *priv)
328 {
329 int num_vecs_requested = priv->num_ntfy_blks + 1;
330 unsigned int active_cpus;
331 int vecs_enabled;
332 int i, j;
333 int err;
334
335 priv->msix_vectors = kvcalloc(num_vecs_requested,
336 sizeof(*priv->msix_vectors), GFP_KERNEL);
337 if (!priv->msix_vectors)
338 return -ENOMEM;
339 for (i = 0; i < num_vecs_requested; i++)
340 priv->msix_vectors[i].entry = i;
341 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
342 GVE_MIN_MSIX, num_vecs_requested);
343 if (vecs_enabled < 0) {
344 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
345 GVE_MIN_MSIX, vecs_enabled);
346 err = vecs_enabled;
347 goto abort_with_msix_vectors;
348 }
349 if (vecs_enabled != num_vecs_requested) {
350 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
351 int vecs_per_type = new_num_ntfy_blks / 2;
352 int vecs_left = new_num_ntfy_blks % 2;
353
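/* Fewer vectors than requested: keep one for management and split the
 * rest evenly between TX and RX notification blocks, shrinking the
 * queue limits to match.
 */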
354 priv->num_ntfy_blks = new_num_ntfy_blks;
355 priv->mgmt_msix_idx = priv->num_ntfy_blks;
356 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
357 vecs_per_type);
358 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
359 vecs_per_type + vecs_left);
360 dev_err(&priv->pdev->dev,
361 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
362 vecs_enabled, priv->tx_cfg.max_queues,
363 priv->rx_cfg.max_queues);
364 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
365 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
366 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
367 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
368 }
369 /* Half the notification blocks go to TX and half to RX */
370 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
371
372 /* Setup Management Vector - the last vector */
373 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
374 pci_name(priv->pdev));
375 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
376 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
377 if (err) {
378 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
379 goto abort_with_msix_enabled;
380 }
381 priv->irq_db_indices =
382 dma_alloc_coherent(&priv->pdev->dev,
383 priv->num_ntfy_blks *
384 sizeof(*priv->irq_db_indices),
385 &priv->irq_db_indices_bus, GFP_KERNEL);
386 if (!priv->irq_db_indices) {
387 err = -ENOMEM;
388 goto abort_with_mgmt_vector;
389 }
390
391 priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
392 sizeof(*priv->ntfy_blocks), GFP_KERNEL);
393 if (!priv->ntfy_blocks) {
394 err = -ENOMEM;
395 goto abort_with_irq_db_indices;
396 }
397
398 /* Setup the other blocks - the first n-1 vectors */
399 for (i = 0; i < priv->num_ntfy_blks; i++) {
400 struct gve_notify_block *block = &priv->ntfy_blocks[i];
401 int msix_idx = i;
402
403 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
404 i, pci_name(priv->pdev));
405 block->priv = priv;
406 err = request_irq(priv->msix_vectors[msix_idx].vector,
407 gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
408 0, block->name, block);
409 if (err) {
410 dev_err(&priv->pdev->dev,
411 "Failed to receive msix vector %d\n", i);
412 goto abort_with_some_ntfy_blocks;
413 }
414 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
415 get_cpu_mask(i % active_cpus));
416 block->irq_db_index = &priv->irq_db_indices[i].index;
417 }
418 return 0;
419 abort_with_some_ntfy_blocks:
420 for (j = 0; j < i; j++) {
421 struct gve_notify_block *block = &priv->ntfy_blocks[j];
422 int msix_idx = j;
423
424 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
425 NULL);
426 free_irq(priv->msix_vectors[msix_idx].vector, block);
427 }
428 kvfree(priv->ntfy_blocks);
429 priv->ntfy_blocks = NULL;
430 abort_with_irq_db_indices:
431 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
432 sizeof(*priv->irq_db_indices),
433 priv->irq_db_indices, priv->irq_db_indices_bus);
434 priv->irq_db_indices = NULL;
435 abort_with_mgmt_vector:
436 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
437 abort_with_msix_enabled:
438 pci_disable_msix(priv->pdev);
439 abort_with_msix_vectors:
440 kvfree(priv->msix_vectors);
441 priv->msix_vectors = NULL;
442 return err;
443 }
444
445 static void gve_free_notify_blocks(struct gve_priv *priv)
446 {
447 int i;
448
449 if (!priv->msix_vectors)
450 return;
451
452 /* Free the irqs */
453 for (i = 0; i < priv->num_ntfy_blks; i++) {
454 struct gve_notify_block *block = &priv->ntfy_blocks[i];
455 int msix_idx = i;
456
457 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
458 NULL);
459 free_irq(priv->msix_vectors[msix_idx].vector, block);
460 }
461 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
462 kvfree(priv->ntfy_blocks);
463 priv->ntfy_blocks = NULL;
464 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
465 sizeof(*priv->irq_db_indices),
466 priv->irq_db_indices, priv->irq_db_indices_bus);
467 priv->irq_db_indices = NULL;
468 pci_disable_msix(priv->pdev);
469 kvfree(priv->msix_vectors);
470 priv->msix_vectors = NULL;
471 }
472
473 static int gve_setup_device_resources(struct gve_priv *priv)
474 {
475 int err;
476
477 err = gve_alloc_counter_array(priv);
478 if (err)
479 return err;
480 err = gve_alloc_notify_blocks(priv);
481 if (err)
482 goto abort_with_counter;
483 err = gve_alloc_stats_report(priv);
484 if (err)
485 goto abort_with_ntfy_blocks;
486 err = gve_adminq_configure_device_resources(priv,
487 priv->counter_array_bus,
488 priv->num_event_counters,
489 priv->irq_db_indices_bus,
490 priv->num_ntfy_blks);
491 if (unlikely(err)) {
492 dev_err(&priv->pdev->dev,
493 "could not setup device_resources: err=%d\n", err);
494 err = -ENXIO;
495 goto abort_with_stats_report;
496 }
497
498 if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
499 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
500 GFP_KERNEL);
501 if (!priv->ptype_lut_dqo) {
502 err = -ENOMEM;
503 goto abort_with_stats_report;
504 }
505 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
506 if (err) {
507 dev_err(&priv->pdev->dev,
508 "Failed to get ptype map: err=%d\n", err);
509 goto abort_with_ptype_lut;
510 }
511 }
512
513 err = gve_adminq_report_stats(priv, priv->stats_report_len,
514 priv->stats_report_bus,
515 GVE_STATS_REPORT_TIMER_PERIOD);
516 if (err)
517 dev_err(&priv->pdev->dev,
518 "Failed to report stats: err=%d\n", err);
519 gve_set_device_resources_ok(priv);
520 return 0;
521
522 abort_with_ptype_lut:
523 kvfree(priv->ptype_lut_dqo);
524 priv->ptype_lut_dqo = NULL;
525 abort_with_stats_report:
526 gve_free_stats_report(priv);
527 abort_with_ntfy_blocks:
528 gve_free_notify_blocks(priv);
529 abort_with_counter:
530 gve_free_counter_array(priv);
531
532 return err;
533 }
534
535 static void gve_trigger_reset(struct gve_priv *priv);
536
537 static void gve_teardown_device_resources(struct gve_priv *priv)
538 {
539 int err;
540
541 /* Tell device its resources are being freed */
542 if (gve_get_device_resources_ok(priv)) {
543 /* detach the stats report */
544 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
545 if (err) {
546 dev_err(&priv->pdev->dev,
547 "Failed to detach stats report: err=%d\n", err);
548 gve_trigger_reset(priv);
549 }
550 err = gve_adminq_deconfigure_device_resources(priv);
551 if (err) {
552 dev_err(&priv->pdev->dev,
553 "Could not deconfigure device resources: err=%d\n",
554 err);
555 gve_trigger_reset(priv);
556 }
557 }
558
559 kvfree(priv->ptype_lut_dqo);
560 priv->ptype_lut_dqo = NULL;
561
562 gve_free_counter_array(priv);
563 gve_free_notify_blocks(priv);
564 gve_free_stats_report(priv);
565 gve_clear_device_resources_ok(priv);
566 }
567
568 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
569 int (*gve_poll)(struct napi_struct *, int))
570 {
571 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
572
573 netif_napi_add(priv->dev, &block->napi, gve_poll);
574 }
575
576 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
577 {
578 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
579
580 netif_napi_del(&block->napi);
581 }
582
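/* Queue page lists (QPLs) are pools of pre-allocated, DMA-mapped pages
 * that queues in the QPL-based formats hand to the device up front; each
 * list must be registered over the admin queue before the queues that use
 * it are created.
 */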
583 static int gve_register_qpls(struct gve_priv *priv)
584 {
585 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
586 int err;
587 int i;
588
589 for (i = 0; i < num_qpls; i++) {
590 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
591 if (err) {
592 netif_err(priv, drv, priv->dev,
593 "failed to register queue page list %d\n",
594 priv->qpls[i].id);
595 /* This failure will trigger a reset - no need to clean
596 * up
597 */
598 return err;
599 }
600 }
601 return 0;
602 }
603
604 static int gve_unregister_qpls(struct gve_priv *priv)
605 {
606 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
607 int err;
608 int i;
609
610 for (i = 0; i < num_qpls; i++) {
611 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
612 /* This failure will trigger a reset - no need to clean up */
613 if (err) {
614 netif_err(priv, drv, priv->dev,
615 "Failed to unregister queue page list %d\n",
616 priv->qpls[i].id);
617 return err;
618 }
619 }
620 return 0;
621 }
622
623 static int gve_create_rings(struct gve_priv *priv)
624 {
625 int err;
626 int i;
627
628 err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
629 if (err) {
630 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
631 priv->tx_cfg.num_queues);
632 /* This failure will trigger a reset - no need to clean
633 * up
634 */
635 return err;
636 }
637 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
638 priv->tx_cfg.num_queues);
639
640 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
641 if (err) {
642 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
643 priv->rx_cfg.num_queues);
644 /* This failure will trigger a reset - no need to clean
645 * up
646 */
647 return err;
648 }
649 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
650 priv->rx_cfg.num_queues);
651
652 if (gve_is_gqi(priv)) {
653 /* Rx data ring has been prefilled with packet buffers at queue
654 * allocation time.
655 *
656 * Write the doorbell to provide descriptor slots and packet
657 * buffers to the NIC.
658 */
659 for (i = 0; i < priv->rx_cfg.num_queues; i++)
660 gve_rx_write_doorbell(priv, &priv->rx[i]);
661 } else {
662 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
663 /* Post buffers and ring doorbell. */
664 gve_rx_post_buffers_dqo(&priv->rx[i]);
665 }
666 }
667
668 return 0;
669 }
670
671 static void add_napi_init_sync_stats(struct gve_priv *priv,
672 int (*napi_poll)(struct napi_struct *napi,
673 int budget))
674 {
675 int i;
676
677 /* Add tx napi & init sync stats*/
678 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
679 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
680
681 u64_stats_init(&priv->tx[i].statss);
682 priv->tx[i].ntfy_id = ntfy_idx;
683 gve_add_napi(priv, ntfy_idx, napi_poll);
684 }
685 /* Add rx napi & init sync stats*/
686 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
687 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
688
689 u64_stats_init(&priv->rx[i].statss);
690 priv->rx[i].ntfy_id = ntfy_idx;
691 gve_add_napi(priv, ntfy_idx, napi_poll);
692 }
693 }
694
695 static void gve_tx_free_rings(struct gve_priv *priv)
696 {
697 if (gve_is_gqi(priv)) {
698 gve_tx_free_rings_gqi(priv);
699 } else {
700 gve_tx_free_rings_dqo(priv);
701 }
702 }
703
704 static int gve_alloc_rings(struct gve_priv *priv)
705 {
706 int err;
707
708 /* Setup tx rings */
709 priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx),
710 GFP_KERNEL);
711 if (!priv->tx)
712 return -ENOMEM;
713
714 if (gve_is_gqi(priv))
715 err = gve_tx_alloc_rings(priv);
716 else
717 err = gve_tx_alloc_rings_dqo(priv);
718 if (err)
719 goto free_tx;
720
721 /* Setup rx rings */
722 priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx),
723 GFP_KERNEL);
724 if (!priv->rx) {
725 err = -ENOMEM;
726 goto free_tx_queue;
727 }
728
729 if (gve_is_gqi(priv))
730 err = gve_rx_alloc_rings(priv);
731 else
732 err = gve_rx_alloc_rings_dqo(priv);
733 if (err)
734 goto free_rx;
735
736 if (gve_is_gqi(priv))
737 add_napi_init_sync_stats(priv, gve_napi_poll);
738 else
739 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
740
741 return 0;
742
743 free_rx:
744 kvfree(priv->rx);
745 priv->rx = NULL;
746 free_tx_queue:
747 gve_tx_free_rings(priv);
748 free_tx:
749 kvfree(priv->tx);
750 priv->tx = NULL;
751 return err;
752 }
753
754 static int gve_destroy_rings(struct gve_priv *priv)
755 {
756 int err;
757
758 err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
759 if (err) {
760 netif_err(priv, drv, priv->dev,
761 "failed to destroy tx queues\n");
762 /* This failure will trigger a reset - no need to clean up */
763 return err;
764 }
765 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
766 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
767 if (err) {
768 netif_err(priv, drv, priv->dev,
769 "failed to destroy rx queues\n");
770 /* This failure will trigger a reset - no need to clean up */
771 return err;
772 }
773 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
774 return 0;
775 }
776
777 static void gve_rx_free_rings(struct gve_priv *priv)
778 {
779 if (gve_is_gqi(priv))
780 gve_rx_free_rings_gqi(priv);
781 else
782 gve_rx_free_rings_dqo(priv);
783 }
784
785 static void gve_free_rings(struct gve_priv *priv)
786 {
787 int ntfy_idx;
788 int i;
789
790 if (priv->tx) {
791 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
792 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
793 gve_remove_napi(priv, ntfy_idx);
794 }
795 gve_tx_free_rings(priv);
796 kvfree(priv->tx);
797 priv->tx = NULL;
798 }
799 if (priv->rx) {
800 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
801 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
802 gve_remove_napi(priv, ntfy_idx);
803 }
804 gve_rx_free_rings(priv);
805 kvfree(priv->rx);
806 priv->rx = NULL;
807 }
808 }
809
810 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
811 struct page **page, dma_addr_t *dma,
812 enum dma_data_direction dir, gfp_t gfp_flags)
813 {
814 *page = alloc_page(gfp_flags);
815 if (!*page) {
816 priv->page_alloc_fail++;
817 return -ENOMEM;
818 }
819 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
820 if (dma_mapping_error(dev, *dma)) {
821 priv->dma_mapping_error++;
822 put_page(*page);
823 return -ENOMEM;
824 }
825 return 0;
826 }
827
828 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
829 int pages)
830 {
831 struct gve_queue_page_list *qpl = &priv->qpls[id];
832 int err;
833 int i;
834
835 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
836 netif_err(priv, drv, priv->dev,
837 "Reached max number of registered pages %llu > %llu\n",
838 pages + priv->num_registered_pages,
839 priv->max_registered_pages);
840 return -EINVAL;
841 }
842
843 qpl->id = id;
844 qpl->num_entries = 0;
845 qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
846 /* caller handles clean up */
847 if (!qpl->pages)
848 return -ENOMEM;
849 qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
850 /* caller handles clean up */
851 if (!qpl->page_buses)
852 return -ENOMEM;
853
854 for (i = 0; i < pages; i++) {
855 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
856 &qpl->page_buses[i],
857 gve_qpl_dma_dir(priv, id), GFP_KERNEL);
858 /* caller handles clean up */
859 if (err)
860 return -ENOMEM;
861 qpl->num_entries++;
862 }
863 priv->num_registered_pages += pages;
864
865 return 0;
866 }
867
868 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
869 enum dma_data_direction dir)
870 {
871 if (!dma_mapping_error(dev, dma))
872 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
873 if (page)
874 put_page(page);
875 }
876
877 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
878 {
879 struct gve_queue_page_list *qpl = &priv->qpls[id];
880 int i;
881
882 if (!qpl->pages)
883 return;
884 if (!qpl->page_buses)
885 goto free_pages;
886
887 for (i = 0; i < qpl->num_entries; i++)
888 gve_free_page(&priv->pdev->dev, qpl->pages[i],
889 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
890
891 kvfree(qpl->page_buses);
892 free_pages:
893 kvfree(qpl->pages);
894 priv->num_registered_pages -= qpl->num_entries;
895 }
896
897 static int gve_alloc_qpls(struct gve_priv *priv)
898 {
899 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
900 int i, j;
901 int err;
902
903 if (num_qpls == 0)
904 return 0;
905
906 priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL);
907 if (!priv->qpls)
908 return -ENOMEM;
909
910 for (i = 0; i < gve_num_tx_qpls(priv); i++) {
911 err = gve_alloc_queue_page_list(priv, i,
912 priv->tx_pages_per_qpl);
913 if (err)
914 goto free_qpls;
915 }
916 for (; i < num_qpls; i++) {
917 err = gve_alloc_queue_page_list(priv, i,
918 priv->rx_data_slot_cnt);
919 if (err)
920 goto free_qpls;
921 }
922
923 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
924 sizeof(unsigned long) * BITS_PER_BYTE;
925 priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls),
926 sizeof(unsigned long), GFP_KERNEL);
927 if (!priv->qpl_cfg.qpl_id_map) {
928 err = -ENOMEM;
929 goto free_qpls;
930 }
931
932 return 0;
933
934 free_qpls:
935 for (j = 0; j <= i; j++)
936 gve_free_queue_page_list(priv, j);
937 kvfree(priv->qpls);
938 return err;
939 }
940
941 static void gve_free_qpls(struct gve_priv *priv)
942 {
943 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
944 int i;
945
946 if (num_qpls == 0)
947 return;
948
949 kvfree(priv->qpl_cfg.qpl_id_map);
950
951 for (i = 0; i < num_qpls; i++)
952 gve_free_queue_page_list(priv, i);
953
954 kvfree(priv->qpls);
955 }
956
957 /* Use this to schedule a reset when the device is capable of continuing
958 * to handle other requests in its current state. If it is not, do a reset
959 * in thread instead.
960 */
961 void gve_schedule_reset(struct gve_priv *priv)
962 {
963 gve_set_do_reset(priv);
964 queue_work(priv->gve_wq, &priv->service_task);
965 }
966
967 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
968 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
969 static void gve_turndown(struct gve_priv *priv);
970 static void gve_turnup(struct gve_priv *priv);
971
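/* ndo_open: allocate QPLs and rings, size the real TX/RX queue counts,
 * register page lists and create queues on the device, then enable NAPI
 * and interrupts via gve_turnup(). Failures after the device holds state
 * fall through to a reset rather than piecemeal unwinding.
 */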
972 static int gve_open(struct net_device *dev)
973 {
974 struct gve_priv *priv = netdev_priv(dev);
975 int err;
976
977 err = gve_alloc_qpls(priv);
978 if (err)
979 return err;
980
981 err = gve_alloc_rings(priv);
982 if (err)
983 goto free_qpls;
984
985 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
986 if (err)
987 goto free_rings;
988 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
989 if (err)
990 goto free_rings;
991
992 err = gve_register_qpls(priv);
993 if (err)
994 goto reset;
995
996 if (!gve_is_gqi(priv)) {
997 /* Hard code this for now. This may be tuned in the future for
998 * performance.
999 */
1000 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1001 }
1002 err = gve_create_rings(priv);
1003 if (err)
1004 goto reset;
1005
1006 gve_set_device_rings_ok(priv);
1007
1008 if (gve_get_report_stats(priv))
1009 mod_timer(&priv->stats_report_timer,
1010 round_jiffies(jiffies +
1011 msecs_to_jiffies(priv->stats_report_timer_period)));
1012
1013 gve_turnup(priv);
1014 queue_work(priv->gve_wq, &priv->service_task);
1015 priv->interface_up_cnt++;
1016 return 0;
1017
1018 free_rings:
1019 gve_free_rings(priv);
1020 free_qpls:
1021 gve_free_qpls(priv);
1022 return err;
1023
1024 reset:
1025 /* This must have been called from a reset due to the rtnl lock
1026 * so just return at this point.
1027 */
1028 if (gve_get_reset_in_progress(priv))
1029 return err;
1030 /* Otherwise reset before returning */
1031 gve_reset_and_teardown(priv, true);
1032 /* if this fails there is nothing we can do so just ignore the return */
1033 gve_reset_recovery(priv, false);
1034 /* return the original error */
1035 return err;
1036 }
1037
1038 static int gve_close(struct net_device *dev)
1039 {
1040 struct gve_priv *priv = netdev_priv(dev);
1041 int err;
1042
1043 netif_carrier_off(dev);
1044 if (gve_get_device_rings_ok(priv)) {
1045 gve_turndown(priv);
1046 err = gve_destroy_rings(priv);
1047 if (err)
1048 goto err;
1049 err = gve_unregister_qpls(priv);
1050 if (err)
1051 goto err;
1052 gve_clear_device_rings_ok(priv);
1053 }
1054 del_timer_sync(&priv->stats_report_timer);
1055
1056 gve_free_rings(priv);
1057 gve_free_qpls(priv);
1058 priv->interface_down_cnt++;
1059 return 0;
1060
1061 err:
1062 /* This must have been called from a reset due to the rtnl lock
1063 * so just return at this point.
1064 */
1065 if (gve_get_reset_in_progress(priv))
1066 return err;
1067 /* Otherwise reset before returning */
1068 gve_reset_and_teardown(priv, true);
1069 return gve_reset_recovery(priv, false);
1070 }
1071
1072 int gve_adjust_queues(struct gve_priv *priv,
1073 struct gve_queue_config new_rx_config,
1074 struct gve_queue_config new_tx_config)
1075 {
1076 int err;
1077
1078 if (netif_carrier_ok(priv->dev)) {
1079 /* To make this process as simple as possible we teardown the
1080 * device, set the new configuration, and then bring the device
1081 * up again.
1082 */
1083 err = gve_close(priv->dev);
1084 /* we have already tried to reset in close,
1085 * just fail at this point
1086 */
1087 if (err)
1088 return err;
1089 priv->tx_cfg = new_tx_config;
1090 priv->rx_cfg = new_rx_config;
1091
1092 err = gve_open(priv->dev);
1093 if (err)
1094 goto err;
1095
1096 return 0;
1097 }
1098 /* Set the config for the next up. */
1099 priv->tx_cfg = new_tx_config;
1100 priv->rx_cfg = new_rx_config;
1101
1102 return 0;
1103 err:
1104 netif_err(priv, drv, priv->dev,
1105 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1106 gve_turndown(priv);
1107 return err;
1108 }
1109
1110 static void gve_turndown(struct gve_priv *priv)
1111 {
1112 int idx;
1113
1114 if (netif_carrier_ok(priv->dev))
1115 netif_carrier_off(priv->dev);
1116
1117 if (!gve_get_napi_enabled(priv))
1118 return;
1119
1120 /* Disable napi to prevent more work from coming in */
1121 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1122 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1123 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1124
1125 napi_disable(&block->napi);
1126 }
1127 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1128 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1129 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1130
1131 napi_disable(&block->napi);
1132 }
1133
1134 /* Stop tx queues */
1135 netif_tx_disable(priv->dev);
1136
1137 gve_clear_napi_enabled(priv);
1138 gve_clear_report_stats(priv);
1139 }
1140
1141 static void gve_turnup(struct gve_priv *priv)
1142 {
1143 int idx;
1144
1145 /* Start the tx queues */
1146 netif_tx_start_all_queues(priv->dev);
1147
1148 /* Enable napi and unmask interrupts for all queues */
1149 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1150 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1151 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1152
1153 napi_enable(&block->napi);
1154 if (gve_is_gqi(priv)) {
1155 iowrite32be(0, gve_irq_doorbell(priv, block));
1156 } else {
1157 gve_set_itr_coalesce_usecs_dqo(priv, block,
1158 priv->tx_coalesce_usecs);
1159 }
1160 }
1161 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1162 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1163 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1164
1165 napi_enable(&block->napi);
1166 if (gve_is_gqi(priv)) {
1167 iowrite32be(0, gve_irq_doorbell(priv, block));
1168 } else {
1169 gve_set_itr_coalesce_usecs_dqo(priv, block,
1170 priv->rx_coalesce_usecs);
1171 }
1172 }
1173
1174 gve_set_napi_enabled(priv);
1175 }
1176
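/* ndo_tx_timeout: if the device reports completions the driver has not
 * processed yet (suggesting a missed interrupt), kick the queue's NAPI
 * instead of resetting; otherwise schedule a full reset. Kicks are rate
 * limited by MIN_TX_TIMEOUT_GAP.
 */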
1177 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1178 {
1179 struct gve_notify_block *block;
1180 struct gve_tx_ring *tx = NULL;
1181 struct gve_priv *priv;
1182 u32 last_nic_done;
1183 u32 current_time;
1184 u32 ntfy_idx;
1185
1186 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1187 priv = netdev_priv(dev);
1188 if (txqueue > priv->tx_cfg.num_queues)
1189 goto reset;
1190
1191 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1192 if (ntfy_idx >= priv->num_ntfy_blks)
1193 goto reset;
1194
1195 block = &priv->ntfy_blocks[ntfy_idx];
1196 tx = block->tx;
1197
1198 current_time = jiffies_to_msecs(jiffies);
1199 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1200 goto reset;
1201
1202 /* Check to see if there are missed completions, which will allow us to
1203 * kick the queue.
1204 */
1205 last_nic_done = gve_tx_load_event_counter(priv, tx);
1206 if (last_nic_done - tx->done) {
1207 netdev_info(dev, "Kicking queue %d", txqueue);
1208 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1209 napi_schedule(&block->napi);
1210 tx->last_kick_msec = current_time;
1211 goto out;
1212 } // Else reset.
1213
1214 reset:
1215 gve_schedule_reset(priv);
1216
1217 out:
1218 if (tx)
1219 tx->queue_timeout++;
1220 priv->tx_timeo_cnt++;
1221 }
1222
1223 static int gve_set_features(struct net_device *netdev,
1224 netdev_features_t features)
1225 {
1226 const netdev_features_t orig_features = netdev->features;
1227 struct gve_priv *priv = netdev_priv(netdev);
1228 int err;
1229
1230 if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1231 netdev->features ^= NETIF_F_LRO;
1232 if (netif_carrier_ok(netdev)) {
1233 /* To make this process as simple as possible we
1234 * teardown the device, set the new configuration,
1235 * and then bring the device up again.
1236 */
1237 err = gve_close(netdev);
1238 /* We have already tried to reset in close, just fail
1239 * at this point.
1240 */
1241 if (err)
1242 goto err;
1243
1244 err = gve_open(netdev);
1245 if (err)
1246 goto err;
1247 }
1248 }
1249
1250 return 0;
1251 err:
1252 /* Reverts the change on error. */
1253 netdev->features = orig_features;
1254 netif_err(priv, drv, netdev,
1255 "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1256 return err;
1257 }
1258
1259 static const struct net_device_ops gve_netdev_ops = {
1260 .ndo_start_xmit = gve_start_xmit,
1261 .ndo_open = gve_open,
1262 .ndo_stop = gve_close,
1263 .ndo_get_stats64 = gve_get_stats,
1264 .ndo_tx_timeout = gve_tx_timeout,
1265 .ndo_set_features = gve_set_features,
1266 };
1267
1268 static void gve_handle_status(struct gve_priv *priv, u32 status)
1269 {
1270 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1271 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1272 gve_set_do_reset(priv);
1273 }
1274 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1275 priv->stats_report_trigger_cnt++;
1276 gve_set_do_report_stats(priv);
1277 }
1278 }
1279
1280 static void gve_handle_reset(struct gve_priv *priv)
1281 {
1282 /* A service task will be scheduled at the end of probe to catch any
1283 * resets that need to happen, and we don't want to reset until
1284 * probe is done.
1285 */
1286 if (gve_get_probe_in_progress(priv))
1287 return;
1288
1289 if (gve_get_do_reset(priv)) {
1290 rtnl_lock();
1291 gve_reset(priv, false);
1292 rtnl_unlock();
1293 }
1294 }
1295
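/* Fill the DMA-shared stats report with per-queue TX/RX counters for the
 * device to read, bumping written_count so a new snapshot is detectable.
 */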
1296 void gve_handle_report_stats(struct gve_priv *priv)
1297 {
1298 struct stats *stats = priv->stats_report->stats;
1299 int idx, stats_idx = 0;
1300 unsigned int start = 0;
1301 u64 tx_bytes;
1302
1303 if (!gve_get_report_stats(priv))
1304 return;
1305
1306 be64_add_cpu(&priv->stats_report->written_count, 1);
1307 /* tx stats */
1308 if (priv->tx) {
1309 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1310 u32 last_completion = 0;
1311 u32 tx_frames = 0;
1312
1313 /* DQO doesn't currently support these metrics. */
1314 if (gve_is_gqi(priv)) {
1315 last_completion = priv->tx[idx].done;
1316 tx_frames = priv->tx[idx].req;
1317 }
1318
1319 do {
1320 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1321 tx_bytes = priv->tx[idx].bytes_done;
1322 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1323 stats[stats_idx++] = (struct stats) {
1324 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1325 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1326 .queue_id = cpu_to_be32(idx),
1327 };
1328 stats[stats_idx++] = (struct stats) {
1329 .stat_name = cpu_to_be32(TX_STOP_CNT),
1330 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1331 .queue_id = cpu_to_be32(idx),
1332 };
1333 stats[stats_idx++] = (struct stats) {
1334 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1335 .value = cpu_to_be64(tx_frames),
1336 .queue_id = cpu_to_be32(idx),
1337 };
1338 stats[stats_idx++] = (struct stats) {
1339 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1340 .value = cpu_to_be64(tx_bytes),
1341 .queue_id = cpu_to_be32(idx),
1342 };
1343 stats[stats_idx++] = (struct stats) {
1344 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1345 .value = cpu_to_be64(last_completion),
1346 .queue_id = cpu_to_be32(idx),
1347 };
1348 stats[stats_idx++] = (struct stats) {
1349 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1350 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1351 .queue_id = cpu_to_be32(idx),
1352 };
1353 }
1354 }
1355 /* rx stats */
1356 if (priv->rx) {
1357 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1358 stats[stats_idx++] = (struct stats) {
1359 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1360 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1361 .queue_id = cpu_to_be32(idx),
1362 };
1363 stats[stats_idx++] = (struct stats) {
1364 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1365 .value = cpu_to_be64(priv->rx[idx].fill_cnt),
1366 .queue_id = cpu_to_be32(idx),
1367 };
1368 }
1369 }
1370 }
1371
1372 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1373 {
1374 if (!gve_get_napi_enabled(priv))
1375 return;
1376
1377 if (link_status == netif_carrier_ok(priv->dev))
1378 return;
1379
1380 if (link_status) {
1381 netdev_info(priv->dev, "Device link is up.\n");
1382 netif_carrier_on(priv->dev);
1383 } else {
1384 netdev_info(priv->dev, "Device link is down.\n");
1385 netif_carrier_off(priv->dev);
1386 }
1387 }
1388
1389 /* Handle NIC status register changes, reset requests and report stats */
1390 static void gve_service_task(struct work_struct *work)
1391 {
1392 struct gve_priv *priv = container_of(work, struct gve_priv,
1393 service_task);
1394 u32 status = ioread32be(&priv->reg_bar0->device_status);
1395
1396 gve_handle_status(priv, status);
1397
1398 gve_handle_reset(priv);
1399 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1400 }
1401
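/* One-time (and post-reset) initialization: bring up the admin queue,
 * verify driver compatibility, describe the device, and size the MSI-X
 * notification blocks and default queue counts before setting up device
 * resources.
 */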
1402 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1403 {
1404 int num_ntfy;
1405 int err;
1406
1407 /* Set up the adminq */
1408 err = gve_adminq_alloc(&priv->pdev->dev, priv);
1409 if (err) {
1410 dev_err(&priv->pdev->dev,
1411 "Failed to alloc admin queue: err=%d\n", err);
1412 return err;
1413 }
1414
1415 err = gve_verify_driver_compatibility(priv);
1416 if (err) {
1417 dev_err(&priv->pdev->dev,
1418 "Could not verify driver compatibility: err=%d\n", err);
1419 goto err;
1420 }
1421
1422 if (skip_describe_device)
1423 goto setup_device;
1424
1425 priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
1426 /* Get the initial information we need from the device */
1427 err = gve_adminq_describe_device(priv);
1428 if (err) {
1429 dev_err(&priv->pdev->dev,
1430 "Could not get device information: err=%d\n", err);
1431 goto err;
1432 }
1433 priv->dev->mtu = priv->dev->max_mtu;
1434 num_ntfy = pci_msix_vec_count(priv->pdev);
1435 if (num_ntfy <= 0) {
1436 dev_err(&priv->pdev->dev,
1437 "could not count MSI-x vectors: err=%d\n", num_ntfy);
1438 err = num_ntfy;
1439 goto err;
1440 } else if (num_ntfy < GVE_MIN_MSIX) {
1441 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1442 GVE_MIN_MSIX, num_ntfy);
1443 err = -EINVAL;
1444 goto err;
1445 }
1446
1447 priv->num_registered_pages = 0;
1448 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1449 /* gvnic has one Notification Block per MSI-x vector, except for the
1450 * management vector
1451 */
1452 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1453 priv->mgmt_msix_idx = priv->num_ntfy_blks;
1454
1455 priv->tx_cfg.max_queues =
1456 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1457 priv->rx_cfg.max_queues =
1458 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1459
1460 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1461 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1462 if (priv->default_num_queues > 0) {
1463 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1464 priv->tx_cfg.num_queues);
1465 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1466 priv->rx_cfg.num_queues);
1467 }
1468
1469 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1470 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1471 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1472 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1473
1474 if (!gve_is_gqi(priv)) {
1475 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
1476 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
1477 }
1478
1479 setup_device:
1480 err = gve_setup_device_resources(priv);
1481 if (!err)
1482 return 0;
1483 err:
1484 gve_adminq_free(&priv->pdev->dev, priv);
1485 return err;
1486 }
1487
1488 static void gve_teardown_priv_resources(struct gve_priv *priv)
1489 {
1490 gve_teardown_device_resources(priv);
1491 gve_adminq_free(&priv->pdev->dev, priv);
1492 }
1493
1494 static void gve_trigger_reset(struct gve_priv *priv)
1495 {
1496 /* Reset the device by releasing the AQ */
1497 gve_adminq_release(priv);
1498 }
1499
1500 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1501 {
1502 gve_trigger_reset(priv);
1503 /* With the reset having already happened, close cannot fail */
1504 if (was_up)
1505 gve_close(priv->dev);
1506 gve_teardown_priv_resources(priv);
1507 }
1508
1509 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1510 {
1511 int err;
1512
1513 err = gve_init_priv(priv, true);
1514 if (err)
1515 goto err;
1516 if (was_up) {
1517 err = gve_open(priv->dev);
1518 if (err)
1519 goto err;
1520 }
1521 return 0;
1522 err:
1523 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1524 gve_turndown(priv);
1525 return err;
1526 }
1527
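/* Full device reset: optionally close the interface cleanly first,
 * release the admin queue to force the device to reset, tear down priv
 * resources, then rebuild and reopen if the interface was up. Callers
 * hold the rtnl lock.
 */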
1528 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1529 {
1530 bool was_up = netif_carrier_ok(priv->dev);
1531 int err;
1532
1533 dev_info(&priv->pdev->dev, "Performing reset\n");
1534 gve_clear_do_reset(priv);
1535 gve_set_reset_in_progress(priv);
1536 /* If we aren't attempting to teardown normally, just go turndown and
1537 * reset right away.
1538 */
1539 if (!attempt_teardown) {
1540 gve_turndown(priv);
1541 gve_reset_and_teardown(priv, was_up);
1542 } else {
1543 /* Otherwise attempt to close normally */
1544 if (was_up) {
1545 err = gve_close(priv->dev);
1546 /* If that fails reset as we did above */
1547 if (err)
1548 gve_reset_and_teardown(priv, was_up);
1549 }
1550 /* Clean up any remaining resources */
1551 gve_teardown_priv_resources(priv);
1552 }
1553
1554 /* Set it all back up */
1555 err = gve_reset_recovery(priv, was_up);
1556 gve_clear_reset_in_progress(priv);
1557 priv->reset_cnt++;
1558 priv->interface_up_cnt = 0;
1559 priv->interface_down_cnt = 0;
1560 priv->stats_report_trigger_cnt = 0;
1561 return err;
1562 }
1563
1564 static void gve_write_version(u8 __iomem *driver_version_register)
1565 {
1566 const char *c = gve_version_prefix;
1567
1568 while (*c) {
1569 writeb(*c, driver_version_register);
1570 c++;
1571 }
1572
1573 c = gve_version_str;
1574 while (*c) {
1575 writeb(*c, driver_version_register);
1576 c++;
1577 }
1578 writeb('\n', driver_version_register);
1579 }
1580
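/* PCI probe: map the register and doorbell BARs, read the advertised max
 * queue counts, allocate the netdev, and bring up the admin queue and
 * device resources via gve_init_priv() before registering the netdev.
 */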
1581 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1582 {
1583 int max_tx_queues, max_rx_queues;
1584 struct net_device *dev;
1585 __be32 __iomem *db_bar;
1586 struct gve_registers __iomem *reg_bar;
1587 struct gve_priv *priv;
1588 int err;
1589
1590 err = pci_enable_device(pdev);
1591 if (err)
1592 return err;
1593
1594 err = pci_request_regions(pdev, "gvnic-cfg");
1595 if (err)
1596 goto abort_with_enabled;
1597
1598 pci_set_master(pdev);
1599
1600 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1601 if (err) {
1602 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1603 goto abort_with_pci_region;
1604 }
1605
1606 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1607 if (!reg_bar) {
1608 dev_err(&pdev->dev, "Failed to map pci bar!\n");
1609 err = -ENOMEM;
1610 goto abort_with_pci_region;
1611 }
1612
1613 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1614 if (!db_bar) {
1615 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1616 err = -ENOMEM;
1617 goto abort_with_reg_bar;
1618 }
1619
1620 gve_write_version(&reg_bar->driver_version);
1621 /* Get max queues to alloc etherdev */
1622 max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1623 max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1624 /* Alloc and setup the netdev and priv */
1625 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1626 if (!dev) {
1627 dev_err(&pdev->dev, "could not allocate netdev\n");
1628 err = -ENOMEM;
1629 goto abort_with_db_bar;
1630 }
1631 SET_NETDEV_DEV(dev, &pdev->dev);
1632 pci_set_drvdata(pdev, dev);
1633 dev->ethtool_ops = &gve_ethtool_ops;
1634 dev->netdev_ops = &gve_netdev_ops;
1635
1636 /* Set default and supported features.
1637 *
1638 * Features might be set in other locations as well (such as
1639 * `gve_adminq_describe_device`).
1640 */
1641 dev->hw_features = NETIF_F_HIGHDMA;
1642 dev->hw_features |= NETIF_F_SG;
1643 dev->hw_features |= NETIF_F_HW_CSUM;
1644 dev->hw_features |= NETIF_F_TSO;
1645 dev->hw_features |= NETIF_F_TSO6;
1646 dev->hw_features |= NETIF_F_TSO_ECN;
1647 dev->hw_features |= NETIF_F_RXCSUM;
1648 dev->hw_features |= NETIF_F_RXHASH;
1649 dev->features = dev->hw_features;
1650 dev->watchdog_timeo = 5 * HZ;
1651 dev->min_mtu = ETH_MIN_MTU;
1652 netif_carrier_off(dev);
1653
1654 priv = netdev_priv(dev);
1655 priv->dev = dev;
1656 priv->pdev = pdev;
1657 priv->msg_enable = DEFAULT_MSG_LEVEL;
1658 priv->reg_bar0 = reg_bar;
1659 priv->db_bar2 = db_bar;
1660 priv->service_task_flags = 0x0;
1661 priv->state_flags = 0x0;
1662 priv->ethtool_flags = 0x0;
1663
1664 gve_set_probe_in_progress(priv);
1665 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1666 if (!priv->gve_wq) {
1667 dev_err(&pdev->dev, "Could not allocate workqueue");
1668 err = -ENOMEM;
1669 goto abort_with_netdev;
1670 }
1671 INIT_WORK(&priv->service_task, gve_service_task);
1672 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1673 priv->tx_cfg.max_queues = max_tx_queues;
1674 priv->rx_cfg.max_queues = max_rx_queues;
1675
1676 err = gve_init_priv(priv, false);
1677 if (err)
1678 goto abort_with_wq;
1679
1680 err = register_netdev(dev);
1681 if (err)
1682 goto abort_with_gve_init;
1683
1684 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1685 dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
1686 gve_clear_probe_in_progress(priv);
1687 queue_work(priv->gve_wq, &priv->service_task);
1688 return 0;
1689
1690 abort_with_gve_init:
1691 gve_teardown_priv_resources(priv);
1692
1693 abort_with_wq:
1694 destroy_workqueue(priv->gve_wq);
1695
1696 abort_with_netdev:
1697 free_netdev(dev);
1698
1699 abort_with_db_bar:
1700 pci_iounmap(pdev, db_bar);
1701
1702 abort_with_reg_bar:
1703 pci_iounmap(pdev, reg_bar);
1704
1705 abort_with_pci_region:
1706 pci_release_regions(pdev);
1707
1708 abort_with_enabled:
1709 pci_disable_device(pdev);
1710 return err;
1711 }
1712
1713 static void gve_remove(struct pci_dev *pdev)
1714 {
1715 struct net_device *netdev = pci_get_drvdata(pdev);
1716 struct gve_priv *priv = netdev_priv(netdev);
1717 __be32 __iomem *db_bar = priv->db_bar2;
1718 void __iomem *reg_bar = priv->reg_bar0;
1719
1720 unregister_netdev(netdev);
1721 gve_teardown_priv_resources(priv);
1722 destroy_workqueue(priv->gve_wq);
1723 free_netdev(netdev);
1724 pci_iounmap(pdev, db_bar);
1725 pci_iounmap(pdev, reg_bar);
1726 pci_release_regions(pdev);
1727 pci_disable_device(pdev);
1728 }
1729
1730 static void gve_shutdown(struct pci_dev *pdev)
1731 {
1732 struct net_device *netdev = pci_get_drvdata(pdev);
1733 struct gve_priv *priv = netdev_priv(netdev);
1734 bool was_up = netif_carrier_ok(priv->dev);
1735
1736 rtnl_lock();
1737 if (was_up && gve_close(priv->dev)) {
1738 /* If the dev was up, attempt to close, if close fails, reset */
1739 gve_reset_and_teardown(priv, was_up);
1740 } else {
1741 /* If the dev wasn't up or close worked, finish tearing down */
1742 gve_teardown_priv_resources(priv);
1743 }
1744 rtnl_unlock();
1745 }
1746
1747 #ifdef CONFIG_PM
1748 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
1749 {
1750 struct net_device *netdev = pci_get_drvdata(pdev);
1751 struct gve_priv *priv = netdev_priv(netdev);
1752 bool was_up = netif_carrier_ok(priv->dev);
1753
1754 priv->suspend_cnt++;
1755 rtnl_lock();
1756 if (was_up && gve_close(priv->dev)) {
1757 /* If the dev was up, attempt to close, if close fails, reset */
1758 gve_reset_and_teardown(priv, was_up);
1759 } else {
1760 /* If the dev wasn't up or close worked, finish tearing down */
1761 gve_teardown_priv_resources(priv);
1762 }
1763 priv->up_before_suspend = was_up;
1764 rtnl_unlock();
1765 return 0;
1766 }
1767
1768 static int gve_resume(struct pci_dev *pdev)
1769 {
1770 struct net_device *netdev = pci_get_drvdata(pdev);
1771 struct gve_priv *priv = netdev_priv(netdev);
1772 int err;
1773
1774 priv->resume_cnt++;
1775 rtnl_lock();
1776 err = gve_reset_recovery(priv, priv->up_before_suspend);
1777 rtnl_unlock();
1778 return err;
1779 }
1780 #endif /* CONFIG_PM */
1781
1782 static const struct pci_device_id gve_id_table[] = {
1783 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1784 { }
1785 };
1786
1787 static struct pci_driver gvnic_driver = {
1788 .name = "gvnic",
1789 .id_table = gve_id_table,
1790 .probe = gve_probe,
1791 .remove = gve_remove,
1792 .shutdown = gve_shutdown,
1793 #ifdef CONFIG_PM
1794 .suspend = gve_suspend,
1795 .resume = gve_resume,
1796 #endif
1797 };
1798
1799 module_pci_driver(gvnic_driver);
1800
1801 MODULE_DEVICE_TABLE(pci, gve_id_table);
1802 MODULE_AUTHOR("Google, Inc.");
1803 MODULE_DESCRIPTION("gVNIC Driver");
1804 MODULE_LICENSE("Dual MIT/GPL");
1805 MODULE_VERSION(GVE_VERSION);
1806