1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
3 #include <linux/io-64-nonatomic-lo-hi.h>
4 #include <linux/module.h>
5 #include <linux/sizes.h>
6 #include <linux/mutex.h>
7 #include <linux/list.h>
8 #include <linux/pci.h>
9 #include <linux/io.h>
10 #include "cxlmem.h"
11 #include "pci.h"
12 #include "cxl.h"
13
14 /**
15 * DOC: cxl pci
16 *
17 * This implements the PCI exclusive functionality for a CXL device as it is
18 * defined by the Compute Express Link specification. CXL devices may surface
19 * certain functionality even if it isn't CXL enabled. While this driver is
20 * focused around the PCI specific aspects of a CXL device, it binds to the
21 * specific CXL memory device class code, and therefore the implementation of
22 * cxl_pci is focused around CXL memory devices.
23 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interfaces and map them.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
29 */
30
/*
 * Non-zero while the doorbell bit is set in the mailbox control register.
 * Users in this file poll it to wait for a command to finish and refuse to
 * start a new command while it is set.
 */
#define cxl_doorbell_busy(cxlm)                                                \
	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)
34
/*
 * CXL 2.0 - 8.2.8.4
 *
 * NOTE: despite the _MS suffix this value is expressed in jiffies (2 * HZ,
 * i.e. two seconds); its user adds it directly to a jiffies timestamp.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
37
cxl_pci_mbox_wait_for_doorbell(struct cxl_mem * cxlm)38 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_mem *cxlm)
39 {
40 const unsigned long start = jiffies;
41 unsigned long end = start;
42
43 while (cxl_doorbell_busy(cxlm)) {
44 end = jiffies;
45
46 if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
47 /* Check again in case preempted before timeout test */
48 if (!cxl_doorbell_busy(cxlm))
49 break;
50 return -ETIMEDOUT;
51 }
52 cpu_relax();
53 }
54
55 dev_dbg(cxlm->dev, "Doorbell wait took %dms",
56 jiffies_to_msecs(end) - jiffies_to_msecs(start));
57 return 0;
58 }
59
cxl_pci_mbox_timeout(struct cxl_mem * cxlm,struct cxl_mbox_cmd * mbox_cmd)60 static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
61 struct cxl_mbox_cmd *mbox_cmd)
62 {
63 struct device *dev = cxlm->dev;
64
65 dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
66 mbox_cmd->opcode, mbox_cmd->size_in);
67 }
68
/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlm: The CXL memory device to communicate with.
 * @mbox_cmd: Command to send to the memory device. On completion its
 *	      return_code is filled in from the mailbox status register and,
 *	      when that code is 0, size_out is updated to the number of bytes
 *	      copied into payload_out.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return:
 *  * 0		- the command completed; the caller must still check
 *		  @mbox_cmd->return_code to learn whether it succeeded.
 *  * -EBUSY	- the doorbell was already set before the command was written.
 *  * -EINVAL	- size_in is non-zero but payload_in is NULL.
 *  * -ETIMEDOUT	- the doorbell did not clear within the timeout.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlm->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlm->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlm)) {
		dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
		return -EBUSY;
	}

	/* Build the command register value; the payload (#3) is copied first */
	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		/* A non-zero input length without a buffer is a caller bug */
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		/* #3: performed ahead of #2, which the spec permits */
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2: opcode and input payload length */
	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command\n");
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
	if (rc == -ETIMEDOUT) {
		cxl_pci_mbox_timeout(cxlm, mbox_cmd);
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * The transport worked but the device rejected the command: report
	 * success here and leave interpretation of return_code to the caller.
	 * Note that size_out is left untouched on this path.
	 */
	if (mbox_cmd->return_code != 0) {
		dev_dbg(dev, "Mailbox operation had an error\n");
		return 0;
	}

	/* #7 */
	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		/* Nothing returned, or nowhere to put it */
		mbox_cmd->size_out = 0;
	}

	return 0;
}
184
185 /**
186 * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
187 * @cxlm: The memory device to gain access to.
188 *
189 * Context: Any context. Takes the mbox_mutex.
190 * Return: 0 if exclusive access was acquired.
191 */
cxl_pci_mbox_get(struct cxl_mem * cxlm)192 static int cxl_pci_mbox_get(struct cxl_mem *cxlm)
193 {
194 struct device *dev = cxlm->dev;
195 u64 md_status;
196 int rc;
197
198 mutex_lock_io(&cxlm->mbox_mutex);
199
200 /*
201 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
202 * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
203 * bit is to allow firmware running on the device to notify the driver
204 * that it's ready to receive commands. It is unclear if the bit needs
205 * to be read for each transaction mailbox, ie. the firmware can switch
206 * it on and off as needed. Second, there is no defined timeout for
207 * mailbox ready, like there is for the doorbell interface.
208 *
209 * Assumptions:
210 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
211 * it for every command.
212 *
213 * 2. If the doorbell is clear, the firmware should have first set the
214 * Mailbox Interface Ready bit. Therefore, waiting for the doorbell
215 * to be ready is sufficient.
216 */
217 rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
218 if (rc) {
219 dev_warn(dev, "Mailbox interface not ready\n");
220 goto out;
221 }
222
223 md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
224 if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
225 dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
226 rc = -EBUSY;
227 goto out;
228 }
229
230 /*
231 * Hardware shouldn't allow a ready status but also have failure bits
232 * set. Spit out an error, this should be a bug report
233 */
234 rc = -EFAULT;
235 if (md_status & CXLMDEV_DEV_FATAL) {
236 dev_err(dev, "mbox: reported ready, but fatal\n");
237 goto out;
238 }
239 if (md_status & CXLMDEV_FW_HALT) {
240 dev_err(dev, "mbox: reported ready, but halted\n");
241 goto out;
242 }
243 if (CXLMDEV_RESET_NEEDED(md_status)) {
244 dev_err(dev, "mbox: reported ready, but reset needed\n");
245 goto out;
246 }
247
248 /* with lock held */
249 return 0;
250
251 out:
252 mutex_unlock(&cxlm->mbox_mutex);
253 return rc;
254 }
255
/**
 * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
 * @cxlm: The CXL memory device to communicate with.
 *
 * Counterpart of a successful cxl_pci_mbox_get(): drops the mbox_mutex that
 * call left held.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 */
static void cxl_pci_mbox_put(struct cxl_mem *cxlm)
{
	mutex_unlock(&cxlm->mbox_mutex);
}
266
/*
 * Mailbox send entry point installed as cxlm->mbox_send: acquire the mailbox,
 * run one command, release the mailbox.
 *
 * Returns the acquisition error if the mailbox could not be obtained,
 * otherwise whatever __cxl_pci_mbox_send_cmd() returned.
 */
static int cxl_pci_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
{
	int rc = cxl_pci_mbox_get(cxlm);

	if (!rc) {
		rc = __cxl_pci_mbox_send_cmd(cxlm, cmd);
		cxl_pci_mbox_put(cxlm);
	}

	return rc;
}
280
cxl_pci_setup_mailbox(struct cxl_mem * cxlm)281 static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
282 {
283 const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
284
285 cxlm->mbox_send = cxl_pci_mbox_send;
286 cxlm->payload_size =
287 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
288
289 /*
290 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
291 *
292 * If the size is too small, mandatory commands will not work and so
293 * there's no point in going forward. If the size is too large, there's
294 * no harm is soft limiting it.
295 */
296 cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
297 if (cxlm->payload_size < 256) {
298 dev_err(cxlm->dev, "Mailbox is too small (%zub)",
299 cxlm->payload_size);
300 return -ENXIO;
301 }
302
303 dev_dbg(cxlm->dev, "Mailbox payload sized %zu",
304 cxlm->payload_size);
305
306 return 0;
307 }
308
cxl_map_regblock(struct pci_dev * pdev,struct cxl_register_map * map)309 static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
310 {
311 void __iomem *addr;
312 int bar = map->barno;
313 struct device *dev = &pdev->dev;
314 resource_size_t offset = map->block_offset;
315
316 /* Basic sanity check that BAR is big enough */
317 if (pci_resource_len(pdev, bar) < offset) {
318 dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
319 &pdev->resource[bar], &offset);
320 return -ENXIO;
321 }
322
323 addr = pci_iomap(pdev, bar, 0);
324 if (!addr) {
325 dev_err(dev, "failed to map registers\n");
326 return -ENOMEM;
327 }
328
329 dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
330 bar, &offset);
331
332 map->base = addr + map->block_offset;
333 return 0;
334 }
335
cxl_unmap_regblock(struct pci_dev * pdev,struct cxl_register_map * map)336 static void cxl_unmap_regblock(struct pci_dev *pdev,
337 struct cxl_register_map *map)
338 {
339 pci_iounmap(pdev, map->base - map->block_offset);
340 map->base = NULL;
341 }
342
cxl_probe_regs(struct pci_dev * pdev,struct cxl_register_map * map)343 static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
344 {
345 struct cxl_component_reg_map *comp_map;
346 struct cxl_device_reg_map *dev_map;
347 struct device *dev = &pdev->dev;
348 void __iomem *base = map->base;
349
350 switch (map->reg_type) {
351 case CXL_REGLOC_RBI_COMPONENT:
352 comp_map = &map->component_map;
353 cxl_probe_component_regs(dev, base, comp_map);
354 if (!comp_map->hdm_decoder.valid) {
355 dev_err(dev, "HDM decoder registers not found\n");
356 return -ENXIO;
357 }
358
359 dev_dbg(dev, "Set up component registers\n");
360 break;
361 case CXL_REGLOC_RBI_MEMDEV:
362 dev_map = &map->device_map;
363 cxl_probe_device_regs(dev, base, dev_map);
364 if (!dev_map->status.valid || !dev_map->mbox.valid ||
365 !dev_map->memdev.valid) {
366 dev_err(dev, "registers not found: %s%s%s\n",
367 !dev_map->status.valid ? "status " : "",
368 !dev_map->mbox.valid ? "mbox " : "",
369 !dev_map->memdev.valid ? "memdev " : "");
370 return -ENXIO;
371 }
372
373 dev_dbg(dev, "Probing device registers...\n");
374 break;
375 default:
376 break;
377 }
378
379 return 0;
380 }
381
cxl_map_regs(struct cxl_mem * cxlm,struct cxl_register_map * map)382 static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
383 {
384 struct device *dev = cxlm->dev;
385 struct pci_dev *pdev = to_pci_dev(dev);
386
387 switch (map->reg_type) {
388 case CXL_REGLOC_RBI_COMPONENT:
389 cxl_map_component_regs(pdev, &cxlm->regs.component, map);
390 dev_dbg(dev, "Mapping component registers...\n");
391 break;
392 case CXL_REGLOC_RBI_MEMDEV:
393 cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
394 dev_dbg(dev, "Probing device registers...\n");
395 break;
396 default:
397 break;
398 }
399
400 return 0;
401 }
402
/*
 * Unpack one Register Locator entry (low and high dwords) into @map:
 * the 64-bit block offset, the BAR indicator and the register block type.
 */
static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
				struct cxl_register_map *map)
{
	const u64 offset_hi = (u64)reg_hi << 32;
	const u64 offset_lo = reg_lo & CXL_REGLOC_ADDR_MASK;

	map->block_offset = offset_hi | offset_lo;
	map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
	map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
}
411
412 /**
413 * cxl_find_regblock() - Locate register blocks by type
414 * @pdev: The CXL PCI device to enumerate.
415 * @type: Register Block Indicator id
416 * @map: Enumeration output, clobbered on error
417 *
418 * Return: 0 if register block enumerated, negative error code otherwise
419 *
420 * A CXL DVSEC may point to one or more register blocks, search for them
421 * by @type.
422 */
cxl_find_regblock(struct pci_dev * pdev,enum cxl_regloc_type type,struct cxl_register_map * map)423 static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
424 struct cxl_register_map *map)
425 {
426 u32 regloc_size, regblocks;
427 int regloc, i;
428
429 regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
430 PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
431 if (!regloc)
432 return -ENXIO;
433
434 pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size);
435 regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
436
437 regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
438 regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
439
440 for (i = 0; i < regblocks; i++, regloc += 8) {
441 u32 reg_lo, reg_hi;
442
443 pci_read_config_dword(pdev, regloc, ®_lo);
444 pci_read_config_dword(pdev, regloc + 4, ®_hi);
445
446 cxl_decode_regblock(reg_lo, reg_hi, map);
447
448 if (map->reg_type == type)
449 return 0;
450 }
451
452 return -ENODEV;
453 }
454
/*
 * Find the register block of the given @type, map it temporarily, probe what
 * it contains into @map, then drop the temporary mapping again.
 *
 * Returns 0 on success, or the first error from locating, mapping or probing
 * the block. The temporary mapping is released whether or not probing
 * succeeded.
 */
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			  struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);
	if (!rc)
		rc = cxl_map_regblock(pdev, map);
	if (rc)
		return rc;

	/* The block only needs to stay mapped for the duration of the probe */
	rc = cxl_probe_regs(pdev, map);
	cxl_unmap_regblock(pdev, map);

	return rc;
}
473
/*
 * PCI probe callback: bring up one CXL memory device.
 *
 * Enables the PCI device, creates the cxl_mem state, discovers and maps the
 * memory-device register block, sets up the mailbox, queries the device via
 * the cxl_mem_* helpers, and finally registers the memdev (plus an nvdimm
 * when the device reports a non-empty pmem range and CONFIG_CXL_PMEM is
 * enabled).
 *
 * Returns 0 on success or the negative error code of the first step that
 * failed. No explicit unwinding is done on the error paths here.
 */
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	struct cxl_mem *cxlm;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;

	cxlm = cxl_mem_create(&pdev->dev);
	if (IS_ERR(cxlm))
		return PTR_ERR(cxlm);

	/* Locate and probe the memory-device register block into @map */
	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	/* Map the probed registers into cxlm->regs */
	rc = cxl_map_regs(cxlm, &map);
	if (rc)
		return rc;

	/* Needs the mailbox registers mapped by the step above */
	rc = cxl_pci_setup_mailbox(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_enumerate_cmds(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_identify(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlm);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlm);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	/* rc is still 0 here, so skipping the nvdimm reports success */
	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
}
529
/*
 * Devices this driver binds to: matched by PCI class code rather than by
 * vendor/device id. The ~0 mask requires every bit of the class code,
 * including the programming interface byte, to match.
 */
static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
536
/*
 * PCI driver registration. Only a probe callback is provided; no remove
 * callback is set.
 */
static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.driver	= {
		/* Request asynchronous probing from the driver core */
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
/* Symbols used from the CXL core are exported in the CXL namespace */
MODULE_IMPORT_NS(CXL);
549