// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"

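/*
 * Rearm the cmdq CQ: mirror the doorbell value into the host-resident
 * doorbell record, then ring the CQ doorbell so the device raises an
 * event for the next completion. The cmdsn appears to pair the rearm
 * with the completion batch it requests.
 */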
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
        struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
        u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
                      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
                      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
                      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

        *cmdq->cq.db_record = db_data;
        writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

        atomic64_inc(&cmdq->cq.armed_num);
}

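/*
 * Publish the new SQ producer index: write it to the host doorbell
 * record, then to the SQ doorbell register to notify the device.
 */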
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
        struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
        u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

        *cmdq->sq.db_record = db_data;
        writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

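/*
 * Claim a free completion-wait slot from the pool. The bitmap tracks
 * in-use slots; with all max_outstandings slots taken this returns
 * -ENOMEM, which should not happen while callers hold a credit.
 */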
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
        int comp_idx;

        spin_lock(&cmdq->lock);
        comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
                                       cmdq->max_outstandings);
        if (comp_idx == cmdq->max_outstandings) {
                spin_unlock(&cmdq->lock);
                return ERR_PTR(-ENOMEM);
        }

        __set_bit(comp_idx, cmdq->comp_wait_bitmap);
        spin_unlock(&cmdq->lock);

        return &cmdq->wait_pool[comp_idx];
}

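/* Release a completion-wait slot back to the pool; WARN on a double put. */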
static void put_comp_wait(struct erdma_cmdq *cmdq,
                          struct erdma_comp_wait *comp_wait)
{
        int used;

        cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
        spin_lock(&cmdq->lock);
        used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
        spin_unlock(&cmdq->lock);

        WARN_ON(!used);
}

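/*
 * Allocate the wait-context pool and its in-use bitmap. Both are
 * devm-managed, so there is no matching teardown in the destroy path.
 */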
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
                                    struct erdma_cmdq *cmdq)
{
        int i;

        cmdq->wait_pool =
                devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
                             sizeof(struct erdma_comp_wait), GFP_KERNEL);
        if (!cmdq->wait_pool)
                return -ENOMEM;

        spin_lock_init(&cmdq->lock);
        cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
                &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
        if (!cmdq->comp_wait_bitmap)
                return -ENOMEM;

        for (i = 0; i < cmdq->max_outstandings; i++) {
                init_completion(&cmdq->wait_pool[i].wait_event);
                cmdq->wait_pool[i].ctx_id = i;
        }

        return 0;
}

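/*
 * Set up the cmdq SQ: a single DMA-coherent buffer holds the WQEBBs
 * with the doorbell record placed right after them (WARPPED_BUFSIZE
 * appears to reserve that extra tail space), and the base address,
 * depth and doorbell-record address are programmed into the device.
 */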
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
        struct erdma_cmdq *cmdq = &dev->cmdq;
        struct erdma_cmdq_sq *sq = &cmdq->sq;
        u32 buf_size;

        sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
        sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

        buf_size = sq->depth << SQEBB_SHIFT;

        sq->qbuf =
                dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                                   &sq->qbuf_dma_addr, GFP_KERNEL);
        if (!sq->qbuf)
                return -ENOMEM;

        sq->db_record = (u64 *)(sq->qbuf + buf_size);

        spin_lock_init(&sq->lock);

        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
                          upper_32_bits(sq->qbuf_dma_addr));
        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
                          lower_32_bits(sq->qbuf_dma_addr));
        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
        erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
                          sq->qbuf_dma_addr + buf_size);

        return 0;
}

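/*
 * Set up the cmdq CQ with the same depth as the SQ; the buffer layout
 * (queue entries followed by the doorbell record) mirrors the SQ.
 */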
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
        struct erdma_cmdq *cmdq = &dev->cmdq;
        struct erdma_cmdq_cq *cq = &cmdq->cq;
        u32 buf_size;

        cq->depth = cmdq->sq.depth;
        buf_size = cq->depth << CQE_SHIFT;

        cq->qbuf =
                dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                                   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
        if (!cq->qbuf)
                return -ENOMEM;

        spin_lock_init(&cq->lock);

        cq->db_record = (u64 *)(cq->qbuf + buf_size);

        atomic64_set(&cq->armed_num, 0);

        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
                          upper_32_bits(cq->qbuf_dma_addr));
        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
                          lower_32_bits(cq->qbuf_dma_addr));
        erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
                          cq->qbuf_dma_addr + buf_size);

        return 0;
}

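/*
 * Set up the cmdq EQ used in event mode. db_addr points into the CEQ
 * doorbell region of the function BAR; presumably the first doorbell
 * in that region serves the cmdq EQ.
 */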
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
        struct erdma_cmdq *cmdq = &dev->cmdq;
        struct erdma_eq *eq = &cmdq->eq;
        u32 buf_size;

        eq->depth = cmdq->max_outstandings;
        buf_size = eq->depth << EQE_SHIFT;

        eq->qbuf =
                dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                                   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
        if (!eq->qbuf)
                return -ENOMEM;

        spin_lock_init(&eq->lock);
        atomic64_set(&eq->event_num, 0);

        eq->db_addr =
                (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
        eq->db_record = (u64 *)(eq->qbuf + buf_size);

        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
                          upper_32_bits(eq->qbuf_dma_addr));
        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
                          lower_32_bits(eq->qbuf_dma_addr));
        erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
        erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
                          eq->qbuf_dma_addr + buf_size);

        return 0;
}

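/*
 * Bring up the command queue: allocate the wait contexts and the
 * SQ/CQ/EQ, start in polling mode, then trigger device init and poll
 * the status register until the device reports init done. On failure
 * the queues allocated so far are freed in reverse order.
 */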
int erdma_cmdq_init(struct erdma_dev *dev)
{
        int err, i;
        struct erdma_cmdq *cmdq = &dev->cmdq;
        u32 sts, ctrl;

        cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
        cmdq->use_event = false;

        sema_init(&cmdq->credits, cmdq->max_outstandings);

        err = erdma_cmdq_wait_res_init(dev, cmdq);
        if (err)
                return err;

        err = erdma_cmdq_sq_init(dev);
        if (err)
                return err;

        err = erdma_cmdq_cq_init(dev);
        if (err)
                goto err_destroy_sq;

        err = erdma_cmdq_eq_init(dev);
        if (err)
                goto err_destroy_cq;

        ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
        erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

        for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
                sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
                                             ERDMA_REG_DEV_ST_INIT_DONE_MASK);
                if (sts)
                        break;

                msleep(ERDMA_REG_ACCESS_WAIT_MS);
        }

        if (i == ERDMA_WAIT_DEV_DONE_CNT) {
                dev_err(&dev->pdev->dev, "wait init done failed.\n");
                err = -ETIMEDOUT;
                goto err_destroy_eq;
        }

        set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

        return 0;

err_destroy_eq:
        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->eq.depth << EQE_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->cq.depth << CQE_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->sq.depth << SQEBB_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

        return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
        /* After device init succeeds, switch the cmdq to event mode. */
        dev->cmdq.use_event = true;
        arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
        struct erdma_cmdq *cmdq = &dev->cmdq;

        clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->eq.depth << EQE_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->sq.depth << SQEBB_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
        dma_free_coherent(&dev->pdev->dev,
                          (cmdq->cq.depth << CQE_SHIFT) +
                                  ERDMA_EXTRA_BUFFER_SIZE,
                          cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

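/*
 * A CQE at CI is valid when its owner bit differs from the wrap phase
 * of CI (ci & depth, which assumes the depth is a power of two).
 * Returns the CQE, or NULL if the device has not written it yet.
 */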
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
        __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
                                      cmdq->cq.depth, CQE_SHIFT);
        u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
                              __be32_to_cpu(READ_ONCE(*cqe)));

        return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

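/*
 * Copy the request into the SQ at the current PI, then rewrite the
 * first header word with the advanced PI, the wait-context cookie
 * (recovered from this SQE when its completion arrives) and the WQEBB
 * count, and ring the doorbell. Called with sq.lock held.
 */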
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
                          struct erdma_comp_wait *comp_wait)
{
        __le64 *wqe;
        u64 hdr = *req;

        comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
        reinit_completion(&comp_wait->wait_event);
        comp_wait->sq_pi = cmdq->sq.pi;

        wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
                              SQEBB_SHIFT);
        memcpy(wqe, req, req_len);

        cmdq->sq.pi += cmdq->sq.wqebb_cnt;
        hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
               FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
                          comp_wait->ctx_id) |
               FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
        *wqe = cpu_to_le64(hdr);

        kick_cmdq_db(cmdq);
}

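/*
 * Consume one CQE. The dma_rmb() orders the payload reads against the
 * ownership check in get_next_valid_cmdq_cqe(). The completed SQE is
 * located via the index in the CQE, and its cookie selects the wait
 * context that receives the syndrome and the four response words.
 */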
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
        struct erdma_comp_wait *comp_wait;
        u32 hdr0, sqe_idx;
        __be32 *cqe;
        u16 ctx_id;
        u64 *sqe;
        int i;

        cqe = get_next_valid_cmdq_cqe(cmdq);
        if (!cqe)
                return -EAGAIN;

        cmdq->cq.ci++;

        dma_rmb();
        hdr0 = __be32_to_cpu(*cqe);
        sqe_idx = __be32_to_cpu(*(cqe + 1));

        sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
                              SQEBB_SHIFT);
        ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
        comp_wait = &cmdq->wait_pool[ctx_id];
        if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
                return -EIO;

        comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
        comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
        cmdq->sq.ci += cmdq->sq.wqebb_cnt;

        for (i = 0; i < 4; i++)
                comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

        if (cmdq->use_event)
                complete(&comp_wait->wait_event);

        return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
        unsigned long flags;
        u16 comp_num;

        spin_lock_irqsave(&cmdq->cq.lock, flags);

        /* There cannot be more than max_outstandings completions
         * pending at one time.
         */
        for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
                if (erdma_poll_single_cmd_completion(cmdq))
                        break;

        if (comp_num && cmdq->use_event)
                arm_cmdq_cq(cmdq);

        spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

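/*
 * Event-mode IRQ path: drain all pending EQEs, then sweep the CQ once
 * for the whole batch. cmdsn is bumped first, presumably so that the
 * rearm doorbell issued during the sweep carries a fresh sequence
 * number.
 */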
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
        int got_event = 0;

        if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
            !cmdq->use_event)
                return;

        while (get_next_valid_eqe(&cmdq->eq)) {
                cmdq->eq.ci++;
                got_event++;
        }

        if (got_event) {
                cmdq->cq.cmdsn++;
                erdma_polling_cmd_completions(cmdq);
        }

        notify_eq(&cmdq->eq);
}

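/*
 * Polling-mode wait: sweep the CQ until this command leaves the
 * ISSUED state or the timeout expires.
 */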
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
                                     struct erdma_cmdq *cmdq, u32 timeout)
{
        unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

        while (1) {
                erdma_polling_cmd_completions(cmdq);
                if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
                        break;

                if (time_is_before_jiffies(comp_timeout))
                        return -ETIME;

                msleep(20);
        }

        return 0;
}

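/*
 * Event-mode wait: sleep on the completion. On timeout the context is
 * marked TIMEOUT under cq.lock, so a late completion (also processed
 * under cq.lock) cannot race with the state change.
 */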
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
                                     struct erdma_cmdq *cmdq, u32 timeout)
{
        unsigned long flags = 0;

        wait_for_completion_timeout(&comp_ctx->wait_event,
                                    msecs_to_jiffies(timeout));

        if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
                spin_lock_irqsave(&cmdq->cq.lock, flags);
                comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
                spin_unlock_irqrestore(&cmdq->cq.lock, flags);
                return -ETIME;
        }

        return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
        *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
               FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

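/*
 * Post a command and wait for its completion; this is the entry point
 * used by the rest of the driver. A semaphore credit per in-flight
 * command keeps the SQ from overflowing. On timeout the cmdq is marked
 * failed and the wait context is not returned to the pool, presumably
 * because a late completion could still reference it.
 */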
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
                        u64 *resp0, u64 *resp1)
{
        struct erdma_comp_wait *comp_wait;
        int ret;

        if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
                return -ENODEV;

        down(&cmdq->credits);

        comp_wait = get_comp_wait(cmdq);
        if (IS_ERR(comp_wait)) {
                clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
                set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
                up(&cmdq->credits);
                return PTR_ERR(comp_wait);
        }

        spin_lock(&cmdq->sq.lock);
        push_cmdq_sqe(cmdq, req, req_size, comp_wait);
        spin_unlock(&cmdq->sq.lock);

        if (cmdq->use_event)
                ret = erdma_wait_cmd_completion(comp_wait, cmdq,
                                                ERDMA_CMDQ_TIMEOUT_MS);
        else
                ret = erdma_poll_cmd_completion(comp_wait, cmdq,
                                                ERDMA_CMDQ_TIMEOUT_MS);

        if (ret) {
                set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
                clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
                goto out;
        }

        if (comp_wait->comp_status)
                ret = -EIO;

        if (resp0 && resp1) {
                *resp0 = *((u64 *)&comp_wait->comp_data[0]);
                *resp1 = *((u64 *)&comp_wait->comp_data[2]);
        }
        put_comp_wait(cmdq, comp_wait);

out:
        up(&cmdq->credits);

        return ret;
}