1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2022 Qualcomm Innovation Center. All rights reserved.
4 *
5 * Authors:
6 * Asutosh Das <quic_asutoshd@quicinc.com>
7 * Can Guo <quic_cang@quicinc.com>
8 */
9
10 #include <asm/unaligned.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/module.h>
13 #include <linux/platform_device.h>
14 #include "ufshcd-priv.h"
15
16 #define MAX_QUEUE_SUP GENMASK(7, 0)
17 #define UFS_MCQ_MIN_RW_QUEUES 2
18 #define UFS_MCQ_MIN_READ_QUEUES 0
19 #define UFS_MCQ_NUM_DEV_CMD_QUEUES 1
20 #define UFS_MCQ_MIN_POLL_QUEUES 0
21 #define QUEUE_EN_OFFSET 31
22 #define QUEUE_ID_OFFSET 16
23
24 #define MAX_DEV_CMD_ENTRIES 2
25 #define MCQ_CFG_MAC_MASK GENMASK(16, 8)
26 #define MCQ_QCFG_SIZE 0x40
27 #define MCQ_ENTRY_SIZE_IN_DWORD 8
28 #define CQE_UCD_BA GENMASK_ULL(63, 7)
29
rw_queue_count_set(const char * val,const struct kernel_param * kp)30 static int rw_queue_count_set(const char *val, const struct kernel_param *kp)
31 {
32 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_RW_QUEUES,
33 num_possible_cpus());
34 }
35
/* Parameter ops for rw_queues: bounded setter above, stock uint getter. */
static const struct kernel_param_ops rw_queue_count_ops = {
	.set = rw_queue_count_set,
	.get = param_get_uint,
};

/*
 * Requested number of interrupt driven read/write queues. Left at 0 unless
 * set; ufshcd_mcq_config_nr_queues() replaces 0 with num_possible_cpus().
 */
static unsigned int rw_queues;
module_param_cb(rw_queues, &rw_queue_count_ops, &rw_queues, 0644);
MODULE_PARM_DESC(rw_queues,
		 "Number of interrupt driven I/O queues used for rw. Default value is nr_cpus");
45
read_queue_count_set(const char * val,const struct kernel_param * kp)46 static int read_queue_count_set(const char *val, const struct kernel_param *kp)
47 {
48 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_READ_QUEUES,
49 num_possible_cpus());
50 }
51
/* Parameter ops for read_queues: bounded setter above, stock uint getter. */
static const struct kernel_param_ops read_queue_count_ops = {
	.set = read_queue_count_set,
	.get = param_get_uint,
};

/* Requested number of dedicated read queues; 0 means none are set up. */
static unsigned int read_queues;
module_param_cb(read_queues, &read_queue_count_ops, &read_queues, 0644);
MODULE_PARM_DESC(read_queues,
		 "Number of interrupt driven read queues used for read. Default value is 0");
61
poll_queue_count_set(const char * val,const struct kernel_param * kp)62 static int poll_queue_count_set(const char *val, const struct kernel_param *kp)
63 {
64 return param_set_uint_minmax(val, kp, UFS_MCQ_MIN_POLL_QUEUES,
65 num_possible_cpus());
66 }
67
/* Parameter ops for poll_queues: bounded setter above, stock uint getter. */
static const struct kernel_param_ops poll_queue_count_ops = {
	.set = poll_queue_count_set,
	.get = param_get_uint,
};

/* Requested number of poll queues; initialized to 1. */
static unsigned int poll_queues = 1;
module_param_cb(poll_queues, &poll_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues,
		 "Number of poll queues used for r/w. Default value is 1");
77
78 /**
79 * ufshcd_mcq_config_mac - Set the #Max Activ Cmds.
80 * @hba: per adapter instance
81 * @max_active_cmds: maximum # of active commands to the device at any time.
82 *
83 * The controller won't send more than the max_active_cmds to the device at
84 * any time.
85 */
ufshcd_mcq_config_mac(struct ufs_hba * hba,u32 max_active_cmds)86 void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds)
87 {
88 u32 val;
89
90 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
91 val &= ~MCQ_CFG_MAC_MASK;
92 val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds);
93 ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
94 }
95
96 /**
97 * ufshcd_mcq_req_to_hwq - find the hardware queue on which the
98 * request would be issued.
99 * @hba: per adapter instance
100 * @req: pointer to the request to be issued
101 *
102 * Returns the hardware queue instance on which the request would
103 * be queued.
104 */
ufshcd_mcq_req_to_hwq(struct ufs_hba * hba,struct request * req)105 struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba,
106 struct request *req)
107 {
108 u32 utag = blk_mq_unique_tag(req);
109 u32 hwq = blk_mq_unique_tag_to_hwq(utag);
110
111 /* uhq[0] is used to serve device commands */
112 return &hba->uhq[hwq + UFSHCD_MCQ_IO_QUEUE_OFFSET];
113 }
114
115 /**
116 * ufshcd_mcq_decide_queue_depth - decide the queue depth
117 * @hba: per adapter instance
118 *
119 * Returns queue-depth on success, non-zero on error
120 *
121 * MAC - Max. Active Command of the Host Controller (HC)
122 * HC wouldn't send more than this commands to the device.
123 * It is mandatory to implement get_hba_mac() to enable MCQ mode.
124 * Calculates and adjusts the queue depth based on the depth
125 * supported by the HC and ufs device.
126 */
ufshcd_mcq_decide_queue_depth(struct ufs_hba * hba)127 int ufshcd_mcq_decide_queue_depth(struct ufs_hba *hba)
128 {
129 int mac;
130
131 /* Mandatory to implement get_hba_mac() */
132 mac = ufshcd_mcq_vops_get_hba_mac(hba);
133 if (mac < 0) {
134 dev_err(hba->dev, "Failed to get mac, err=%d\n", mac);
135 return mac;
136 }
137
138 WARN_ON_ONCE(!hba->dev_info.bqueuedepth);
139 /*
140 * max. value of bqueuedepth = 256, mac is host dependent.
141 * It is mandatory for UFS device to define bQueueDepth if
142 * shared queuing architecture is enabled.
143 */
144 return min_t(int, mac, hba->dev_info.bqueuedepth);
145 }
146
ufshcd_mcq_config_nr_queues(struct ufs_hba * hba)147 static int ufshcd_mcq_config_nr_queues(struct ufs_hba *hba)
148 {
149 int i;
150 u32 hba_maxq, rem, tot_queues;
151 struct Scsi_Host *host = hba->host;
152
153 hba_maxq = FIELD_GET(MAX_QUEUE_SUP, hba->mcq_capabilities);
154
155 tot_queues = UFS_MCQ_NUM_DEV_CMD_QUEUES + read_queues + poll_queues +
156 rw_queues;
157
158 if (hba_maxq < tot_queues) {
159 dev_err(hba->dev, "Total queues (%d) exceeds HC capacity (%d)\n",
160 tot_queues, hba_maxq);
161 return -EOPNOTSUPP;
162 }
163
164 rem = hba_maxq - UFS_MCQ_NUM_DEV_CMD_QUEUES;
165
166 if (rw_queues) {
167 hba->nr_queues[HCTX_TYPE_DEFAULT] = rw_queues;
168 rem -= hba->nr_queues[HCTX_TYPE_DEFAULT];
169 } else {
170 rw_queues = num_possible_cpus();
171 }
172
173 if (poll_queues) {
174 hba->nr_queues[HCTX_TYPE_POLL] = poll_queues;
175 rem -= hba->nr_queues[HCTX_TYPE_POLL];
176 }
177
178 if (read_queues) {
179 hba->nr_queues[HCTX_TYPE_READ] = read_queues;
180 rem -= hba->nr_queues[HCTX_TYPE_READ];
181 }
182
183 if (!hba->nr_queues[HCTX_TYPE_DEFAULT])
184 hba->nr_queues[HCTX_TYPE_DEFAULT] = min3(rem, rw_queues,
185 num_possible_cpus());
186
187 for (i = 0; i < HCTX_MAX_TYPES; i++)
188 host->nr_hw_queues += hba->nr_queues[i];
189
190 hba->nr_hw_queues = host->nr_hw_queues + UFS_MCQ_NUM_DEV_CMD_QUEUES;
191 return 0;
192 }
193
ufshcd_mcq_memory_alloc(struct ufs_hba * hba)194 int ufshcd_mcq_memory_alloc(struct ufs_hba *hba)
195 {
196 struct ufs_hw_queue *hwq;
197 size_t utrdl_size, cqe_size;
198 int i;
199
200 for (i = 0; i < hba->nr_hw_queues; i++) {
201 hwq = &hba->uhq[i];
202
203 utrdl_size = sizeof(struct utp_transfer_req_desc) *
204 hwq->max_entries;
205 hwq->sqe_base_addr = dmam_alloc_coherent(hba->dev, utrdl_size,
206 &hwq->sqe_dma_addr,
207 GFP_KERNEL);
208 if (!hwq->sqe_dma_addr) {
209 dev_err(hba->dev, "SQE allocation failed\n");
210 return -ENOMEM;
211 }
212
213 cqe_size = sizeof(struct cq_entry) * hwq->max_entries;
214 hwq->cqe_base_addr = dmam_alloc_coherent(hba->dev, cqe_size,
215 &hwq->cqe_dma_addr,
216 GFP_KERNEL);
217 if (!hwq->cqe_dma_addr) {
218 dev_err(hba->dev, "CQE allocation failed\n");
219 return -ENOMEM;
220 }
221 }
222
223 return 0;
224 }
225
226
/* Operation and runtime registers configuration */
/* Offset of config register @r for queue @i (MCQ_QCFG_SIZE bytes per queue) */
#define MCQ_CFG_n(r, i)	((r) + MCQ_QCFG_SIZE * (i))
/*
 * Offset of operation/runtime register group @p for queue @i.
 * Note: relies on a variable named "hba" being in scope at the call site.
 */
#define MCQ_OPR_OFFSET_n(p, i) \
	(hba->mcq_opr[(p)].offset + hba->mcq_opr[(p)].stride * (i))
231
mcq_opr_base(struct ufs_hba * hba,enum ufshcd_mcq_opr n,int i)232 static void __iomem *mcq_opr_base(struct ufs_hba *hba,
233 enum ufshcd_mcq_opr n, int i)
234 {
235 struct ufshcd_mcq_opr_info_t *opr = &hba->mcq_opr[n];
236
237 return opr->base + opr->stride * i;
238 }
239
ufshcd_mcq_read_cqis(struct ufs_hba * hba,int i)240 u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i)
241 {
242 return readl(mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS);
243 }
244
/* Write @val to the REG_CQIS register in the OPR_CQIS group of queue @i. */
void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i)
{
	void __iomem *cqis_reg = mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIS;

	writel(val, cqis_reg);
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_write_cqis);
250
251 /*
252 * Current MCQ specification doesn't provide a Task Tag or its equivalent in
253 * the Completion Queue Entry. Find the Task Tag using an indirect method.
254 */
ufshcd_mcq_get_tag(struct ufs_hba * hba,struct ufs_hw_queue * hwq,struct cq_entry * cqe)255 static int ufshcd_mcq_get_tag(struct ufs_hba *hba,
256 struct ufs_hw_queue *hwq,
257 struct cq_entry *cqe)
258 {
259 u64 addr;
260
261 /* sizeof(struct utp_transfer_cmd_desc) must be a multiple of 128 */
262 BUILD_BUG_ON(sizeof(struct utp_transfer_cmd_desc) & GENMASK(6, 0));
263
264 /* Bits 63:7 UCD base address, 6:5 are reserved, 4:0 is SQ ID */
265 addr = (le64_to_cpu(cqe->command_desc_base_addr) & CQE_UCD_BA) -
266 hba->ucdl_dma_addr;
267
268 return div_u64(addr, sizeof(struct utp_transfer_cmd_desc));
269 }
270
/*
 * Complete the request described by the current completion queue entry of
 * @hwq: derive its tag, then pass tag and entry to ufshcd_compl_one_cqe().
 */
static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
				   struct ufs_hw_queue *hwq)
{
	struct cq_entry *cur_cqe = ufshcd_mcq_cur_cqe(hwq);

	ufshcd_compl_one_cqe(hba, ufshcd_mcq_get_tag(hba, hwq, cur_cqe),
			     cur_cqe);
}
279
/*
 * Drain the completion queue of @hwq without taking hwq->cq_lock.
 *
 * Refreshes the cached tail slot, processes entries until the queue reports
 * empty, and updates the CQ head only if at least one entry was processed.
 *
 * Returns the number of completion entries processed.
 */
unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
					 struct ufs_hw_queue *hwq)
{
	unsigned long nr_done;

	ufshcd_mcq_update_cq_tail_slot(hwq);

	for (nr_done = 0; !ufshcd_mcq_is_cq_empty(hwq); nr_done++) {
		ufshcd_mcq_process_cqe(hba, hwq);
		ufshcd_mcq_inc_cq_head_slot(hwq);
	}

	if (!nr_done)
		return 0;

	ufshcd_mcq_update_cq_head(hwq);
	return nr_done;
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock);
298
ufshcd_mcq_poll_cqe_lock(struct ufs_hba * hba,struct ufs_hw_queue * hwq)299 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
300 struct ufs_hw_queue *hwq)
301 {
302 unsigned long completed_reqs;
303
304 spin_lock(&hwq->cq_lock);
305 completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq);
306 spin_unlock(&hwq->cq_lock);
307
308 return completed_reqs;
309 }
310
ufshcd_mcq_make_queues_operational(struct ufs_hba * hba)311 void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
312 {
313 struct ufs_hw_queue *hwq;
314 u16 qsize;
315 int i;
316
317 for (i = 0; i < hba->nr_hw_queues; i++) {
318 hwq = &hba->uhq[i];
319 hwq->id = i;
320 qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD - 1;
321
322 /* Submission Queue Lower Base Address */
323 ufsmcq_writelx(hba, lower_32_bits(hwq->sqe_dma_addr),
324 MCQ_CFG_n(REG_SQLBA, i));
325 /* Submission Queue Upper Base Address */
326 ufsmcq_writelx(hba, upper_32_bits(hwq->sqe_dma_addr),
327 MCQ_CFG_n(REG_SQUBA, i));
328 /* Submission Queue Doorbell Address Offset */
329 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQD, i),
330 MCQ_CFG_n(REG_SQDAO, i));
331 /* Submission Queue Interrupt Status Address Offset */
332 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQIS, i),
333 MCQ_CFG_n(REG_SQISAO, i));
334
335 /* Completion Queue Lower Base Address */
336 ufsmcq_writelx(hba, lower_32_bits(hwq->cqe_dma_addr),
337 MCQ_CFG_n(REG_CQLBA, i));
338 /* Completion Queue Upper Base Address */
339 ufsmcq_writelx(hba, upper_32_bits(hwq->cqe_dma_addr),
340 MCQ_CFG_n(REG_CQUBA, i));
341 /* Completion Queue Doorbell Address Offset */
342 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQD, i),
343 MCQ_CFG_n(REG_CQDAO, i));
344 /* Completion Queue Interrupt Status Address Offset */
345 ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQIS, i),
346 MCQ_CFG_n(REG_CQISAO, i));
347
348 /* Save the base addresses for quicker access */
349 hwq->mcq_sq_head = mcq_opr_base(hba, OPR_SQD, i) + REG_SQHP;
350 hwq->mcq_sq_tail = mcq_opr_base(hba, OPR_SQD, i) + REG_SQTP;
351 hwq->mcq_cq_head = mcq_opr_base(hba, OPR_CQD, i) + REG_CQHP;
352 hwq->mcq_cq_tail = mcq_opr_base(hba, OPR_CQD, i) + REG_CQTP;
353
354 /* Reinitializing is needed upon HC reset */
355 hwq->sq_tail_slot = hwq->cq_tail_slot = hwq->cq_head_slot = 0;
356
357 /* Enable Tail Entry Push Status interrupt only for non-poll queues */
358 if (i < hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL])
359 writel(1, mcq_opr_base(hba, OPR_CQIS, i) + REG_CQIE);
360
361 /* Completion Queue Enable|Size to Completion Queue Attribute */
362 ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize,
363 MCQ_CFG_n(REG_CQATTR, i));
364
365 /*
366 * Submission Qeueue Enable|Size|Completion Queue ID to
367 * Submission Queue Attribute
368 */
369 ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize |
370 (i << QUEUE_ID_OFFSET),
371 MCQ_CFG_n(REG_SQATTR, i));
372 }
373 }
374
ufshcd_mcq_enable_esi(struct ufs_hba * hba)375 void ufshcd_mcq_enable_esi(struct ufs_hba *hba)
376 {
377 ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x2,
378 REG_UFS_MEM_CFG);
379 }
380 EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_esi);
381
ufshcd_mcq_config_esi(struct ufs_hba * hba,struct msi_msg * msg)382 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg)
383 {
384 ufshcd_writel(hba, msg->address_lo, REG_UFS_ESILBA);
385 ufshcd_writel(hba, msg->address_hi, REG_UFS_ESIUBA);
386 }
387 EXPORT_SYMBOL_GPL(ufshcd_mcq_config_esi);
388
ufshcd_mcq_init(struct ufs_hba * hba)389 int ufshcd_mcq_init(struct ufs_hba *hba)
390 {
391 struct Scsi_Host *host = hba->host;
392 struct ufs_hw_queue *hwq;
393 int ret, i;
394
395 ret = ufshcd_mcq_config_nr_queues(hba);
396 if (ret)
397 return ret;
398
399 ret = ufshcd_vops_mcq_config_resource(hba);
400 if (ret)
401 return ret;
402
403 ret = ufshcd_mcq_vops_op_runtime_config(hba);
404 if (ret) {
405 dev_err(hba->dev, "Operation runtime config failed, ret=%d\n",
406 ret);
407 return ret;
408 }
409 hba->uhq = devm_kzalloc(hba->dev,
410 hba->nr_hw_queues * sizeof(struct ufs_hw_queue),
411 GFP_KERNEL);
412 if (!hba->uhq) {
413 dev_err(hba->dev, "ufs hw queue memory allocation failed\n");
414 return -ENOMEM;
415 }
416
417 for (i = 0; i < hba->nr_hw_queues; i++) {
418 hwq = &hba->uhq[i];
419 hwq->max_entries = hba->nutrs;
420 spin_lock_init(&hwq->sq_lock);
421 spin_lock_init(&hwq->cq_lock);
422 }
423
424 /* The very first HW queue serves device commands */
425 hba->dev_cmd_queue = &hba->uhq[0];
426 /* Give dev_cmd_queue the minimal number of entries */
427 hba->dev_cmd_queue->max_entries = MAX_DEV_CMD_ENTRIES;
428
429 host->host_tagset = 1;
430 return 0;
431 }
432