1 /*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2017 Intel Corporation
5 */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_gt.h"
12 #include "gt/intel_gt_requests.h"
13 #include "gt/intel_reset.h"
14 #include "i915_selftest.h"
15
16 #include "gem/selftests/igt_gem_utils.h"
17 #include "selftests/i915_random.h"
18 #include "selftests/igt_flush_test.h"
19 #include "selftests/igt_live_test.h"
20 #include "selftests/igt_reset.h"
21 #include "selftests/igt_spinner.h"
22 #include "selftests/mock_drm.h"
23 #include "selftests/mock_gem_device.h"
24
25 #include "huge_gem_object.h"
26 #include "igt_gem_utils.h"
27
28 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
29
30 static int live_nop_switch(void *arg)
31 {
32 const unsigned int nctx = 1024;
33 struct drm_i915_private *i915 = arg;
34 struct intel_engine_cs *engine;
35 struct i915_gem_context **ctx;
36 struct igt_live_test t;
37 struct file *file;
38 unsigned long n;
39 int err = -ENODEV;
40
41 /*
42 * Create as many contexts as we can feasibly get away with
43 * and check we can switch between them rapidly.
44 *
45 * Serves as a very simple stress test for submission and HW switching
46 * between contexts.
47 */
48
49 if (!DRIVER_CAPS(i915)->has_logical_contexts)
50 return 0;
51
52 file = mock_file(i915);
53 if (IS_ERR(file))
54 return PTR_ERR(file);
55
56 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
57 if (!ctx) {
58 err = -ENOMEM;
59 goto out_file;
60 }
61
62 for (n = 0; n < nctx; n++) {
63 ctx[n] = live_context(i915, file);
64 if (IS_ERR(ctx[n])) {
65 err = PTR_ERR(ctx[n]);
66 goto out_file;
67 }
68 }
69
70 for_each_uabi_engine(engine, i915) {
71 struct i915_request *rq = NULL;
72 unsigned long end_time, prime;
73 ktime_t times[2] = {};
74
75 times[0] = ktime_get_raw();
76 for (n = 0; n < nctx; n++) {
77 struct i915_request *this;
78
79 this = igt_request_alloc(ctx[n], engine);
80 if (IS_ERR(this)) {
81 err = PTR_ERR(this);
82 goto out_file;
83 }
84 if (rq) {
85 i915_request_await_dma_fence(this, &rq->fence);
86 i915_request_put(rq);
87 }
88 rq = i915_request_get(this);
89 i915_request_add(this);
90 }
91 if (i915_request_wait(rq, 0, HZ) < 0) {
92 pr_err("Failed to populated %d contexts\n", nctx);
93 intel_gt_set_wedged(&i915->gt);
94 i915_request_put(rq);
95 err = -EIO;
96 goto out_file;
97 }
98 i915_request_put(rq);
99
100 times[1] = ktime_get_raw();
101
102 pr_info("Populated %d contexts on %s in %lluns\n",
103 nctx, engine->name, ktime_to_ns(times[1] - times[0]));
104
105 err = igt_live_test_begin(&t, i915, __func__, engine->name);
106 if (err)
107 goto out_file;
108
109 end_time = jiffies + i915_selftest.timeout_jiffies;
110 for_each_prime_number_from(prime, 2, 8192) {
111 times[1] = ktime_get_raw();
112
113 rq = NULL;
114 for (n = 0; n < prime; n++) {
115 struct i915_request *this;
116
117 this = igt_request_alloc(ctx[n % nctx], engine);
118 if (IS_ERR(this)) {
119 err = PTR_ERR(this);
120 goto out_file;
121 }
122
123 if (rq) { /* Force submission order */
124 i915_request_await_dma_fence(this, &rq->fence);
125 i915_request_put(rq);
126 }
127
128 /*
129 * This space is left intentionally blank.
130 *
131 * We do not actually want to perform any
132 * action with this request; we just want
133 * to measure the latency in allocation
134 * and submission of our breadcrumbs -
135 * ensuring that the bare request is sufficient
136 * for the system to work (i.e. proper HEAD
137 * tracking of the rings, interrupt handling,
138 * etc). It also gives us the lowest bounds
139 * for latency.
140 */
141
142 rq = i915_request_get(this);
143 i915_request_add(this);
144 }
145 GEM_BUG_ON(!rq);
146 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
147 pr_err("Switching between %ld contexts timed out\n",
148 prime);
149 intel_gt_set_wedged(&i915->gt);
150 i915_request_put(rq);
151 break;
152 }
153 i915_request_put(rq);
154
155 times[1] = ktime_sub(ktime_get_raw(), times[1]);
156 if (prime == 2)
157 times[0] = times[1];
158
159 if (__igt_timeout(end_time, NULL))
160 break;
161 }
162
163 err = igt_live_test_end(&t);
164 if (err)
165 goto out_file;
166
167 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
168 engine->name,
169 ktime_to_ns(times[0]),
170 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
171 }
172
173 out_file:
174 fput(file);
175 return err;
176 }
177
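/*
 * Worker state for live_parallel_switch(): one kthread per engine, with a
 * pair of pinned contexts to alternate requests between.
 */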
178 struct parallel_switch {
179 struct task_struct *tsk;
180 struct intel_context *ce[2];
181 };
182
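/*
 * Synchronous variant: chain one request per context and wait for the pair
 * to complete before starting the next round of switches.
 */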
183 static int __live_parallel_switch1(void *data)
184 {
185 struct parallel_switch *arg = data;
186 IGT_TIMEOUT(end_time);
187 unsigned long count;
188
189 count = 0;
190 do {
191 struct i915_request *rq = NULL;
192 int err, n;
193
194 err = 0;
195 for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
196 struct i915_request *prev = rq;
197
198 rq = i915_request_create(arg->ce[n]);
199 if (IS_ERR(rq)) {
200 i915_request_put(prev);
201 return PTR_ERR(rq);
202 }
203
204 i915_request_get(rq);
205 if (prev) {
206 err = i915_request_await_dma_fence(rq, &prev->fence);
207 i915_request_put(prev);
208 }
209
210 i915_request_add(rq);
211 }
212 if (i915_request_wait(rq, 0, HZ / 5) < 0)
213 err = -ETIME;
214 i915_request_put(rq);
215 if (err)
216 return err;
217
218 count++;
219 } while (!__igt_timeout(end_time, NULL));
220
221 pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
222 return 0;
223 }
224
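/*
 * Unbounded variant: keep chaining requests across the contexts without
 * waiting, releasing only the final request once the timeout expires.
 */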
225 static int __live_parallel_switchN(void *data)
226 {
227 struct parallel_switch *arg = data;
228 struct i915_request *rq = NULL;
229 IGT_TIMEOUT(end_time);
230 unsigned long count;
231 int n;
232
233 count = 0;
234 do {
235 for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
236 struct i915_request *prev = rq;
237 int err = 0;
238
239 rq = i915_request_create(arg->ce[n]);
240 if (IS_ERR(rq)) {
241 i915_request_put(prev);
242 return PTR_ERR(rq);
243 }
244
245 i915_request_get(rq);
246 if (prev) {
247 err = i915_request_await_dma_fence(rq, &prev->fence);
248 i915_request_put(prev);
249 }
250
251 i915_request_add(rq);
252 if (err) {
253 i915_request_put(rq);
254 return err;
255 }
256 }
257
258 count++;
259 } while (!__igt_timeout(end_time, NULL));
260 i915_request_put(rq);
261
262 pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
263 return 0;
264 }
265
266 static int live_parallel_switch(void *arg)
267 {
268 struct drm_i915_private *i915 = arg;
269 static int (* const func[])(void *arg) = {
270 __live_parallel_switch1,
271 __live_parallel_switchN,
272 NULL,
273 };
274 struct parallel_switch *data = NULL;
275 struct i915_gem_engines *engines;
276 struct i915_gem_engines_iter it;
277 int (* const *fn)(void *arg);
278 struct i915_gem_context *ctx;
279 struct intel_context *ce;
280 struct file *file;
281 int n, m, count;
282 int err = 0;
283
284 /*
285 * Check we can process switches on all engines simultaneously.
286 */
287
288 if (!DRIVER_CAPS(i915)->has_logical_contexts)
289 return 0;
290
291 file = mock_file(i915);
292 if (IS_ERR(file))
293 return PTR_ERR(file);
294
295 ctx = live_context(i915, file);
296 if (IS_ERR(ctx)) {
297 err = PTR_ERR(ctx);
298 goto out_file;
299 }
300
301 engines = i915_gem_context_lock_engines(ctx);
302 count = engines->num_engines;
303
304 data = kcalloc(count, sizeof(*data), GFP_KERNEL);
305 if (!data) {
306 i915_gem_context_unlock_engines(ctx);
307 err = -ENOMEM;
308 goto out_file;
309 }
310
311 m = 0; /* Use the first context as our template for the engines */
312 for_each_gem_engine(ce, engines, it) {
313 err = intel_context_pin(ce);
314 if (err) {
315 i915_gem_context_unlock_engines(ctx);
316 goto out;
317 }
318 data[m++].ce[0] = intel_context_get(ce);
319 }
320 i915_gem_context_unlock_engines(ctx);
321
322 /* Clone the same set of engines into the other contexts */
323 for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
324 ctx = live_context(i915, file);
325 if (IS_ERR(ctx)) {
326 err = PTR_ERR(ctx);
327 goto out;
328 }
329
330 for (m = 0; m < count; m++) {
331 if (!data[m].ce[0])
332 continue;
333
334 ce = intel_context_create(data[m].ce[0]->engine);
335 if (IS_ERR(ce)) {
err = PTR_ERR(ce);
336 goto out;
}
337
338 err = intel_context_pin(ce);
339 if (err) {
340 intel_context_put(ce);
341 goto out;
342 }
343
344 data[m].ce[n] = ce;
345 }
346 }
347
348 for (fn = func; !err && *fn; fn++) {
349 struct igt_live_test t;
350 int n;
351
352 err = igt_live_test_begin(&t, i915, __func__, "");
353 if (err)
354 break;
355
356 for (n = 0; n < count; n++) {
357 if (!data[n].ce[0])
358 continue;
359
360 data[n].tsk = kthread_run(*fn, &data[n],
361 "igt/parallel:%s",
362 data[n].ce[0]->engine->name);
363 if (IS_ERR(data[n].tsk)) {
364 err = PTR_ERR(data[n].tsk);
365 break;
366 }
367 get_task_struct(data[n].tsk);
368 }
369
370 yield(); /* start all threads before we kthread_stop() */
371
372 for (n = 0; n < count; n++) {
373 int status;
374
375 if (IS_ERR_OR_NULL(data[n].tsk))
376 continue;
377
378 status = kthread_stop(data[n].tsk);
379 if (status && !err)
380 err = status;
381
382 put_task_struct(data[n].tsk);
383 data[n].tsk = NULL;
384 }
385
386 if (igt_live_test_end(&t))
387 err = -EIO;
388 }
389
390 out:
391 for (n = 0; n < count; n++) {
392 for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
393 if (!data[n].ce[m])
394 continue;
395
396 intel_context_unpin(data[n].ce[m]);
397 intel_context_put(data[n].ce[m]);
398 }
399 }
400 kfree(data);
401 out_file:
402 fput(file);
403 return err;
404 }
405
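/*
 * A huge_gem_object is backed by fewer physical pages than its nominal size:
 * real_page_count() is the number of pages actually allocated, while
 * fake_page_count() is the full size exposed to the GTT.
 */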
406 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
407 {
408 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
409 }
410
411 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
412 {
413 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
414 }
415
416 static int gpu_fill(struct intel_context *ce,
417 struct drm_i915_gem_object *obj,
418 unsigned int dw)
419 {
420 struct i915_vma *vma;
421 int err;
422
423 GEM_BUG_ON(obj->base.size > ce->vm->total);
424 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
425
426 vma = i915_vma_instance(obj, ce->vm, NULL);
427 if (IS_ERR(vma))
428 return PTR_ERR(vma);
429
430 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
431 if (err)
432 return err;
433
434 /*
435 * Within the GTT the huge object maps every page onto
436 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
437 * We set the nth dword within the page using the nth
438 * mapping via the GTT - this should exercise the GTT mapping
439 * whilst checking that each context provides a unique view
440 * into the object.
441 */
442 err = igt_gpu_fill_dw(ce, vma,
443 (dw * real_page_count(obj)) << PAGE_SHIFT |
444 (dw * sizeof(u32)),
445 real_page_count(obj),
446 dw);
447 i915_vma_unpin(vma);
448
449 return err;
450 }
451
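/*
 * Fill every dword of the object with @value from the CPU, clflushing on
 * !llc platforms so the GPU will see the writes.
 */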
452 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
453 {
454 const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
455 unsigned int n, m, need_flush;
456 int err;
457
458 i915_gem_object_lock(obj, NULL);
459 err = i915_gem_object_prepare_write(obj, &need_flush);
460 if (err)
461 goto out;
462
463 for (n = 0; n < real_page_count(obj); n++) {
464 u32 *map;
465
466 map = kmap_atomic(i915_gem_object_get_page(obj, n));
467 for (m = 0; m < DW_PER_PAGE; m++)
468 map[m] = value;
469 if (!has_llc)
470 drm_clflush_virt_range(map, PAGE_SIZE);
471 kunmap_atomic(map);
472 }
473
474 i915_gem_object_finish_access(obj);
475 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
476 obj->write_domain = 0;
477 out:
478 i915_gem_object_unlock(obj);
479 return err;
480 }
481
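/*
 * Verify the object on the CPU: the first @max dwords of each page must
 * equal their own index (one gpu_fill() pass per value), and the remainder
 * must still hold the STACK_MAGIC poison written by cpu_fill().
 */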
482 static noinline int cpu_check(struct drm_i915_gem_object *obj,
483 unsigned int idx, unsigned int max)
484 {
485 unsigned int n, m, needs_flush;
486 int err;
487
488 i915_gem_object_lock(obj, NULL);
489 err = i915_gem_object_prepare_read(obj, &needs_flush);
490 if (err)
491 goto out_unlock;
492
493 for (n = 0; n < real_page_count(obj); n++) {
494 u32 *map;
495
496 map = kmap_atomic(i915_gem_object_get_page(obj, n));
497 if (needs_flush & CLFLUSH_BEFORE)
498 drm_clflush_virt_range(map, PAGE_SIZE);
499
500 for (m = 0; m < max; m++) {
501 if (map[m] != m) {
502 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
503 __builtin_return_address(0), idx,
504 n, real_page_count(obj), m, max,
505 map[m], m);
506 err = -EINVAL;
507 goto out_unmap;
508 }
509 }
510
511 for (; m < DW_PER_PAGE; m++) {
512 if (map[m] != STACK_MAGIC) {
513 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
514 __builtin_return_address(0), idx, n, m,
515 map[m], STACK_MAGIC);
516 err = -EINVAL;
517 goto out_unmap;
518 }
519 }
520
521 out_unmap:
522 kunmap_atomic(map);
523 if (err)
524 break;
525 }
526
527 i915_gem_object_finish_access(obj);
528 out_unlock:
529 i915_gem_object_unlock(obj);
530 return err;
531 }
532
533 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
534 {
535 int err;
536
537 GEM_BUG_ON(obj->base.handle_count);
538
539 /* tie the object to the drm_file for easy reaping */
540 err = idr_alloc(&to_drm_file(file)->object_idr,
541 &obj->base, 1, 0, GFP_KERNEL);
542 if (err < 0)
543 return err;
544
545 i915_gem_object_get(obj);
546 obj->base.handle_count++;
547 return 0;
548 }
549
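/*
 * Create a huge_gem_object filling at most half of the ppgtt, poison it
 * with STACK_MAGIC on the CPU and track it on @objects for later checking.
 */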
550 static struct drm_i915_gem_object *
551 create_test_object(struct i915_address_space *vm,
552 struct file *file,
553 struct list_head *objects)
554 {
555 struct drm_i915_gem_object *obj;
556 u64 size;
557 int err;
558
559 /* Keep in GEM's good graces */
560 intel_gt_retire_requests(vm->gt);
561
562 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
563 size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
564
565 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
566 if (IS_ERR(obj))
567 return obj;
568
569 err = file_add_object(file, obj);
570 i915_gem_object_put(obj);
571 if (err)
572 return ERR_PTR(err);
573
574 err = cpu_fill(obj, STACK_MAGIC);
575 if (err) {
576 pr_err("Failed to fill object with cpu, err=%d\n",
577 err);
578 return ERR_PTR(err);
579 }
580
581 list_add_tail(&obj->st_link, objects);
582 return obj;
583 }
584
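/* How many gpu_fill() passes (one dword index each) the object can hold. */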
585 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
586 {
587 unsigned long npages = fake_page_count(obj);
588
589 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
590 return npages / DW_PER_PAGE;
591 }
592
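/*
 * Simple request throttling: tq[] is a FIFO of in-flight requests. Each call
 * to throttle() waits for the oldest entry, shuffles the queue down and
 * appends a fresh request, limiting how far the tests run ahead of the HW.
 * Typical usage, as in igt_ctx_exec() below:
 *
 *	struct i915_request *tq[5] = {};
 *
 *	err = throttle(ce, tq, ARRAY_SIZE(tq));
 *	...
 *	throttle_release(tq, ARRAY_SIZE(tq));
 */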
593 static void throttle_release(struct i915_request **q, int count)
594 {
595 int i;
596
597 for (i = 0; i < count; i++) {
598 if (IS_ERR_OR_NULL(q[i]))
599 continue;
600
601 i915_request_put(fetch_and_zero(&q[i]));
602 }
603 }
604
605 static int throttle(struct intel_context *ce,
606 struct i915_request **q, int count)
607 {
608 int i;
609
610 if (!IS_ERR_OR_NULL(q[0])) {
611 if (i915_request_wait(q[0],
612 I915_WAIT_INTERRUPTIBLE,
613 MAX_SCHEDULE_TIMEOUT) < 0)
614 return -EINTR;
615
616 i915_request_put(q[0]);
617 }
618
619 for (i = 0; i < count - 1; i++)
620 q[i] = q[i + 1];
621
622 q[i] = intel_context_create_request(ce);
623 if (IS_ERR(q[i]))
624 return PTR_ERR(q[i]);
625
626 i915_request_get(q[i]);
627 i915_request_add(q[i]);
628
629 return 0;
630 }
631
632 static int igt_ctx_exec(void *arg)
633 {
634 struct drm_i915_private *i915 = arg;
635 struct intel_engine_cs *engine;
636 int err = -ENODEV;
637
638 /*
639 * Create a few different contexts (with different mm) and write
640 * through each ctx/mm using the GPU, making sure those writes end
641 * up in the expected pages of our obj.
642 */
643
644 if (!DRIVER_CAPS(i915)->has_logical_contexts)
645 return 0;
646
647 for_each_uabi_engine(engine, i915) {
648 struct drm_i915_gem_object *obj = NULL;
649 unsigned long ncontexts, ndwords, dw;
650 struct i915_request *tq[5] = {};
651 struct igt_live_test t;
652 IGT_TIMEOUT(end_time);
653 LIST_HEAD(objects);
654 struct file *file;
655
656 if (!intel_engine_can_store_dword(engine))
657 continue;
658
659 if (!engine->context_size)
660 continue; /* No logical context support in HW */
661
662 file = mock_file(i915);
663 if (IS_ERR(file))
664 return PTR_ERR(file);
665
666 err = igt_live_test_begin(&t, i915, __func__, engine->name);
667 if (err)
668 goto out_file;
669
670 ncontexts = 0;
671 ndwords = 0;
672 dw = 0;
673 while (!time_after(jiffies, end_time)) {
674 struct i915_gem_context *ctx;
675 struct intel_context *ce;
676
677 ctx = kernel_context(i915, NULL);
678 if (IS_ERR(ctx)) {
679 err = PTR_ERR(ctx);
680 goto out_file;
681 }
682
683 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
684 GEM_BUG_ON(IS_ERR(ce));
685
686 if (!obj) {
687 obj = create_test_object(ce->vm, file, &objects);
688 if (IS_ERR(obj)) {
689 err = PTR_ERR(obj);
690 intel_context_put(ce);
691 kernel_context_close(ctx);
692 goto out_file;
693 }
694 }
695
696 err = gpu_fill(ce, obj, dw);
697 if (err) {
698 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
699 ndwords, dw, max_dwords(obj),
700 engine->name,
701 yesno(i915_gem_context_has_full_ppgtt(ctx)),
702 err);
703 intel_context_put(ce);
704 kernel_context_close(ctx);
705 goto out_file;
706 }
707
708 err = throttle(ce, tq, ARRAY_SIZE(tq));
709 if (err) {
710 intel_context_put(ce);
711 kernel_context_close(ctx);
712 goto out_file;
713 }
714
715 if (++dw == max_dwords(obj)) {
716 obj = NULL;
717 dw = 0;
718 }
719
720 ndwords++;
721 ncontexts++;
722
723 intel_context_put(ce);
724 kernel_context_close(ctx);
725 }
726
727 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
728 ncontexts, engine->name, ndwords);
729
730 ncontexts = dw = 0;
731 list_for_each_entry(obj, &objects, st_link) {
732 unsigned int rem =
733 min_t(unsigned int, ndwords - dw, max_dwords(obj));
734
735 err = cpu_check(obj, ncontexts++, rem);
736 if (err)
737 break;
738
739 dw += rem;
740 }
741
742 out_file:
743 throttle_release(tq, ARRAY_SIZE(tq));
744 if (igt_live_test_end(&t))
745 err = -EIO;
746
747 fput(file);
748 if (err)
749 return err;
750
751 i915_gem_drain_freed_objects(i915);
752 }
753
754 return 0;
755 }
756
757 static int igt_shared_ctx_exec(void *arg)
758 {
759 struct drm_i915_private *i915 = arg;
760 struct i915_request *tq[5] = {};
761 struct i915_gem_context *parent;
762 struct intel_engine_cs *engine;
763 struct igt_live_test t;
764 struct file *file;
765 int err = 0;
766
767 /*
768 * Create a few different contexts with the same mm and write
769 * through each ctx using the GPU, making sure those writes end
770 * up in the expected pages of our obj.
771 */
772 if (!DRIVER_CAPS(i915)->has_logical_contexts)
773 return 0;
774
775 file = mock_file(i915);
776 if (IS_ERR(file))
777 return PTR_ERR(file);
778
779 parent = live_context(i915, file);
780 if (IS_ERR(parent)) {
781 err = PTR_ERR(parent);
782 goto out_file;
783 }
784
785 if (!parent->vm) { /* not full-ppgtt; nothing to share */
786 err = 0;
787 goto out_file;
788 }
789
790 err = igt_live_test_begin(&t, i915, __func__, "");
791 if (err)
792 goto out_file;
793
794 for_each_uabi_engine(engine, i915) {
795 unsigned long ncontexts, ndwords, dw;
796 struct drm_i915_gem_object *obj = NULL;
797 IGT_TIMEOUT(end_time);
798 LIST_HEAD(objects);
799
800 if (!intel_engine_can_store_dword(engine))
801 continue;
802
803 dw = 0;
804 ndwords = 0;
805 ncontexts = 0;
806 while (!time_after(jiffies, end_time)) {
807 struct i915_gem_context *ctx;
808 struct intel_context *ce;
809
810 ctx = kernel_context(i915, parent->vm);
811 if (IS_ERR(ctx)) {
812 err = PTR_ERR(ctx);
813 goto out_test;
814 }
815
816 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
817 GEM_BUG_ON(IS_ERR(ce));
818
819 if (!obj) {
820 obj = create_test_object(parent->vm,
821 file, &objects);
822 if (IS_ERR(obj)) {
823 err = PTR_ERR(obj);
824 intel_context_put(ce);
825 kernel_context_close(ctx);
826 goto out_test;
827 }
828 }
829
830 err = gpu_fill(ce, obj, dw);
831 if (err) {
832 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
833 ndwords, dw, max_dwords(obj),
834 engine->name,
835 yesno(i915_gem_context_has_full_ppgtt(ctx)),
836 err);
837 intel_context_put(ce);
838 kernel_context_close(ctx);
839 goto out_test;
840 }
841
842 err = throttle(ce, tq, ARRAY_SIZE(tq));
843 if (err) {
844 intel_context_put(ce);
845 kernel_context_close(ctx);
846 goto out_test;
847 }
848
849 if (++dw == max_dwords(obj)) {
850 obj = NULL;
851 dw = 0;
852 }
853
854 ndwords++;
855 ncontexts++;
856
857 intel_context_put(ce);
858 kernel_context_close(ctx);
859 }
860 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
861 ncontexts, engine->name, ndwords);
862
863 ncontexts = dw = 0;
864 list_for_each_entry(obj, &objects, st_link) {
865 unsigned int rem =
866 min_t(unsigned int, ndwords - dw, max_dwords(obj));
867
868 err = cpu_check(obj, ncontexts++, rem);
869 if (err)
870 goto out_test;
871
872 dw += rem;
873 }
874
875 i915_gem_drain_freed_objects(i915);
876 }
877 out_test:
878 throttle_release(tq, ARRAY_SIZE(tq));
879 if (igt_live_test_end(&t))
880 err = -EIO;
881 out_file:
882 fput(file);
883 return err;
884 }
885
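/*
 * Build a small batch that stores GEN8_R_PWR_CLK_STATE (the RPCS register)
 * into the start of @vma using MI_STORE_REGISTER_MEM.
 */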
886 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
887 {
888 u32 *cmd;
889
890 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
891
892 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
893 if (IS_ERR(cmd))
894 return PTR_ERR(cmd);
895
896 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
897 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
898 *cmd++ = lower_32_bits(vma->node.start);
899 *cmd++ = upper_32_bits(vma->node.start);
900 *cmd = MI_BATCH_BUFFER_END;
901
902 __i915_gem_object_flush_map(rpcs, 0, 64);
903 i915_gem_object_unpin_map(rpcs);
904
905 intel_gt_chipset_flush(vma->vm->gt);
906
907 return 0;
908 }
909
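/*
 * Submit the RPCS query batch on @ce, targeting @obj as the result buffer;
 * a reference to the request is returned in @rq_out for the caller to wait
 * upon before inspecting the result.
 */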
910 static int
911 emit_rpcs_query(struct drm_i915_gem_object *obj,
912 struct intel_context *ce,
913 struct i915_request **rq_out)
914 {
915 struct drm_i915_private *i915 = to_i915(obj->base.dev);
916 struct i915_request *rq;
917 struct i915_gem_ww_ctx ww;
918 struct i915_vma *batch;
919 struct i915_vma *vma;
920 struct drm_i915_gem_object *rpcs;
921 int err;
922
923 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
924
925 if (GRAPHICS_VER(i915) < 8)
926 return -EINVAL;
927
928 vma = i915_vma_instance(obj, ce->vm, NULL);
929 if (IS_ERR(vma))
930 return PTR_ERR(vma);
931
932 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
933 if (IS_ERR(rpcs))
934 return PTR_ERR(rpcs);
935
936 batch = i915_vma_instance(rpcs, ce->vm, NULL);
937 if (IS_ERR(batch)) {
938 err = PTR_ERR(batch);
939 goto err_put;
940 }
941
942 i915_gem_ww_ctx_init(&ww, false);
943 retry:
944 err = i915_gem_object_lock(obj, &ww);
945 if (!err)
946 err = i915_gem_object_lock(rpcs, &ww);
947 if (!err)
948 err = i915_gem_object_set_to_gtt_domain(obj, false);
949 if (!err)
950 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
951 if (err)
952 goto err_put;
953
954 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
955 if (err)
956 goto err_vma;
957
958 err = rpcs_query_batch(rpcs, vma);
959 if (err)
960 goto err_batch;
961
962 rq = i915_request_create(ce);
963 if (IS_ERR(rq)) {
964 err = PTR_ERR(rq);
965 goto err_batch;
966 }
967
968 err = i915_request_await_object(rq, batch->obj, false);
969 if (err == 0)
970 err = i915_vma_move_to_active(batch, rq, 0);
971 if (err)
972 goto skip_request;
973
974 err = i915_request_await_object(rq, vma->obj, true);
975 if (err == 0)
976 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
977 if (err)
978 goto skip_request;
979
980 if (rq->engine->emit_init_breadcrumb) {
981 err = rq->engine->emit_init_breadcrumb(rq);
982 if (err)
983 goto skip_request;
984 }
985
986 err = rq->engine->emit_bb_start(rq,
987 batch->node.start, batch->node.size,
988 0);
989 if (err)
990 goto skip_request;
991
992 *rq_out = i915_request_get(rq);
993
994 skip_request:
995 if (err)
996 i915_request_set_error_once(rq, err);
997 i915_request_add(rq);
998 err_batch:
999 i915_vma_unpin(batch);
1000 err_vma:
1001 i915_vma_unpin(vma);
1002 err_put:
1003 if (err == -EDEADLK) {
1004 err = i915_gem_ww_ctx_backoff(&ww);
1005 if (!err)
1006 goto retry;
1007 }
1008 i915_gem_ww_ctx_fini(&ww);
1009 i915_gem_object_put(rpcs);
1010 return err;
1011 }
1012
1013 #define TEST_IDLE BIT(0)
1014 #define TEST_BUSY BIT(1)
1015 #define TEST_RESET BIT(2)
1016
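/*
 * For the busy/reset phases, start a spinner on @ce so that the SSEU
 * reconfiguration is applied while the context is active.
 */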
1017 static int
1018 __sseu_prepare(const char *name,
1019 unsigned int flags,
1020 struct intel_context *ce,
1021 struct igt_spinner **spin)
1022 {
1023 struct i915_request *rq;
1024 int ret;
1025
1026 *spin = NULL;
1027 if (!(flags & (TEST_BUSY | TEST_RESET)))
1028 return 0;
1029
1030 *spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1031 if (!*spin)
1032 return -ENOMEM;
1033
1034 ret = igt_spinner_init(*spin, ce->engine->gt);
1035 if (ret)
1036 goto err_free;
1037
1038 rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1039 if (IS_ERR(rq)) {
1040 ret = PTR_ERR(rq);
1041 goto err_fini;
1042 }
1043
1044 i915_request_add(rq);
1045
1046 if (!igt_wait_for_spinner(*spin, rq)) {
1047 pr_err("%s: Spinner failed to start!\n", name);
1048 ret = -ETIMEDOUT;
1049 goto err_end;
1050 }
1051
1052 return 0;
1053
1054 err_end:
1055 igt_spinner_end(*spin);
1056 err_fini:
1057 igt_spinner_fini(*spin);
1058 err_free:
1059 kfree(fetch_and_zero(spin));
1060 return ret;
1061 }
1062
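/*
 * Read R_PWR_CLK_STATE back via the GPU and decode its slice count field,
 * returning the number of enabled slices (or a negative error code) and the
 * raw register value in @rpcs.
 */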
1063 static int
1064 __read_slice_count(struct intel_context *ce,
1065 struct drm_i915_gem_object *obj,
1066 struct igt_spinner *spin,
1067 u32 *rpcs)
1068 {
1069 struct i915_request *rq = NULL;
1070 u32 s_mask, s_shift;
1071 unsigned int cnt;
1072 u32 *buf, val;
1073 long ret;
1074
1075 ret = emit_rpcs_query(obj, ce, &rq);
1076 if (ret)
1077 return ret;
1078
1079 if (spin)
1080 igt_spinner_end(spin);
1081
1082 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1083 i915_request_put(rq);
1084 if (ret < 0)
1085 return ret;
1086
1087 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1088 if (IS_ERR(buf)) {
1089 ret = PTR_ERR(buf);
1090 return ret;
1091 }
1092
1093 if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1094 s_mask = GEN11_RPCS_S_CNT_MASK;
1095 s_shift = GEN11_RPCS_S_CNT_SHIFT;
1096 } else {
1097 s_mask = GEN8_RPCS_S_CNT_MASK;
1098 s_shift = GEN8_RPCS_S_CNT_SHIFT;
1099 }
1100
1101 val = *buf;
1102 cnt = (val & s_mask) >> s_shift;
1103 *rpcs = val;
1104
1105 i915_gem_object_unpin_map(obj);
1106
1107 return cnt;
1108 }
1109
1110 static int
1111 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1112 const char *prefix, const char *suffix)
1113 {
1114 if (slices == expected)
1115 return 0;
1116
1117 if (slices < 0) {
1118 pr_err("%s: %s read slice count failed with %d%s\n",
1119 name, prefix, slices, suffix);
1120 return slices;
1121 }
1122
1123 pr_err("%s: %s slice count %d is not %u%s\n",
1124 name, prefix, slices, expected, suffix);
1125
1126 pr_info("RPCS=0x%x; %u%sx%u%s\n",
1127 rpcs, slices,
1128 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1129 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1130 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1131
1132 return -EINVAL;
1133 }
1134
1135 static int
1136 __sseu_finish(const char *name,
1137 unsigned int flags,
1138 struct intel_context *ce,
1139 struct drm_i915_gem_object *obj,
1140 unsigned int expected,
1141 struct igt_spinner *spin)
1142 {
1143 unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1144 u32 rpcs = 0;
1145 int ret = 0;
1146
1147 if (flags & TEST_RESET) {
1148 ret = intel_engine_reset(ce->engine, "sseu");
1149 if (ret)
1150 goto out;
1151 }
1152
1153 ret = __read_slice_count(ce, obj,
1154 flags & TEST_RESET ? NULL : spin, &rpcs);
1155 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1156 if (ret)
1157 goto out;
1158
1159 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1160 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1161
1162 out:
1163 if (spin)
1164 igt_spinner_end(spin);
1165
1166 if ((flags & TEST_IDLE) && ret == 0) {
1167 ret = igt_flush_test(ce->engine->i915);
1168 if (ret)
1169 return ret;
1170
1171 ret = __read_slice_count(ce, obj, NULL, &rpcs);
1172 ret = __check_rpcs(name, rpcs, ret, expected,
1173 "Context", " after idle!");
1174 }
1175
1176 return ret;
1177 }
1178
1179 static int
1180 __sseu_test(const char *name,
1181 unsigned int flags,
1182 struct intel_context *ce,
1183 struct drm_i915_gem_object *obj,
1184 struct intel_sseu sseu)
1185 {
1186 struct igt_spinner *spin = NULL;
1187 int ret;
1188
1189 intel_engine_pm_get(ce->engine);
1190
1191 ret = __sseu_prepare(name, flags, ce, &spin);
1192 if (ret)
1193 goto out_pm;
1194
1195 ret = intel_context_reconfigure_sseu(ce, sseu);
1196 if (ret)
1197 goto out_spin;
1198
1199 ret = __sseu_finish(name, flags, ce, obj,
1200 hweight32(sseu.slice_mask), spin);
1201
1202 out_spin:
1203 if (spin) {
1204 igt_spinner_end(spin);
1205 igt_spinner_fini(spin);
1206 kfree(spin);
1207 }
1208 out_pm:
1209 intel_engine_pm_put(ce->engine);
1210 return ret;
1211 }
1212
1213 static int
1214 __igt_ctx_sseu(struct drm_i915_private *i915,
1215 const char *name,
1216 unsigned int flags)
1217 {
1218 struct drm_i915_gem_object *obj;
1219 int inst = 0;
1220 int ret = 0;
1221
1222 if (GRAPHICS_VER(i915) < 9)
1223 return 0;
1224
1225 if (flags & TEST_RESET)
1226 igt_global_reset_lock(&i915->gt);
1227
1228 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1229 if (IS_ERR(obj)) {
1230 ret = PTR_ERR(obj);
1231 goto out_unlock;
1232 }
1233
1234 do {
1235 struct intel_engine_cs *engine;
1236 struct intel_context *ce;
1237 struct intel_sseu pg_sseu;
1238
1239 engine = intel_engine_lookup_user(i915,
1240 I915_ENGINE_CLASS_RENDER,
1241 inst++);
1242 if (!engine)
1243 break;
1244
1245 if (hweight32(engine->sseu.slice_mask) < 2)
1246 continue;
1247
1248 if (!engine->gt->info.sseu.has_slice_pg)
1249 continue;
1250
1251 /*
1252 * Gen11 VME-friendly power-gated configuration with
1253 * half of the sub-slices enabled.
1254 */
1255 pg_sseu = engine->sseu;
1256 pg_sseu.slice_mask = 1;
1257 pg_sseu.subslice_mask =
1258 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1259
1260 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1261 engine->name, name, flags,
1262 hweight32(engine->sseu.slice_mask),
1263 hweight32(pg_sseu.slice_mask));
1264
1265 ce = intel_context_create(engine);
1266 if (IS_ERR(ce)) {
1267 ret = PTR_ERR(ce);
1268 goto out_put;
1269 }
1270
1271 ret = intel_context_pin(ce);
1272 if (ret)
1273 goto out_ce;
1274
1275 /* First set the default mask. */
1276 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1277 if (ret)
1278 goto out_unpin;
1279
1280 /* Then set a power-gated configuration. */
1281 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1282 if (ret)
1283 goto out_unpin;
1284
1285 /* Back to defaults. */
1286 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1287 if (ret)
1288 goto out_unpin;
1289
1290 /* One last power-gated configuration for the road. */
1291 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1292 if (ret)
1293 goto out_unpin;
1294
1295 out_unpin:
1296 intel_context_unpin(ce);
1297 out_ce:
1298 intel_context_put(ce);
1299 } while (!ret);
1300
1301 if (igt_flush_test(i915))
1302 ret = -EIO;
1303
1304 out_put:
1305 i915_gem_object_put(obj);
1306
1307 out_unlock:
1308 if (flags & TEST_RESET)
1309 igt_global_reset_unlock(&i915->gt);
1310
1311 if (ret)
1312 pr_err("%s: Failed with %d!\n", name, ret);
1313
1314 return ret;
1315 }
1316
1317 static int igt_ctx_sseu(void *arg)
1318 {
1319 struct {
1320 const char *name;
1321 unsigned int flags;
1322 } *phase, phases[] = {
1323 { .name = "basic", .flags = 0 },
1324 { .name = "idle", .flags = TEST_IDLE },
1325 { .name = "busy", .flags = TEST_BUSY },
1326 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1327 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1328 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1329 };
1330 unsigned int i;
1331 int ret = 0;
1332
1333 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1334 i++, phase++)
1335 ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1336
1337 return ret;
1338 }
1339
1340 static int igt_ctx_readonly(void *arg)
1341 {
1342 struct drm_i915_private *i915 = arg;
1343 unsigned long idx, ndwords, dw, num_engines;
1344 struct drm_i915_gem_object *obj = NULL;
1345 struct i915_request *tq[5] = {};
1346 struct i915_gem_engines_iter it;
1347 struct i915_address_space *vm;
1348 struct i915_gem_context *ctx;
1349 struct intel_context *ce;
1350 struct igt_live_test t;
1351 I915_RND_STATE(prng);
1352 IGT_TIMEOUT(end_time);
1353 LIST_HEAD(objects);
1354 struct file *file;
1355 int err = -ENODEV;
1356
1357 /*
1358 * Create a few read-only objects (with the occasional writable object)
1359 * and try to write into these objects, checking that the GPU discards
1360 * any write to a read-only object.
1361 */
1362
1363 file = mock_file(i915);
1364 if (IS_ERR(file))
1365 return PTR_ERR(file);
1366
1367 err = igt_live_test_begin(&t, i915, __func__, "");
1368 if (err)
1369 goto out_file;
1370
1371 ctx = live_context(i915, file);
1372 if (IS_ERR(ctx)) {
1373 err = PTR_ERR(ctx);
1374 goto out_file;
1375 }
1376
1377 vm = ctx->vm ?: &i915->ggtt.alias->vm;
1378 if (!vm || !vm->has_read_only) {
1379 err = 0;
1380 goto out_file;
1381 }
1382
1383 num_engines = 0;
1384 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1385 if (intel_engine_can_store_dword(ce->engine))
1386 num_engines++;
1387 i915_gem_context_unlock_engines(ctx);
1388
1389 ndwords = 0;
1390 dw = 0;
1391 while (!time_after(jiffies, end_time)) {
1392 for_each_gem_engine(ce,
1393 i915_gem_context_lock_engines(ctx), it) {
1394 if (!intel_engine_can_store_dword(ce->engine))
1395 continue;
1396
1397 if (!obj) {
1398 obj = create_test_object(ce->vm, file, &objects);
1399 if (IS_ERR(obj)) {
1400 err = PTR_ERR(obj);
1401 i915_gem_context_unlock_engines(ctx);
1402 goto out_file;
1403 }
1404
1405 if (prandom_u32_state(&prng) & 1)
1406 i915_gem_object_set_readonly(obj);
1407 }
1408
1409 err = gpu_fill(ce, obj, dw);
1410 if (err) {
1411 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1412 ndwords, dw, max_dwords(obj),
1413 ce->engine->name,
1414 yesno(i915_gem_context_has_full_ppgtt(ctx)),
1415 err);
1416 i915_gem_context_unlock_engines(ctx);
1417 goto out_file;
1418 }
1419
1420 err = throttle(ce, tq, ARRAY_SIZE(tq));
1421 if (err) {
1422 i915_gem_context_unlock_engines(ctx);
1423 goto out_file;
1424 }
1425
1426 if (++dw == max_dwords(obj)) {
1427 obj = NULL;
1428 dw = 0;
1429 }
1430 ndwords++;
1431 }
1432 i915_gem_context_unlock_engines(ctx);
1433 }
1434 pr_info("Submitted %lu dwords (across %lu engines)\n",
1435 ndwords, num_engines);
1436
1437 dw = 0;
1438 idx = 0;
1439 list_for_each_entry(obj, &objects, st_link) {
1440 unsigned int rem =
1441 min_t(unsigned int, ndwords - dw, max_dwords(obj));
1442 unsigned int num_writes;
1443
1444 num_writes = rem;
1445 if (i915_gem_object_is_readonly(obj))
1446 num_writes = 0;
1447
1448 err = cpu_check(obj, idx++, num_writes);
1449 if (err)
1450 break;
1451
1452 dw += rem;
1453 }
1454
1455 out_file:
1456 throttle_release(tq, ARRAY_SIZE(tq));
1457 if (igt_live_test_end(&t))
1458 err = -EIO;
1459
1460 fput(file);
1461 return err;
1462 }
1463
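/*
 * Check that the chosen scratch offset does not overlap an existing
 * allocation in the address space.
 */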
1464 static int check_scratch(struct i915_address_space *vm, u64 offset)
1465 {
1466 struct drm_mm_node *node;
1467
1468 mutex_lock(&vm->mutex);
1469 node = __drm_mm_interval_first(&vm->mm,
1470 offset, offset + sizeof(u32) - 1);
1471 mutex_unlock(&vm->mutex);
1472 if (!node || node->start > offset)
1473 return 0;
1474
1475 GEM_BUG_ON(offset >= node->start + node->size);
1476
1477 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1478 upper_32_bits(offset), lower_32_bits(offset));
1479 return -EINVAL;
1480 }
1481
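/*
 * Emit a batch in @ctx that writes @value to @offset within the context's
 * address space using MI_STORE_DWORD_IMM.
 */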
1482 static int write_to_scratch(struct i915_gem_context *ctx,
1483 struct intel_engine_cs *engine,
1484 u64 offset, u32 value)
1485 {
1486 struct drm_i915_private *i915 = ctx->i915;
1487 struct drm_i915_gem_object *obj;
1488 struct i915_address_space *vm;
1489 struct i915_request *rq;
1490 struct i915_vma *vma;
1491 u32 *cmd;
1492 int err;
1493
1494 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1495
1496 err = check_scratch(ctx->vm, offset);
1497 if (err)
1498 return err;
1499
1500 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1501 if (IS_ERR(obj))
1502 return PTR_ERR(obj);
1503
1504 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1505 if (IS_ERR(cmd)) {
1506 err = PTR_ERR(cmd);
1507 goto out;
1508 }
1509
1510 *cmd++ = MI_STORE_DWORD_IMM_GEN4;
1511 if (GRAPHICS_VER(i915) >= 8) {
1512 *cmd++ = lower_32_bits(offset);
1513 *cmd++ = upper_32_bits(offset);
1514 } else {
1515 *cmd++ = 0;
1516 *cmd++ = offset;
1517 }
1518 *cmd++ = value;
1519 *cmd = MI_BATCH_BUFFER_END;
1520 __i915_gem_object_flush_map(obj, 0, 64);
1521 i915_gem_object_unpin_map(obj);
1522
1523 intel_gt_chipset_flush(engine->gt);
1524
1525 vm = i915_gem_context_get_eb_vm(ctx);
1526 vma = i915_vma_instance(obj, vm, NULL);
1527 if (IS_ERR(vma)) {
1528 err = PTR_ERR(vma);
1529 goto out_vm;
1530 }
1531
1532 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1533 if (err)
1534 goto out_vm;
1535
1536 rq = igt_request_alloc(ctx, engine);
1537 if (IS_ERR(rq)) {
1538 err = PTR_ERR(rq);
1539 goto err_unpin;
1540 }
1541
1542 i915_vma_lock(vma);
1543 err = i915_request_await_object(rq, vma->obj, false);
1544 if (err == 0)
1545 err = i915_vma_move_to_active(vma, rq, 0);
1546 i915_vma_unlock(vma);
1547 if (err)
1548 goto skip_request;
1549
1550 if (rq->engine->emit_init_breadcrumb) {
1551 err = rq->engine->emit_init_breadcrumb(rq);
1552 if (err)
1553 goto skip_request;
1554 }
1555
1556 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
1557 if (err)
1558 goto skip_request;
1559
1560 i915_vma_unpin(vma);
1561
1562 i915_request_add(rq);
1563
1564 goto out_vm;
1565 skip_request:
1566 i915_request_set_error_once(rq, err);
1567 i915_request_add(rq);
1568 err_unpin:
1569 i915_vma_unpin(vma);
1570 out_vm:
1571 i915_vm_put(vm);
1572 out:
1573 i915_gem_object_put(obj);
1574 return err;
1575 }
1576
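/*
 * Emit a batch in @ctx that loads the dword at @offset into a scratch
 * register and stores it back into the batch object (pre-gen8 requires a
 * secure batch in the GGTT), then read the result back on the CPU into
 * @value.
 */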
1577 static int read_from_scratch(struct i915_gem_context *ctx,
1578 struct intel_engine_cs *engine,
1579 u64 offset, u32 *value)
1580 {
1581 struct drm_i915_private *i915 = ctx->i915;
1582 struct drm_i915_gem_object *obj;
1583 struct i915_address_space *vm;
1584 const u32 result = 0x100;
1585 struct i915_request *rq;
1586 struct i915_vma *vma;
1587 unsigned int flags;
1588 u32 *cmd;
1589 int err;
1590
1591 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1592
1593 err = check_scratch(ctx->vm, offset);
1594 if (err)
1595 return err;
1596
1597 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1598 if (IS_ERR(obj))
1599 return PTR_ERR(obj);
1600
1601 if (GRAPHICS_VER(i915) >= 8) {
1602 const u32 GPR0 = engine->mmio_base + 0x600;
1603
1604 vm = i915_gem_context_get_eb_vm(ctx);
1605 vma = i915_vma_instance(obj, vm, NULL);
1606 if (IS_ERR(vma)) {
1607 err = PTR_ERR(vma);
1608 goto out_vm;
1609 }
1610
1611 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1612 if (err)
1613 goto out_vm;
1614
1615 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1616 if (IS_ERR(cmd)) {
1617 err = PTR_ERR(cmd);
1618 goto out;
1619 }
1620
1621 memset(cmd, POISON_INUSE, PAGE_SIZE);
1622 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1623 *cmd++ = GPR0;
1624 *cmd++ = lower_32_bits(offset);
1625 *cmd++ = upper_32_bits(offset);
1626 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1627 *cmd++ = GPR0;
1628 *cmd++ = result;
1629 *cmd++ = 0;
1630 *cmd = MI_BATCH_BUFFER_END;
1631
1632 i915_gem_object_flush_map(obj);
1633 i915_gem_object_unpin_map(obj);
1634
1635 flags = 0;
1636 } else {
1637 const u32 reg = engine->mmio_base + 0x420;
1638
1639 /* hsw: register access even to 3DPRIM! is protected */
1640 vm = i915_vm_get(&engine->gt->ggtt->vm);
1641 vma = i915_vma_instance(obj, vm, NULL);
1642 if (IS_ERR(vma)) {
1643 err = PTR_ERR(vma);
1644 goto out_vm;
1645 }
1646
1647 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1648 if (err)
1649 goto out_vm;
1650
1651 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1652 if (IS_ERR(cmd)) {
1653 err = PTR_ERR(cmd);
1654 goto out;
1655 }
1656
1657 memset(cmd, POISON_INUSE, PAGE_SIZE);
1658 *cmd++ = MI_LOAD_REGISTER_MEM;
1659 *cmd++ = reg;
1660 *cmd++ = offset;
1661 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1662 *cmd++ = reg;
1663 *cmd++ = vma->node.start + result;
1664 *cmd = MI_BATCH_BUFFER_END;
1665
1666 i915_gem_object_flush_map(obj);
1667 i915_gem_object_unpin_map(obj);
1668
1669 flags = I915_DISPATCH_SECURE;
1670 }
1671
1672 intel_gt_chipset_flush(engine->gt);
1673
1674 rq = igt_request_alloc(ctx, engine);
1675 if (IS_ERR(rq)) {
1676 err = PTR_ERR(rq);
1677 goto err_unpin;
1678 }
1679
1680 i915_vma_lock(vma);
1681 err = i915_request_await_object(rq, vma->obj, true);
1682 if (err == 0)
1683 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1684 i915_vma_unlock(vma);
1685 if (err)
1686 goto skip_request;
1687
1688 if (rq->engine->emit_init_breadcrumb) {
1689 err = rq->engine->emit_init_breadcrumb(rq);
1690 if (err)
1691 goto skip_request;
1692 }
1693
1694 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
1695 if (err)
1696 goto skip_request;
1697
1698 i915_vma_unpin(vma);
1699
1700 i915_request_add(rq);
1701
1702 i915_gem_object_lock(obj, NULL);
1703 err = i915_gem_object_set_to_cpu_domain(obj, false);
1704 i915_gem_object_unlock(obj);
1705 if (err)
1706 goto out_vm;
1707
1708 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1709 if (IS_ERR(cmd)) {
1710 err = PTR_ERR(cmd);
1711 goto out_vm;
1712 }
1713
1714 *value = cmd[result / sizeof(*cmd)];
1715 i915_gem_object_unpin_map(obj);
1716
1717 goto out_vm;
1718 skip_request:
1719 i915_request_set_error_once(rq, err);
1720 i915_request_add(rq);
1721 err_unpin:
1722 i915_vma_unpin(vma);
1723 out_vm:
1724 i915_vm_put(vm);
1725 out:
1726 i915_gem_object_put(obj);
1727 return err;
1728 }
1729
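/*
 * Sample the vm's scratch page: it must be uniformly filled, and the value
 * read becomes the expected result for reads of unmapped addresses.
 */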
1730 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1731 {
1732 struct i915_address_space *vm;
1733 u32 *vaddr;
1734 int err = 0;
1735
1736 vm = ctx->vm;
1737 if (!vm)
1738 return -ENODEV;
1739
1740 if (!vm->scratch[0]) {
1741 pr_err("No scratch page!\n");
1742 return -EINVAL;
1743 }
1744
1745 vaddr = __px_vaddr(vm->scratch[0]);
1746
1747 memcpy(out, vaddr, sizeof(*out));
1748 if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1749 pr_err("Inconsistent initial state of scratch page!\n");
1750 err = -EINVAL;
1751 }
1752
1753 return err;
1754 }
1755
1756 static int igt_vm_isolation(void *arg)
1757 {
1758 struct drm_i915_private *i915 = arg;
1759 struct i915_gem_context *ctx_a, *ctx_b;
1760 unsigned long num_engines, count;
1761 struct intel_engine_cs *engine;
1762 struct igt_live_test t;
1763 I915_RND_STATE(prng);
1764 struct file *file;
1765 u64 vm_total;
1766 u32 expected;
1767 int err;
1768
1769 if (GRAPHICS_VER(i915) < 7)
1770 return 0;
1771
1772 /*
1773 * The simple goal here is that a write into one context is not
1774 * observed in a second (separate page tables and scratch).
1775 */
1776
1777 file = mock_file(i915);
1778 if (IS_ERR(file))
1779 return PTR_ERR(file);
1780
1781 err = igt_live_test_begin(&t, i915, __func__, "");
1782 if (err)
1783 goto out_file;
1784
1785 ctx_a = live_context(i915, file);
1786 if (IS_ERR(ctx_a)) {
1787 err = PTR_ERR(ctx_a);
1788 goto out_file;
1789 }
1790
1791 ctx_b = live_context(i915, file);
1792 if (IS_ERR(ctx_b)) {
1793 err = PTR_ERR(ctx_b);
1794 goto out_file;
1795 }
1796
1797 /* We can only test vm isolation if the vms are distinct */
1798 if (ctx_a->vm == ctx_b->vm)
1799 goto out_file;
1800
1801 /* Read the initial state of the scratch page */
1802 err = check_scratch_page(ctx_a, &expected);
1803 if (err)
1804 goto out_file;
1805
1806 err = check_scratch_page(ctx_b, &expected);
1807 if (err)
1808 goto out_file;
1809
1810 vm_total = ctx_a->vm->total;
1811 GEM_BUG_ON(ctx_b->vm->total != vm_total);
1812
1813 count = 0;
1814 num_engines = 0;
1815 for_each_uabi_engine(engine, i915) {
1816 IGT_TIMEOUT(end_time);
1817 unsigned long this = 0;
1818
1819 if (!intel_engine_can_store_dword(engine))
1820 continue;
1821
1822 /* Not all engines have their own GPR! */
1823 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1824 continue;
1825
1826 while (!__igt_timeout(end_time, NULL)) {
1827 u32 value = 0xc5c5c5c5;
1828 u64 offset;
1829
1830 /* Leave enough space at offset 0 for the batch */
1831 offset = igt_random_offset(&prng,
1832 I915_GTT_PAGE_SIZE, vm_total,
1833 sizeof(u32), alignof_dword);
1834
1835 err = write_to_scratch(ctx_a, engine,
1836 offset, 0xdeadbeef);
1837 if (err == 0)
1838 err = read_from_scratch(ctx_b, engine,
1839 offset, &value);
1840 if (err)
1841 goto out_file;
1842
1843 if (value != expected) {
1844 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1845 engine->name, value,
1846 upper_32_bits(offset),
1847 lower_32_bits(offset),
1848 this);
1849 err = -EINVAL;
1850 goto out_file;
1851 }
1852
1853 this++;
1854 }
1855 count += this;
1856 num_engines++;
1857 }
1858 pr_info("Checked %lu scratch offsets across %lu engines\n",
1859 count, num_engines);
1860
1861 out_file:
1862 if (igt_live_test_end(&t))
1863 err = -EIO;
1864 fput(file);
1865 return err;
1866 }
1867
1868 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1869 {
1870 static const struct i915_subtest tests[] = {
1871 SUBTEST(live_nop_switch),
1872 SUBTEST(live_parallel_switch),
1873 SUBTEST(igt_ctx_exec),
1874 SUBTEST(igt_ctx_readonly),
1875 SUBTEST(igt_ctx_sseu),
1876 SUBTEST(igt_shared_ctx_exec),
1877 SUBTEST(igt_vm_isolation),
1878 };
1879
1880 if (intel_gt_is_wedged(&i915->gt))
1881 return 0;
1882
1883 return i915_live_subtests(tests, i915);
1884 }
1885