diff --git a/patch/kernel/archive/uefi-loong64-6.16/0001-drm-xe-bo-fix-alignment-with-non-4KiB-kernel-page-si.patch b/patch/kernel/archive/uefi-loong64-6.16/0001-drm-xe-bo-fix-alignment-with-non-4KiB-kernel-page-si.patch new file mode 100644 index 0000000000..066c33b2ed --- /dev/null +++ b/patch/kernel/archive/uefi-loong64-6.16/0001-drm-xe-bo-fix-alignment-with-non-4KiB-kernel-page-si.patch @@ -0,0 +1,137 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Mingcong Bai +Date: Wed, 23 Jul 2025 16:45:13 +0900 +Subject: drm/xe/bo: fix alignment with non-4KiB kernel page sizes + +The bo/ttm interfaces with kernel memory mapping from dedicated GPU +memory. It is not correct to assume that SZ_4K would suffice for page +alignment as there are a few hardware platforms that commonly use non- +4KiB pages - for instance, 16KiB is the most commonly used kernel page +size on Loongson devices (of the LoongArch architecture). + +Per our testing, Intel Xe/Alchemist/Battlemage families of GPUs work on +Loongson platforms so long as "Above 4G Decoding" is enabled and +"Resizable BAR" is set to auto in the UEFI firmware settings. + +Without this fix, the kernel will hang at a kernel BUG(): + +[ 7.425445] ------------[ cut here ]------------ +[ 7.430032] kernel BUG at drivers/gpu/drm/drm_gem.c:181! 
+[ 7.435330] Oops - BUG[#1]: +[ 7.438099] CPU: 0 UID: 0 PID: 102 Comm: kworker/0:4 Tainted: G E 6.13.3-aosc-main-00336-g60829239b300-dirty #3 +[ 7.449511] Tainted: [E]=UNSIGNED_MODULE +[ 7.453402] Hardware name: Loongson Loongson-3A6000-HV-7A2000-1w-V0.1-EVB/Loongson-3A6000-HV-7A2000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V4.0.05756-prestab +[ 7.467144] Workqueue: events work_for_cpu_fn +[ 7.471472] pc 9000000001045fa4 ra ffff8000025331dc tp 90000001010c8000 sp 90000001010cb960 +[ 7.479770] a0 900000012a3e8000 a1 900000010028c000 a2 000000000005d000 a3 0000000000000000 +[ 7.488069] a4 0000000000000000 a5 0000000000000000 a6 0000000000000000 a7 0000000000000001 +[ 7.496367] t0 0000000000001000 t1 9000000001045000 t2 0000000000000000 t3 0000000000000000 +[ 7.504665] t4 0000000000000000 t5 0000000000000000 t6 0000000000000000 t7 0000000000000000 +[ 7.504667] t8 0000000000000000 u0 90000000029ea7d8 s9 900000012a3e9360 s0 900000010028c000 +[ 7.504668] s1 ffff800002744000 s2 0000000000000000 s3 0000000000000000 s4 0000000000000001 +[ 7.504669] s5 900000012a3e8000 s6 0000000000000001 s7 0000000000022022 s8 0000000000000000 +[ 7.537855] ra: ffff8000025331dc ___xe_bo_create_locked+0x158/0x3b0 [xe] +[ 7.544893] ERA: 9000000001045fa4 drm_gem_private_object_init+0xcc/0xd0 +[ 7.551639] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) +[ 7.557785] PRMD: 00000004 (PPLV0 +PIE -PWE) +[ 7.562111] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) +[ 7.566870] ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) +[ 7.571628] ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) +[ 7.577163] PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) +[ 7.583128] Modules linked in: xe(E+) drm_gpuvm(E) drm_exec(E) drm_buddy(E) gpu_sched(E) drm_suballoc_helper(E) drm_display_helper(E) loongson(E) r8169(E) cec(E) rc_core(E) realtek(E) i2c_algo_bit(E) tpm_tis_spi(E) led_class(E) hid_generic(E) drm_ttm_helper(E) ttm(E) drm_client_lib(E) drm_kms_helper(E) sunrpc(E) la_ow_syscall(E) i2c_dev(E) +[ 7.613049] Process kworker/0:4 
(pid: 102, threadinfo=00000000bc26ebd1, task=0000000055480707) +[ 7.621606] Stack : 0000000000000000 3030303a6963702b 000000000005d000 0000000000000000 +[ 7.629563] 0000000000000001 0000000000000000 0000000000000000 8e1bfae42b2f7877 +[ 7.637519] 000000000005d000 900000012a3e8000 900000012a3e9360 0000000000000000 +[ 7.645475] ffffffffffffffff 0000000000000000 0000000000022022 0000000000000000 +[ 7.653431] 0000000000000001 ffff800002533660 0000000000022022 9000000000234470 +[ 7.661386] 90000001010cba28 0000000000001000 0000000000000000 000000000005c300 +[ 7.669342] 900000012a3e8000 0000000000000000 0000000000000001 900000012a3e8000 +[ 7.677298] ffffffffffffffff 0000000000022022 900000012a3e9498 ffff800002533a14 +[ 7.685254] 0000000000022022 0000000000000000 900000000209c000 90000000010589e0 +[ 7.693209] 90000001010cbab8 ffff8000027c78c0 fffffffffffff000 900000012a3e8000 +[ 7.701165] ... +[ 7.703588] Call Trace: +[ 7.703590] [<9000000001045fa4>] drm_gem_private_object_init+0xcc/0xd0 +[ 7.712496] [] ___xe_bo_create_locked+0x154/0x3b0 [xe] +[ 7.719268] [] __xe_bo_create_locked+0x228/0x304 [xe] +[ 7.725951] [] xe_bo_create_pin_map_at_aligned+0x70/0x1b0 [xe] +[ 7.733410] [] xe_managed_bo_create_pin_map+0x34/0xcc [xe] +[ 7.740522] [] xe_managed_bo_create_from_data+0x44/0xb0 [xe] +[ 7.747807] [] xe_uc_fw_init+0x3ec/0x904 [xe] +[ 7.753814] [] xe_guc_init+0x30/0x3dc [xe] +[ 7.759553] [] xe_uc_init+0x20/0xf0 [xe] +[ 7.765121] [] xe_gt_init_hwconfig+0x5c/0xd0 [xe] +[ 7.771461] [] xe_device_probe+0x240/0x588 [xe] +[ 7.777627] [] xe_pci_probe+0x6c0/0xa6c [xe] +[ 7.783540] [<9000000000e9828c>] local_pci_probe+0x4c/0xb4 +[ 7.788989] [<90000000002aa578>] work_for_cpu_fn+0x20/0x40 +[ 7.794436] [<90000000002aeb50>] process_one_work+0x1a4/0x458 +[ 7.800143] [<90000000002af5a0>] worker_thread+0x304/0x3fc +[ 7.805591] [<90000000002bacac>] kthread+0x114/0x138 +[ 7.810520] [<9000000000241f64>] ret_from_kernel_thread+0x8/0xa4 +[ 7.816489] +[ 7.817961] Code: 4c000020 29c3e2f9 53ff93ff 
<002a0001> 0015002c 03400000 02ff8063 29c04077 001500f7 +[ 7.827651] +[ 7.829140] ---[ end trace 0000000000000000 ]--- + +Revise all instances of `SZ_4K' with `PAGE_SIZE' and revise the call to +`drm_gem_private_object_init()' in `*___xe_bo_create_locked()' (last call +before BUG()) to use `size_t aligned_size' calculated from `PAGE_SIZE' to +fix the above error. + +Cc: +Fixes: 4e03b584143e ("drm/xe/uapi: Reject bo creation of unaligned size") +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Tested-by: Mingcong Bai +Tested-by: Wenbin Fang +Tested-by: Haien Liang <27873200@qq.com> +Tested-by: Jianfeng Liu +Tested-by: Shirong Liu +Tested-by: Haofeng Wu +Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3ddb6dca3f28223360 +Link: https://t.me/c/1109254909/768552 +Co-developed-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Mingcong Bai +--- + drivers/gpu/drm/xe/xe_bo.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_bo.c ++++ b/drivers/gpu/drm/xe/xe_bo.c +@@ -1841,9 +1841,9 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + flags |= XE_BO_FLAG_INTERNAL_64K; + alignment = align >> PAGE_SHIFT; + } else { +- aligned_size = ALIGN(size, SZ_4K); ++ aligned_size = ALIGN(size, PAGE_SIZE); + flags &= ~XE_BO_FLAG_INTERNAL_64K; +- alignment = SZ_4K >> PAGE_SHIFT; ++ alignment = PAGE_SIZE >> PAGE_SHIFT; + } + + if (type == ttm_bo_type_device && aligned_size != size) +@@ -1857,7 +1857,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + + bo->ccs_cleared = false; + bo->tile = tile; +- bo->size = size; ++ bo->size = aligned_size; + bo->flags = flags; + bo->cpu_caching = cpu_caching; + bo->ttm.base.funcs = &xe_gem_object_funcs; +@@ -1868,7 +1868,7 @@ struct 
xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, + #endif + INIT_LIST_HEAD(&bo->vram_userfault_link); + +- drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); ++ drm_gem_private_object_init(&xe->drm, &bo->ttm.base, aligned_size); + + if (resv) { + ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT); +-- +Armbian + diff --git a/patch/kernel/archive/uefi-loong64-6.16/0002-drm-xe-guc-use-GUC_SIZE-SZ_4K-for-alignment.patch b/patch/kernel/archive/uefi-loong64-6.16/0002-drm-xe-guc-use-GUC_SIZE-SZ_4K-for-alignment.patch new file mode 100644 index 0000000000..b4271d7995 --- /dev/null +++ b/patch/kernel/archive/uefi-loong64-6.16/0002-drm-xe-guc-use-GUC_SIZE-SZ_4K-for-alignment.patch @@ -0,0 +1,325 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Mingcong Bai +Date: Wed, 23 Jul 2025 16:45:14 +0900 +Subject: drm/xe/guc: use GUC_ALIGN (SZ_4K) for alignment + +Per the "Firmware" chapter in "drm/xe Intel GFX Driver", as well as +"Volume 8: Command Stream Programming" in "Intel(r) Arc(tm) A-Series Graphics +and Intel Data Center GPU Flex Series Open-Source Programmer's Reference +Manual For the discrete GPUs code named "Alchemist" and "Arctic Sound-M"" +and "Intel(r) Iris(r) Xe MAX Graphics Open Source Programmer's Reference +Manual For the 2020 Discrete GPU formerly named "DG1"": + + "The RINGBUF register sets (defined in Memory Interface Registers) are + used to specify the ring buffer memory areas. The ring buffer must start + on a 4KB boundary and be allocated in linear memory. The length of any + one ring buffer is limited to 2MB." + +The Graphics micro (m) Controller (GuC) really expects command buffers +aligned to 4KiB boundaries. + +The current implementation uses `PAGE_SIZE' as an assumed alignment reference +but a 4KiB kernel page size is by no means guaranteed. 
On 16KiB-paged +kernels, this causes driver failures after loading the GuC firmware: + +[ 7.398317] xe 0000:09:00.0: [drm] Found dg2/g10 (device ID 56a1) display version 13.00 stepping C0 +[ 7.410429] xe 0000:09:00.0: [drm] Using GuC firmware from i915/dg2_guc_70.bin version 70.36.0 +[ 10.719989] xe 0000:09:00.0: [drm] *ERROR* GT0: load failed: status = 0x800001EC, time = 3297ms, freq = 2400MHz (req 2400MHz), done = 0 +[ 10.732106] xe 0000:09:00.0: [drm] *ERROR* GT0: load failed: status: Reset = 0, BootROM = 0x76, UKernel = 0x01, MIA = 0x00, Auth = 0x02 +[ 10.744214] xe 0000:09:00.0: [drm] *ERROR* CRITICAL: Xe has declared device 0000:09:00.0 as wedged. + Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new +[ 10.828908] xe 0000:09:00.0: [drm] *ERROR* GT0: GuC mmio request 0x4100: no reply 0x4100 + +Correct this by defining `GUC_ALIGN' as `SZ_4K' in accordance with the +references above, and revising all instances of `PAGE_SIZE' as +`GUC_ALIGN'. Then, revise `PAGE_ALIGN()' calls as `ALIGN()' with +`GUC_ALIGN' as their second argument (overriding `PAGE_SIZE'). 
+ +Cc: stable@vger.kernel.org +Fixes: 84d15f426110 ("drm/xe/guc: Add capture size check in GuC log buffer") +Fixes: 9c8c7a7e6f1f ("drm/xe/guc: Prepare GuC register list and update ADS size for error capture") +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Tested-by: Mingcong Bai +Tested-by: Wenbin Fang +Tested-by: Haien Liang <27873200@qq.com> +Tested-by: Jianfeng Liu +Tested-by: Shirong Liu +Tested-by: Haofeng Wu +Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3ddb6dca3f28223360 +Link: https://t.me/c/1109254909/768552 +Co-developed-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Mingcong Bai +--- + drivers/gpu/drm/xe/xe_guc.c | 4 +- + drivers/gpu/drm/xe/xe_guc.h | 3 + + drivers/gpu/drm/xe/xe_guc_ads.c | 32 +++++----- + drivers/gpu/drm/xe/xe_guc_capture.c | 8 +-- + drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- + drivers/gpu/drm/xe/xe_guc_log.c | 5 +- + drivers/gpu/drm/xe/xe_guc_pc.c | 4 +- + 7 files changed, 31 insertions(+), 27 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc.c ++++ b/drivers/gpu/drm/xe/xe_guc.c +@@ -90,7 +90,7 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc) + + static u32 guc_ctl_log_params_flags(struct xe_guc *guc) + { +- u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT; ++ u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> XE_PTE_SHIFT; + u32 flags; + + #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) +@@ -143,7 +143,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) + + static u32 guc_ctl_ads_flags(struct xe_guc *guc) + { +- u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; ++ u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> XE_PTE_SHIFT; + u32 flags = ads << GUC_ADS_ADDR_SHIFT; + + return flags; +diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h +index 
111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc.h ++++ b/drivers/gpu/drm/xe/xe_guc.h +@@ -23,6 +23,9 @@ + #define GUC_FIRMWARE_VER(guc) \ + MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + ++/* GuC really expects command buffers aligned to 4K boundaries. */ ++#define GUC_ALIGN SZ_4K ++ + struct drm_printer; + + void xe_guc_comm_init_early(struct xe_guc *guc); +diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc_ads.c ++++ b/drivers/gpu/drm/xe/xe_guc_ads.c +@@ -143,17 +143,17 @@ static size_t guc_ads_regset_size(struct xe_guc_ads *ads) + + static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) + { +- return PAGE_ALIGN(ads->golden_lrc_size); ++ return ALIGN(ads->golden_lrc_size, GUC_ALIGN); + } + + static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) + { +- return PAGE_ALIGN(ads->ads_waklv_size); ++ return ALIGN(ads->ads_waklv_size, GUC_ALIGN); + } + + static size_t guc_ads_capture_size(struct xe_guc_ads *ads) + { +- return PAGE_ALIGN(ads->capture_size); ++ return ALIGN(ads->capture_size, GUC_ALIGN); + } + + static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) +@@ -168,7 +168,7 @@ static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads) + + static size_t guc_ads_private_data_size(struct xe_guc_ads *ads) + { +- return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size); ++ return ALIGN(ads_to_guc(ads)->fw.private_data_size, GUC_ALIGN); + } + + static size_t guc_ads_regset_offset(struct xe_guc_ads *ads) +@@ -183,7 +183,7 @@ static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) + offset = guc_ads_regset_offset(ads) + + guc_ads_regset_size(ads); + +- return PAGE_ALIGN(offset); ++ return ALIGN(offset, GUC_ALIGN); + } + + static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) +@@ -193,7 +193,7 @@ static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) + offset = guc_ads_golden_lrc_offset(ads) + + 
guc_ads_golden_lrc_size(ads); + +- return PAGE_ALIGN(offset); ++ return ALIGN(offset, GUC_ALIGN); + } + + static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) +@@ -203,7 +203,7 @@ static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) + offset = guc_ads_waklv_offset(ads) + + guc_ads_waklv_size(ads); + +- return PAGE_ALIGN(offset); ++ return ALIGN(offset, GUC_ALIGN); + } + + static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) +@@ -213,7 +213,7 @@ static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads) + offset = guc_ads_capture_offset(ads) + + guc_ads_capture_size(ads); + +- return PAGE_ALIGN(offset); ++ return ALIGN(offset, GUC_ALIGN); + } + + static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) +@@ -223,7 +223,7 @@ static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads) + offset = guc_ads_um_queues_offset(ads) + + guc_ads_um_queues_size(ads); + +- return PAGE_ALIGN(offset); ++ return ALIGN(offset, GUC_ALIGN); + } + + static size_t guc_ads_size(struct xe_guc_ads *ads) +@@ -276,7 +276,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) + continue; + + real_size = xe_gt_lrc_size(gt, class); +- alloc_size = PAGE_ALIGN(real_size); ++ alloc_size = ALIGN(real_size, GUC_ALIGN); + total_size += alloc_size; + } + +@@ -646,12 +646,12 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads) + offsetof(struct __guc_ads_blob, system_info)); + + /* first, set aside the first page for a capture_list with zero descriptors */ +- total_size = PAGE_SIZE; ++ total_size = GUC_ALIGN; + if (!xe_guc_capture_getnullheader(guc, &ptr, &size)) + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size); + + null_ggtt = ads_ggtt + capture_offset; +- capture_offset += PAGE_SIZE; ++ capture_offset += GUC_ALIGN; + + /* + * Populate capture list : at this point adps is already allocated and +@@ -715,10 +715,10 @@ static int guc_capture_prep_lists(struct xe_guc_ads *ads) + } + } + +- if (ads->capture_size 
!= PAGE_ALIGN(total_size)) ++ if (ads->capture_size != ALIGN(total_size, GUC_ALIGN)) + xe_gt_dbg(gt, "Updated ADS capture size %d (was %d)\n", +- PAGE_ALIGN(total_size), ads->capture_size); +- return PAGE_ALIGN(total_size); ++ ALIGN(total_size, GUC_ALIGN), ads->capture_size); ++ return ALIGN(total_size, GUC_ALIGN); + } + + static void guc_mmio_regset_write_one(struct xe_guc_ads *ads, +@@ -966,7 +966,7 @@ static void guc_golden_lrc_populate(struct xe_guc_ads *ads) + xe_gt_assert(gt, gt->default_lrc[class]); + + real_size = xe_gt_lrc_size(gt, class); +- alloc_size = PAGE_ALIGN(real_size); ++ alloc_size = ALIGN(real_size, GUC_ALIGN); + total_size += alloc_size; + + xe_map_memcpy_to(xe, ads_to_map(ads), offset, +diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc_capture.c ++++ b/drivers/gpu/drm/xe/xe_guc_capture.c +@@ -591,8 +591,8 @@ guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type, + return -ENODATA; + + if (size) +- *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + +- (num_regs * sizeof(struct guc_mmio_reg))); ++ *size = ALIGN((sizeof(struct guc_debug_capture_list)) + ++ (num_regs * sizeof(struct guc_mmio_reg)), GUC_ALIGN); + + return 0; + } +@@ -739,7 +739,7 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc) + * sequence, that is, during the pre-hwconfig phase before we have + * the exact engine fusing info. 
+ */ +- total_size = PAGE_SIZE; /* Pad a page in front for empty lists */ ++ total_size = GUC_ALIGN; /* Pad a page in front for empty lists */ + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { + for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) { + if (xe_guc_capture_getlistsize(guc, i, +@@ -759,7 +759,7 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc) + total_size += global_size; + } + +- return PAGE_ALIGN(total_size); ++ return ALIGN(total_size, GUC_ALIGN); + } + + static int guc_capture_output_size_est(struct xe_guc *guc) +diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc_ct.c ++++ b/drivers/gpu/drm/xe/xe_guc_ct.c +@@ -214,7 +214,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) + struct xe_bo *bo; + int err; + +- xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); ++ xe_gt_assert(gt, !(guc_ct_size() % GUC_ALIGN)); + + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM); + if (!ct->g2h_wq) +diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc_log.c ++++ b/drivers/gpu/drm/xe/xe_guc_log.c +@@ -15,6 +15,7 @@ + #include "xe_force_wake.h" + #include "xe_gt.h" + #include "xe_gt_printk.h" ++#include "xe_guc.h" + #include "xe_map.h" + #include "xe_mmio.h" + #include "xe_module.h" +@@ -58,7 +59,7 @@ static size_t guc_log_size(void) + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ +- return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + ++ return GUC_ALIGN + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + + CAPTURE_BUFFER_SIZE; + } + +@@ -328,7 +329,7 @@ u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type + u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type) + { + enum guc_log_buffer_type i; +- u32 offset = PAGE_SIZE;/* for the log_buffer_states */ ++ u32 
offset = GUC_ALIGN; /* for the log_buffer_states */ + + for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) { + if (i == type) +diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_guc_pc.c ++++ b/drivers/gpu/drm/xe/xe_guc_pc.c +@@ -1190,7 +1190,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) + { + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); +- u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); ++ u32 size = ALIGN(sizeof(struct slpc_shared_data), GUC_ALIGN); + unsigned int fw_ref; + ktime_t earlier; + int ret; +@@ -1318,7 +1318,7 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; +- u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); ++ u32 size = ALIGN(sizeof(struct slpc_shared_data), GUC_ALIGN); + int err; + + if (xe->info.skip_guc_pc) +-- +Armbian + diff --git a/patch/kernel/archive/uefi-loong64-6.16/0003-drm-xe-regs-fix-RING_CTL_SIZE-size-calculation.patch b/patch/kernel/archive/uefi-loong64-6.16/0003-drm-xe-regs-fix-RING_CTL_SIZE-size-calculation.patch new file mode 100644 index 0000000000..a35d07731d --- /dev/null +++ b/patch/kernel/archive/uefi-loong64-6.16/0003-drm-xe-regs-fix-RING_CTL_SIZE-size-calculation.patch @@ -0,0 +1,98 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Mingcong Bai +Date: Wed, 23 Jul 2025 16:45:15 +0900 +Subject: drm/xe/regs: fix RING_CTL_SIZE(size) calculation + +Similar to the preceding patch for GuC (and with the same references), +Intel GPUs expect command buffers to be aligned to 4KiB boundaries. + +The current code uses `PAGE_SIZE' as an assumed alignment reference but a 4KiB +kernel page size is by no means guaranteed. 
On 16KiB-paged kernels, this +causes driver failures during boot up: + +[ 14.018975] ------------[ cut here ]------------ +[ 14.023562] xe 0000:09:00.0: [drm] GT0: Kernel-submitted job timed out +[ 14.030084] WARNING: CPU: 3 PID: 564 at drivers/gpu/drm/xe/xe_guc_submit.c:1181 guc_exec_queue_timedout_job+0x1c0/0xacc [xe] +[ 14.041300] Modules linked in: nf_conntrack_netbios_ns(E) nf_conntrack_broadcast(E) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) nft_chain_nat(E) ip6table_nat(E) ip6table_mangle(E) ip6table_raw(E) ip6table_security(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) rfkill(E) iptable_mangle(E) iptable_raw(E) iptable_security(E) ip_set(E) nf_tables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) snd_hda_codec_conexant(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) nls_iso8859_1(E) snd_hda_intel(E) snd_intel_dspcfg(E) qrtr(E) nls_cp437(E) snd_hda_codec(E) spi_loongson_pci(E) rtc_efi(E) snd_hda_core(E) loongson3_cpufreq(E) spi_loongson_core(E) snd_hwdep(E) snd_pcm(E) snd_timer(E) snd(E) soundcore(E) gpio_loongson_64bit(E) input_leds(E) rtc_loongson(E) i2c_ls2x(E) mousedev(E) sch_fq_codel(E) fuse(E) nfnetlink(E) dmi_sysfs(E) ip_tables(E) x_tables(E) xe(E) d + rm_gpuvm(E) drm_buddy(E) gpu_sched(E) +[ 14.041369] drm_exec(E) drm_suballoc_helper(E) drm_display_helper(E) cec(E) rc_core(E) hid_generic(E) tpm_tis_spi(E) r8169(E) realtek(E) led_class(E) loongson(E) i2c_algo_bit(E) drm_ttm_helper(E) ttm(E) drm_client_lib(E) drm_kms_helper(E) sunrpc(E) i2c_dev(E) +[ 14.153910] CPU: 3 UID: 0 PID: 564 Comm: kworker/u32:2 Tainted: G E 6.14.0-rc4-aosc-main-gbad70b1cd8b0-dirty #7 +[ 14.165325] Tainted: [E]=UNSIGNED_MODULE +[ 14.169220] Hardware name: Loongson Loongson-3A6000-HV-7A2000-1w-V0.1-EVB/Loongson-3A6000-HV-7A2000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V4.0.05756-prestab +[ 14.182970] Workqueue: gt-ordered-wq drm_sched_job_timedout 
[gpu_sched] +[ 14.189549] pc ffff8000024f3760 ra ffff8000024f3760 tp 900000012f150000 sp 900000012f153ca0 +[ 14.197853] a0 0000000000000000 a1 0000000000000000 a2 0000000000000000 a3 0000000000000000 +[ 14.206156] a4 0000000000000000 a5 0000000000000000 a6 0000000000000000 a7 0000000000000000 +[ 14.214458] t0 0000000000000000 t1 0000000000000000 t2 0000000000000000 t3 0000000000000000 +[ 14.222761] t4 0000000000000000 t5 0000000000000000 t6 0000000000000000 t7 0000000000000000 +[ 14.231064] t8 0000000000000000 u0 900000000195c0c8 s9 900000012e4dcf48 s0 90000001285f3640 +[ 14.239368] s1 90000001004f8000 s2 ffff8000026ec000 s3 0000000000000000 s4 900000012e4dc028 +[ 14.247672] s5 90000001009f5e00 s6 000000000000137e s7 0000000000000001 s8 900000012f153ce8 +[ 14.255975] ra: ffff8000024f3760 guc_exec_queue_timedout_job+0x1c0/0xacc [xe] +[ 14.263379] ERA: ffff8000024f3760 guc_exec_queue_timedout_job+0x1c0/0xacc [xe] +[ 14.270777] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) +[ 14.276927] PRMD: 00000004 (PPLV0 +PIE -PWE) +[ 14.281258] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) +[ 14.286024] ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) +[ 14.290790] ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) +[ 14.296329] PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) +[ 14.302299] CPU: 3 UID: 0 PID: 564 Comm: kworker/u32:2 Tainted: G E 6.14.0-rc4-aosc-main-gbad70b1cd8b0-dirty #7 +[ 14.302302] Tainted: [E]=UNSIGNED_MODULE +[ 14.302302] Hardware name: Loongson Loongson-3A6000-HV-7A2000-1w-V0.1-EVB/Loongson-3A6000-HV-7A2000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V4.0.05756-prestab +[ 14.302304] Workqueue: gt-ordered-wq drm_sched_job_timedout [gpu_sched] +[ 14.302307] Stack : 900000012f153928 d84a6232d48f1ac7 900000000023eb34 900000012f150000 +[ 14.302310] 900000012f153900 0000000000000000 900000012f153908 9000000001c31c70 +[ 14.302313] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 14.302315] 0000000000000000 d84a6232d48f1ac7 0000000000000000 0000000000000000 +[ 
14.302318] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 14.302320] 0000000000000000 0000000000000000 00000000072b4000 900000012e4dcf48 +[ 14.302323] 9000000001eb8000 0000000000000000 9000000001c31c70 0000000000000004 +[ 14.302325] 0000000000000004 0000000000000000 000000000000137e 0000000000000001 +[ 14.302328] 900000012f153ce8 9000000001c31c70 9000000000244174 0000555581840b98 +[ 14.302331] 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d +[ 14.302333] ... +[ 14.302335] Call Trace: +[ 14.302336] [<9000000000244174>] show_stack+0x3c/0x16c +[ 14.302341] [<900000000023eb30>] dump_stack_lvl+0x84/0xe0 +[ 14.302346] [<9000000000288208>] __warn+0x8c/0x174 +[ 14.302350] [<90000000017c1918>] report_bug+0x1c0/0x22c +[ 14.302354] [<90000000017f66e8>] do_bp+0x280/0x344 +[ 14.302359] +[ 14.302360] ---[ end trace 0000000000000000 ]--- + +Revise calculation of `RING_CTL_SIZE(size)' to use `SZ_4K' to fix the +aforementioned issue. + +Cc: stable@vger.kernel.org +Fixes: b79e8fd954c4 ("drm/xe: Remove dependency on intel_engine_regs.h") +Tested-by: Mingcong Bai +Tested-by: Wenbin Fang +Tested-by: Haien Liang <27873200@qq.com> +Tested-by: Jianfeng Liu +Tested-by: Shirong Liu +Tested-by: Haofeng Wu +Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3ddb6dca3f28223360 +Link: https://t.me/c/1109254909/768552 +Co-developed-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Mingcong Bai +--- + drivers/gpu/drm/xe/regs/xe_engine_regs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h ++++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h +@@ -56,7 +56,7 @@ + #define RING_START(base) XE_REG((base) + 0x38) + + #define RING_CTL(base) XE_REG((base) + 0x3c) +-#define RING_CTL_SIZE(size) 
((size) - PAGE_SIZE) /* in bytes -> pages */ ++#define RING_CTL_SIZE(size) ((size) - SZ_4K) /* in bytes -> pages */ + + #define RING_START_UDW(base) XE_REG((base) + 0x48) + +-- +Armbian + diff --git a/patch/kernel/archive/uefi-loong64-6.16/0004-drm-xe-use-4KiB-alignment-for-cursor-jumps.patch b/patch/kernel/archive/uefi-loong64-6.16/0004-drm-xe-use-4KiB-alignment-for-cursor-jumps.patch new file mode 100644 index 0000000000..eb0f22d2e4 --- /dev/null +++ b/patch/kernel/archive/uefi-loong64-6.16/0004-drm-xe-use-4KiB-alignment-for-cursor-jumps.patch @@ -0,0 +1,121 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Mingcong Bai +Date: Wed, 23 Jul 2025 16:45:16 +0900 +Subject: drm/xe: use 4KiB alignment for cursor jumps + +It appears that the xe_res_cursor also assumes 4KiB alignment. + +The current implementation uses `PAGE_SIZE' as an assumed alignment reference, +but a 4KiB kernel page size is by no means guaranteed. On 16KiB-paged +kernels, this causes driver failures during boot up: + +[ 23.242757] ------------[ cut here ]------------ +[ 23.247363] WARNING: CPU: 0 PID: 2036 at drivers/gpu/drm/xe/xe_res_cursor.h:182 emit_pte+0x394/0x3b0 [xe] +[ 23.256962] Modules linked in: nf_conntrack_netbios_ns(E) nf_conntrack_broadcast(E) nft_fib_inet(E) nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) rfkill(E) nft_chain_nat(E) ip6table_nat(E) ip6table_mangle(E) ip6table_raw(E) ip6table_security(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) iptable_mangle(E) iptable_raw(E) iptable_security(E) ip_set(E) nf_tables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) snd_hda_codec_conexant(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) snd_intel_dspcfg(E) snd_hda_codec(E) nls_iso8859_1(E) qrtr(E) nls_cp437(E) snd_hda_core(E) loongson3_cpufreq(E) rtc_efi(E) snd_hwdep(E) snd_pcm(E) spi_loongson_pci(E) snd_timer(E) snd(E) 
spi_loongson_core(E) soundcore(E) gpio_loongson_64bit(E) rtc_loongson(E) i2c_ls2x(E) mousedev(E) input_leds(E) sch_fq_codel(E) fuse(E) nfnetlink(E) dmi_sysfs(E) ip_tables(E) x_tables(E) xe(E) d + rm_gpuvm(E) drm_buddy(E) gpu_sched(E) +[ 23.257034] drm_exec(E) drm_suballoc_helper(E) drm_display_helper(E) cec(E) rc_core(E) hid_generic(E) tpm_tis_spi(E) r8169(E) loongson(E) i2c_algo_bit(E) realtek(E) drm_ttm_helper(E) led_class(E) ttm(E) drm_client_lib(E) drm_kms_helper(E) sunrpc(E) i2c_dev(E) +[ 23.369697] CPU: 0 UID: 1000 PID: 2036 Comm: QSGRenderThread Tainted: G E 6.14.0-rc4-aosc-main-g7cc07e6e50b0-dirty #8 +[ 23.381640] Tainted: [E]=UNSIGNED_MODULE +[ 23.385534] Hardware name: Loongson Loongson-3A6000-HV-7A2000-1w-V0.1-EVB/Loongson-3A6000-HV-7A2000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V4.0.05756-prestab +[ 23.399319] pc ffff80000251efc0 ra ffff80000251eddc tp 900000011fe3c000 sp 900000011fe3f7e0 +[ 23.407632] a0 0000000000000001 a1 0000000000000000 a2 0000000000000000 a3 0000000000000000 +[ 23.415938] a4 0000000000000000 a5 0000000000000000 a6 0000000000060000 a7 900000010c947b00 +[ 23.424240] t0 0000000000000000 t1 0000000000000000 t2 0000000000000000 t3 900000012e456230 +[ 23.432543] t4 0000000000000035 t5 0000000000004000 t6 00000001fbc40403 t7 0000000000004000 +[ 23.440845] t8 9000000100e688a8 u0 5cc06cee8ef0edee s9 9000000100024420 s0 0000000000000047 +[ 23.449147] s1 0000000000004000 s2 0000000000000001 s3 900000012adba000 s4 ffffffffffffc000 +[ 23.457450] s5 9000000108939428 s6 0000000000000000 s7 0000000000000000 s8 900000011fe3f8e0 +[ 23.465851] ra: ffff80000251eddc emit_pte+0x1b0/0x3b0 [xe] +[ 23.471761] ERA: ffff80000251efc0 emit_pte+0x394/0x3b0 [xe] +[ 23.477557] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) +[ 23.483732] PRMD: 00000004 (PPLV0 +PIE -PWE) +[ 23.488068] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) +[ 23.492832] ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) +[ 23.497594] ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) +[ 23.503133] PRID: 
0014d000 (Loongson-64bit, Loongson-3A6000-HV) +[ 23.509164] CPU: 0 UID: 1000 PID: 2036 Comm: QSGRenderThread Tainted: G E 6.14.0-rc4-aosc-main-g7cc07e6e50b0-dirty #8 +[ 23.509168] Tainted: [E]=UNSIGNED_MODULE +[ 23.509168] Hardware name: Loongson Loongson-3A6000-HV-7A2000-1w-V0.1-EVB/Loongson-3A6000-HV-7A2000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V4.0.05756-prestab +[ 23.509170] Stack : ffffffffffffffff ffffffffffffffff 900000000023eb34 900000011fe3c000 +[ 23.509176] 900000011fe3f440 0000000000000000 900000011fe3f448 9000000001c31c70 +[ 23.509181] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 23.509185] 0000000000000000 5cc06cee8ef0edee 0000000000000000 0000000000000000 +[ 23.509190] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 23.509193] 0000000000000000 0000000000000000 00000000066b4000 9000000100024420 +[ 23.509197] 9000000001eb8000 0000000000000000 9000000001c31c70 0000000000000004 +[ 23.509202] 0000000000000004 0000000000000000 0000000000000000 0000000000000000 +[ 23.509206] 900000011fe3f8e0 9000000001c31c70 9000000000244174 00007fffac097534 +[ 23.509211] 00000000000000b0 0000000000000004 0000000000000003 0000000000071c1d +[ 23.509216] ... 
+[ 23.509218] Call Trace: +[ 23.509220] [<9000000000244174>] show_stack+0x3c/0x16c +[ 23.509226] [<900000000023eb30>] dump_stack_lvl+0x84/0xe0 +[ 23.509230] [<9000000000288208>] __warn+0x8c/0x174 +[ 23.509234] [<90000000017c1918>] report_bug+0x1c0/0x22c +[ 23.509238] [<90000000017f66e8>] do_bp+0x280/0x344 +[ 23.509243] [<90000000002428a0>] handle_bp+0x120/0x1c0 +[ 23.509247] [] emit_pte+0x394/0x3b0 [xe] +[ 23.509295] [] xe_migrate_clear+0x2d8/0xa54 [xe] +[ 23.509341] [] xe_bo_move+0x324/0x930 [xe] +[ 23.509387] [] ttm_bo_handle_move_mem+0xd0/0x194 [ttm] +[ 23.509392] [] ttm_bo_validate+0xd4/0x1cc [ttm] +[ 23.509396] [] ttm_bo_init_reserved+0x184/0x1dc [ttm] +[ 23.509399] [] ___xe_bo_create_locked+0x1e8/0x3d4 [xe] +[ 23.509445] [] __xe_bo_create_locked+0x2cc/0x390 [xe] +[ 23.509489] [] xe_bo_create_user+0x34/0xe4 [xe] +[ 23.509533] [] xe_gem_create_ioctl+0x154/0x4d8 [xe] +[ 23.509578] [<9000000001062784>] drm_ioctl_kernel+0xe0/0x14c +[ 23.509582] [<9000000001062c10>] drm_ioctl+0x420/0x5f4 +[ 23.509585] [] xe_drm_ioctl+0x64/0xac [xe] +[ 23.509630] [<9000000000653504>] sys_ioctl+0x2b8/0xf98 +[ 23.509634] [<90000000017f684c>] do_syscall+0xa0/0x140 +[ 23.509637] [<9000000000241e38>] handle_syscall+0xb8/0x158 +[ 23.509640] +[ 23.509644] ---[ end trace 0000000000000000 ]--- + +Revise calls to `xe_res_dma()' and `xe_res_next()' in `emit_pte()' to use +`~XE_PTE_MASK' and `XE_PAGE_SIZE' (4KiB) in place of the kernel's +`PAGE_MASK' and `PAGE_SIZE', which are larger on non-4KiB page kernels. 
+ +Cc: stable@vger.kernel.org +Fixes: e89b384cde62 ("drm/xe/migrate: Update emit_pte to cope with a size level than 4k") +Tested-by: Mingcong Bai +Tested-by: Wenbin Fang +Tested-by: Haien Liang <27873200@qq.com> +Tested-by: Jianfeng Liu +Tested-by: Shirong Liu +Tested-by: Haofeng Wu +Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3ddb6dca3f28223360 +Link: https://t.me/c/1109254909/768552 +Co-developed-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Mingcong Bai +--- + drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_migrate.c ++++ b/drivers/gpu/drm/xe/xe_migrate.c +@@ -592,7 +592,7 @@ static void emit_pte(struct xe_migrate *m, + u64 addr, flags = 0; + bool devmem = false; + +- addr = xe_res_dma(cur) & PAGE_MASK; ++ addr = xe_res_dma(cur) & ~XE_PTE_MASK; + if (is_vram) { + if (vm->flags & XE_VM_FLAG_64K) { + u64 va = cur_ofs * XE_PAGE_SIZE / 8; +@@ -613,7 +613,7 @@ static void emit_pte(struct xe_migrate *m, + bb->cs[bb->len++] = lower_32_bits(addr); + bb->cs[bb->len++] = upper_32_bits(addr); + +- xe_res_next(cur, min_t(u32, size, PAGE_SIZE)); ++ xe_res_next(cur, min_t(u32, size, XE_PAGE_SIZE)); + cur_ofs += 8; + } + } +-- +Armbian + diff --git a/patch/kernel/archive/uefi-loong64-6.16/0005-drm-xe-query-use-PAGE_SIZE-as-the-minimum-page-align.patch b/patch/kernel/archive/uefi-loong64-6.16/0005-drm-xe-query-use-PAGE_SIZE-as-the-minimum-page-align.patch new file mode 100644 index 0000000000..a55a8ef73c --- /dev/null +++ b/patch/kernel/archive/uefi-loong64-6.16/0005-drm-xe-query-use-PAGE_SIZE-as-the-minimum-page-align.patch @@ -0,0 +1,63 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Mingcong Bai +Date: Wed, 23 Jul 2025 16:45:17 +0900 +Subject: drm/xe/query: use 
PAGE_SIZE as the minimum page alignment + +As this component hooks into userspace API, it should be assumed that it +will play well with non-4KiB/64KiB pages. + +Use `PAGE_SIZE' as the final reference for page alignment instead. + +Cc: stable@vger.kernel.org +Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") +Fixes: 801989b08aff ("drm/xe/uapi: Make constant comments visible in kernel doc") +Tested-by: Mingcong Bai +Tested-by: Wenbin Fang +Tested-by: Haien Liang <27873200@qq.com> +Tested-by: Jianfeng Liu +Tested-by: Shirong Liu +Tested-by: Haofeng Wu +Link: https://github.com/FanFansfan/loongson-linux/commit/22c55ab3931c32410a077b3ddb6dca3f28223360 +Link: https://t.me/c/1109254909/768552 +Co-developed-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Shang Yatsen <429839446@qq.com> +Signed-off-by: Mingcong Bai +--- + drivers/gpu/drm/xe/xe_query.c | 2 +- + include/uapi/drm/xe_drm.h | 7 +++++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c +index 111111111111..222222222222 100644 +--- a/drivers/gpu/drm/xe/xe_query.c ++++ b/drivers/gpu/drm/xe/xe_query.c +@@ -346,7 +346,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; + config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = +- xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; ++ xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? 
SZ_64K : PAGE_SIZE; + config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; + config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] = + xe_exec_queue_device_get_max_priority(xe); +diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h +index 111111111111..222222222222 100644 +--- a/include/uapi/drm/xe_drm.h ++++ b/include/uapi/drm/xe_drm.h +@@ -397,8 +397,11 @@ struct drm_xe_query_mem_regions { + * has low latency hint support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the + * device has CPU address mirroring support +- * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment +- * required by this device, typically SZ_4K or SZ_64K ++ * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment required ++ * by this device and the CPU. The minimum page size for the device is ++ * usually SZ_4K or SZ_64K, while for the CPU, it is PAGE_SIZE. This value ++ * is calculated by max(min_gpu_page_size, PAGE_SIZE). This alignment is ++ * enforced on buffer object allocations and VM binds. + * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address + * - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest + * available exec queue priority +-- +Armbian +