diff --git a/patch/kernel/archive/rockchip64-6.18/media-0005-media-verisilicon-AV1-Set-IDR-flag-for-intra_only-fr.patch b/patch/kernel/archive/rockchip64-6.18/media-0005-media-verisilicon-AV1-Set-IDR-flag-for-intra_only-fr.patch
new file mode 100644
index 0000000000..b5121b2215
--- /dev/null
+++ b/patch/kernel/archive/rockchip64-6.18/media-0005-media-verisilicon-AV1-Set-IDR-flag-for-intra_only-fr.patch
@@ -0,0 +1,31 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Benjamin Gaignard
+Date: Thu, 8 Jan 2026 10:56:28 +0100
+Subject: media: verisilicon: AV1: Set IDR flag for intra_only frame type
+
+An intra_only frame can be considered a key frame, so the
+Instantaneous Decoding Refresh (IDR) flag must be set in both
+cases, not only for key frames.
+
+Signed-off-by: Benjamin Gaignard
+Fixes: 727a400686a2c ("media: verisilicon: Add Rockchip AV1 decoder")
+---
+ drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+index 111111111111..222222222222 100644
+--- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
++++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+@@ -1995,7 +1995,7 @@ static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
+ !!(ctrls->frame->quantization.flags
+ & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
+
+- hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
++ hantro_reg_write(vpu, &av1_idr_pic_e, IS_INTRA(ctrls->frame->frame_type));
+ hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
+ hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
+ hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
+--
+Armbian
+
diff --git a/patch/kernel/archive/rockchip64-6.18/media-0006-media-verisilicon-AV1-Fix-tx-mode-bit-setting.patch b/patch/kernel/archive/rockchip64-6.18/media-0006-media-verisilicon-AV1-Fix-tx-mode-bit-setting.patch
new file mode 100644
index 0000000000..63ab52c15d
--- /dev/null
+++ b/patch/kernel/archive/rockchip64-6.18/media-0006-media-verisilicon-AV1-Fix-tx-mode-bit-setting.patch
@@ -0,0 +1,78 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Benjamin Gaignard
+Date: Tue, 9 Dec 2025 11:34:17 +0100
+Subject: media: verisilicon: AV1: Fix tx mode bit setting
+
+The AV1 specification describes 3 possible tx modes: 4x4 only,
+largest and select.
+The hardware allows 5 possible tx modes: 4x4 only, 8x8, 16x16,
+32x32 and select.
+Since the two do not match exactly, add a mapping
+function to set the correct mode on the hardware.
+
+Signed-off-by: Benjamin Gaignard
+Fixes: 727a400686a2c ("media: verisilicon: Add Rockchip AV1 decoder")
+---
+ drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c | 27 +++++++++-
+ 1 file changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+index 111111111111..222222222222 100644
+--- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
++++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+@@ -72,6 +72,14 @@
+ : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
+ })
+
++enum rockchip_av1_tx_mode {
++ ROCKCHIP_AV1_TX_MODE_ONLY_4X4 = 0,
++ ROCKCHIP_AV1_TX_MODE_8X8 = 1,
++ ROCKCHIP_AV1_TX_MODE_16x16 = 2,
++ ROCKCHIP_AV1_TX_MODE_32x32 = 3,
++ ROCKCHIP_AV1_TX_MODE_SELECT = 4,
++};
++
+ struct rockchip_av1_film_grain {
+ u8 scaling_lut_y[256];
+ u8 scaling_lut_cb[256];
+@@ -1935,11 +1943,26 @@ static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
+ rockchip_vpu981_av1_dec_set_other_frames(ctx);
+ }
+
++static int rockchip_vpu981_av1_get_hardware_tx_mode(enum v4l2_av1_tx_mode tx_mode)
++{
++ switch (tx_mode) {
++ case V4L2_AV1_TX_MODE_ONLY_4X4:
++ return ROCKCHIP_AV1_TX_MODE_ONLY_4X4;
++ case V4L2_AV1_TX_MODE_LARGEST:
++ return ROCKCHIP_AV1_TX_MODE_32x32;
++ case V4L2_AV1_TX_MODE_SELECT:
++ return ROCKCHIP_AV1_TX_MODE_SELECT;
++ }
++
++ return ROCKCHIP_AV1_TX_MODE_32x32;
++}
++
+ static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
+ {
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+ struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
++ int tx_mode;
+
+ hantro_reg_write(vpu, &av1_skip_mode,
+ !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
+@@ -2005,7 +2028,9 @@
+ !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
+ hantro_reg_write(vpu, &av1_comp_pred_mode,
+ (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
+- hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
++
++ tx_mode = rockchip_vpu981_av1_get_hardware_tx_mode(ctrls->frame->tx_mode);
++ hantro_reg_write(vpu, &av1_transform_mode, tx_mode);
+ hantro_reg_write(vpu, &av1_max_cb_size,
+ (ctrls->sequence->flags
+ & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
+--
+Armbian
+
diff --git a/patch/kernel/archive/rockchip64-6.18/media-0007-add-verisilicon-AV1-iommu-driver.patch b/patch/kernel/archive/rockchip64-6.18/media-0007-add-verisilicon-AV1-iommu-driver.patch
new file mode 100644
index 0000000000..ae37c0371f
--- /dev/null
+++ b/patch/kernel/archive/rockchip64-6.18/media-0007-add-verisilicon-AV1-iommu-driver.patch
@@ -0,0 +1,995 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Benjamin Gaignard
+Date: Wed, 7 Jan 2026 11:09:53 +0100
+Subject: iommu: Add verisilicon IOMMU driver
+
+The Verisilicon IOMMU hardware block can be found in combination
+with Verisilicon hardware video codecs (encoders or decoders) on
+different SoCs.
+Enabling it allows us to use non-contiguous memory allocators
+for the Verisilicon video codecs.
+If both the decoder and this IOMMU driver are compiled as modules,
+there are undefined symbol issues, so this IOMMU driver can
+only be compiled as built-in.
+ +Signed-off-by: Benjamin Gaignard +--- + drivers/iommu/Kconfig | 11 + + drivers/iommu/Makefile | 1 + + drivers/iommu/vsi-iommu.c | 808 ++++++++++ + include/linux/vsi-iommu.h | 21 + + 4 files changed, 841 insertions(+) + +diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig +index 111111111111..222222222222 100644 +--- a/drivers/iommu/Kconfig ++++ b/drivers/iommu/Kconfig +@@ -384,6 +384,17 @@ config SPRD_IOMMU + + Say Y here if you want to use the multimedia devices listed above. + ++config VSI_IOMMU ++ bool "Verisilicon IOMMU Support" ++ depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST ++ select IOMMU_API ++ help ++ Support for IOMMUs used by Verisilicon sub-systems like video ++ decoders or encoder hardware blocks. ++ ++ Say Y here if you want to use this IOMMU in front of these ++ hardware blocks. ++ + endif # IOMMU_SUPPORT + + source "drivers/iommu/generic_pt/Kconfig" +diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile +index 111111111111..222222222222 100644 +--- a/drivers/iommu/Makefile ++++ b/drivers/iommu/Makefile +@@ -36,3 +36,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o + obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o + obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o + obj-$(CONFIG_APPLE_DART) += apple-dart.o ++obj-$(CONFIG_VSI_IOMMU) += vsi-iommu.o +diff --git a/drivers/iommu/vsi-iommu.c b/drivers/iommu/vsi-iommu.c +new file mode 100644 +index 000000000000..111111111111 +--- /dev/null ++++ b/drivers/iommu/vsi-iommu.c +@@ -0,0 +1,808 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* Copyright (C) 2025 Collabora Ltd. ++ * ++ * IOMMU API for Verisilicon ++ * ++ * Module Authors: Yandong Lin ++ * Simon Xue ++ * Benjamin Gaignard ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "iommu-pages.h" ++ ++struct vsi_iommu { ++ struct device *dev; ++ void __iomem *regs; ++ struct clk_bulk_data *clocks; ++ int num_clocks; ++ struct iommu_device iommu; ++ struct list_head node; /* entry in vsi_iommu_domain.iommus */ ++ struct iommu_domain *domain; /* domain to which iommu is attached */ ++ spinlock_t lock; /* lock to protect vsi_iommu fields */ ++ int irq; ++}; ++ ++struct vsi_iommu_domain { ++ struct list_head iommus; ++ struct device *dev; ++ u32 *dt; ++ dma_addr_t dt_dma; ++ struct iommu_domain domain; ++ u64 *pta; ++ dma_addr_t pta_dma; ++ spinlock_t lock; /* lock to protect vsi_iommu_domain fields */ ++}; ++ ++static struct iommu_domain vsi_identity_domain; ++ ++#define NUM_DT_ENTRIES 1024 ++#define NUM_PT_ENTRIES 1024 ++#define PT_SIZE (NUM_PT_ENTRIES * sizeof(u32)) ++ ++#define SPAGE_SIZE BIT(12) ++ ++/* vsi iommu regs address */ ++#define VSI_MMU_CONFIG1_BASE 0x1ac ++#define VSI_MMU_AHB_EXCEPTION_BASE 0x380 ++#define VSI_MMU_AHB_CONTROL_BASE 0x388 ++#define VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE 0x38C ++ ++/* MMU register offsets */ ++#define VSI_MMU_FLUSH_BASE 0x184 ++#define VSI_MMU_BIT_FLUSH BIT(4) ++ ++#define VSI_MMU_PAGE_FAULT_ADDR 0x380 ++#define VSI_MMU_STATUS_BASE 0x384 /* IRQ status */ ++ ++#define VSI_MMU_BIT_ENABLE BIT(0) ++ ++#define VSI_MMU_OUT_OF_BOUND BIT(28) ++/* Irq mask */ ++#define VSI_MMU_IRQ_MASK 0x7 ++ ++#define VSI_DTE_PT_ADDRESS_MASK 0xffffffc0 ++#define VSI_DTE_PT_VALID BIT(0) ++ ++#define VSI_PAGE_DESC_LO_MASK 0xfffff000 ++#define VSI_PAGE_DESC_HI_MASK GENMASK_ULL(39, 32) ++#define VSI_PAGE_DESC_HI_SHIFT (32 - 4) ++ ++static inline phys_addr_t 
vsi_dte_pt_address(u32 dte) ++{ ++ return (phys_addr_t)dte & VSI_DTE_PT_ADDRESS_MASK; ++} ++ ++static inline u32 vsi_mk_dte(u32 dte) ++{ ++ return (phys_addr_t)dte | VSI_DTE_PT_VALID; ++} ++ ++#define VSI_PTE_PAGE_WRITABLE BIT(2) ++#define VSI_PTE_PAGE_VALID BIT(0) ++ ++static inline phys_addr_t vsi_pte_page_address(u64 pte) ++{ ++ return ((pte << VSI_PAGE_DESC_HI_SHIFT) & VSI_PAGE_DESC_HI_MASK) | ++ (pte & VSI_PAGE_DESC_LO_MASK); ++} ++ ++static u32 vsi_mk_pte(phys_addr_t page, int prot) ++{ ++ u32 flags = 0; ++ ++ flags |= (prot & IOMMU_WRITE) ? VSI_PTE_PAGE_WRITABLE : 0; ++ ++ page = (page & VSI_PAGE_DESC_LO_MASK) | ++ ((page & VSI_PAGE_DESC_HI_MASK) >> VSI_PAGE_DESC_HI_SHIFT); ++ ++ return page | flags | VSI_PTE_PAGE_VALID; ++} ++ ++#define VSI_DTE_PT_VALID BIT(0) ++ ++static inline bool vsi_dte_is_pt_valid(u32 dte) ++{ ++ return dte & VSI_DTE_PT_VALID; ++} ++ ++static inline bool vsi_pte_is_page_valid(u32 pte) ++{ ++ return pte & VSI_PTE_PAGE_VALID; ++} ++ ++static u32 vsi_mk_pte_invalid(u32 pte) ++{ ++ return pte & ~VSI_PTE_PAGE_VALID; ++} ++ ++#define VSI_MASTER_TLB_MASK GENMASK_ULL(31, 10) ++/* mode 0 : 4k */ ++#define VSI_PTA_4K_MODE 0 ++ ++static u64 vsi_mk_pta(dma_addr_t dt_dma) ++{ ++ u64 val = (dt_dma & VSI_MASTER_TLB_MASK) | VSI_PTA_4K_MODE; ++ ++ return val; ++} ++ ++static struct vsi_iommu_domain *to_vsi_domain(struct iommu_domain *dom) ++{ ++ return container_of(dom, struct vsi_iommu_domain, domain); ++} ++ ++static inline void vsi_table_flush(struct vsi_iommu_domain *vsi_domain, dma_addr_t dma, ++ unsigned int count) ++{ ++ size_t size = count * sizeof(u32); /* count of u32 entry */ ++ ++ dma_sync_single_for_device(vsi_domain->dev, dma, size, DMA_TO_DEVICE); ++} ++ ++#define VSI_IOVA_DTE_MASK 0xffc00000 ++#define VSI_IOVA_DTE_SHIFT 22 ++#define VSI_IOVA_PTE_MASK 0x003ff000 ++#define VSI_IOVA_PTE_SHIFT 12 ++#define VSI_IOVA_PAGE_MASK 0x00000fff ++#define VSI_IOVA_PAGE_SHIFT 0 ++ ++static u32 vsi_iova_dte_index(u32 iova) ++{ ++ return (iova & VSI_IOVA_DTE_MASK) >> VSI_IOVA_DTE_SHIFT; ++} ++ ++static u32 vsi_iova_pte_index(u32 iova) ++{ ++ return (iova & VSI_IOVA_PTE_MASK) >> VSI_IOVA_PTE_SHIFT; ++} ++ ++static u32 vsi_iova_page_offset(u32 iova) ++{ ++ return (iova & VSI_IOVA_PAGE_MASK) >> VSI_IOVA_PAGE_SHIFT; ++} ++ ++static void vsi_iommu_flush_tlb_all(struct iommu_domain *domain) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ struct list_head *pos; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ ++ list_for_each(pos, &vsi_domain->iommus) { ++ struct vsi_iommu *iommu; ++ int ret; ++ ++ iommu = list_entry(pos, struct vsi_iommu, node); ++ ret = pm_runtime_resume_and_get(iommu->dev); ++ if (ret < 0) ++ continue; ++ ++ spin_lock(&iommu->lock); ++ ++ writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE); ++ writel(0, iommu->regs + VSI_MMU_FLUSH_BASE); ++ ++ spin_unlock(&iommu->lock); ++ pm_runtime_put_autosuspend(iommu->dev); ++ } ++ ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++} ++ ++static irqreturn_t vsi_iommu_irq(int irq, void *dev_id) ++{ ++ struct vsi_iommu *iommu = dev_id; ++ unsigned long flags; ++ dma_addr_t iova; ++ u32 status; ++ ++ if (pm_runtime_resume_and_get(iommu->dev) < 0) ++ return IRQ_NONE; ++ ++ spin_lock_irqsave(&iommu->lock, flags); ++ ++ status = readl(iommu->regs + VSI_MMU_STATUS_BASE); ++ if (status & VSI_MMU_IRQ_MASK) { ++ dev_err(iommu->dev, "unexpected int_status=%08x\n", status); ++ iova = readl(iommu->regs + VSI_MMU_PAGE_FAULT_ADDR); ++ report_iommu_fault(iommu->domain, 
iommu->dev, iova, status); ++ } ++ writel(0, iommu->regs + VSI_MMU_STATUS_BASE); ++ ++ spin_unlock_irqrestore(&iommu->lock, flags); ++ pm_runtime_put_autosuspend(iommu->dev); ++ ++ return IRQ_HANDLED; ++} ++ ++static struct vsi_iommu *vsi_iommu_get_from_dev(struct device *dev) ++{ ++ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); ++ struct device *iommu_dev = bus_find_device_by_fwnode(&platform_bus_type, ++ fwspec->iommu_fwnode); ++ ++ put_device(iommu_dev); ++ ++ return iommu_dev ? dev_get_drvdata(iommu_dev) : NULL; ++} ++ ++static struct iommu_domain *vsi_iommu_domain_alloc_paging(struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_iommu_priv_get(dev); ++ struct vsi_iommu_domain *vsi_domain; ++ ++ vsi_domain = kzalloc(sizeof(*vsi_domain), GFP_KERNEL); ++ if (!vsi_domain) ++ return NULL; ++ ++ vsi_domain->dev = iommu->dev; ++ spin_lock_init(&vsi_domain->lock); ++ ++ /* ++ * iommu use a 2 level pagetable. ++ * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. ++ * Allocate one 4 KiB page for each table. ++ */ ++ vsi_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, ++ SPAGE_SIZE); ++ if (!vsi_domain->dt) ++ goto err_free_domain; ++ ++ vsi_domain->dt_dma = dma_map_single(vsi_domain->dev, vsi_domain->dt, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++ if (dma_mapping_error(vsi_domain->dev, vsi_domain->dt_dma)) { ++ dev_err(dev, "DMA map error for DT\n"); ++ goto err_free_dt; ++ } ++ ++ vsi_domain->pta = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, ++ SPAGE_SIZE); ++ if (!vsi_domain->pta) ++ goto err_unmap_dt; ++ ++ vsi_domain->pta[0] = vsi_mk_pta(vsi_domain->dt_dma); ++ vsi_domain->pta_dma = dma_map_single(vsi_domain->dev, vsi_domain->pta, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++ if (dma_mapping_error(vsi_domain->dev, vsi_domain->pta_dma)) { ++ dev_err(dev, "DMA map error for PTA\n"); ++ goto err_free_pta; ++ } ++ ++ INIT_LIST_HEAD(&vsi_domain->iommus); ++ ++ vsi_domain->domain.geometry.aperture_start = 0; ++ vsi_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); ++ vsi_domain->domain.geometry.force_aperture = true; ++ vsi_domain->domain.pgsize_bitmap = SZ_4K; ++ ++ return &vsi_domain->domain; ++ ++err_free_pta: ++ iommu_free_pages(vsi_domain->pta); ++err_unmap_dt: ++ dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++err_free_dt: ++ iommu_free_pages(vsi_domain->dt); ++err_free_domain: ++ kfree(vsi_domain); ++ ++ return NULL; ++} ++ ++static phys_addr_t vsi_iommu_iova_to_phys(struct iommu_domain *domain, ++ dma_addr_t iova) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ phys_addr_t pt_phys, phys = 0; ++ unsigned long flags; ++ u32 dte, pte; ++ u32 *page_table; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ dte = vsi_domain->dt[vsi_iova_dte_index(iova)]; ++ if (!vsi_dte_is_pt_valid(dte)) ++ goto unlock; ++ ++ pt_phys = vsi_dte_pt_address(dte); ++ page_table = (u32 *)phys_to_virt(pt_phys); ++ pte = page_table[vsi_iova_pte_index(iova)]; ++ if (!vsi_pte_is_page_valid(pte)) ++ goto unlock; ++ ++ phys = vsi_pte_page_address(pte) + vsi_iova_page_offset(iova); ++ ++unlock: ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ return phys; ++} ++ ++static size_t vsi_iommu_unmap_iova(struct vsi_iommu_domain *vsi_domain, ++ u32 *pte_addr, dma_addr_t pte_dma, ++ size_t size) ++{ ++ unsigned int pte_count; ++ unsigned int pte_total = size / SPAGE_SIZE; ++ ++ for (pte_count = 0; ++ pte_count < pte_total && pte_count < NUM_PT_ENTRIES; pte_count++) { ++ u32 pte = pte_addr[pte_count]; ++ ++ if 
(!vsi_pte_is_page_valid(pte)) ++ break; ++ ++ pte_addr[pte_count] = vsi_mk_pte_invalid(pte); ++ } ++ ++ vsi_table_flush(vsi_domain, pte_dma, pte_total); ++ ++ return pte_count * SPAGE_SIZE; ++} ++ ++static int vsi_iommu_map_iova(struct vsi_iommu_domain *vsi_domain, u32 *pte_addr, ++ dma_addr_t pte_dma, dma_addr_t iova, ++ phys_addr_t paddr, size_t size, int prot) ++{ ++ unsigned int pte_count; ++ unsigned int pte_total = size / SPAGE_SIZE; ++ ++ for (pte_count = 0; ++ pte_count < pte_total && pte_count < NUM_PT_ENTRIES; pte_count++) { ++ u32 pte = pte_addr[pte_count]; ++ ++ if (vsi_pte_is_page_valid(pte)) ++ return (pte_count - 1) * SPAGE_SIZE; ++ ++ pte_addr[pte_count] = vsi_mk_pte(paddr, prot); ++ ++ paddr += SPAGE_SIZE; ++ } ++ ++ vsi_table_flush(vsi_domain, pte_dma, pte_total); ++ ++ return 0; ++} ++ ++static size_t vsi_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, ++ size_t size, size_t count, struct iommu_iotlb_gather *gather) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ dma_addr_t pte_dma, iova = (dma_addr_t)_iova; ++ unsigned long flags; ++ phys_addr_t pt_phys; ++ u32 dte; ++ u32 *pte_addr; ++ size_t unmap_size = 0; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ ++ dte = vsi_domain->dt[vsi_iova_dte_index(iova)]; ++ /* Just return 0 if iova is unmapped */ ++ if (!vsi_dte_is_pt_valid(dte)) ++ goto unlock; ++ ++ pt_phys = vsi_dte_pt_address(dte); ++ pte_addr = (u32 *)phys_to_virt(pt_phys) + vsi_iova_pte_index(iova); ++ pte_dma = pt_phys + vsi_iova_pte_index(iova) * sizeof(u32); ++ unmap_size = vsi_iommu_unmap_iova(vsi_domain, pte_addr, pte_dma, size); ++ ++unlock: ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ ++ return unmap_size; ++} ++ ++static u32 *vsi_dte_get_page_table(struct vsi_iommu_domain *vsi_domain, ++ dma_addr_t iova, gfp_t gfp) ++{ ++ u32 *page_table, *dte_addr; ++ u32 dte_index, dte; ++ phys_addr_t pt_phys; ++ dma_addr_t pt_dma; ++ gfp_t flags; ++ ++ dte_index = vsi_iova_dte_index(iova); ++ dte_addr = &vsi_domain->dt[dte_index]; ++ dte = *dte_addr; ++ if (vsi_dte_is_pt_valid(dte)) ++ goto done; ++ ++ /* Do not allow to sleep while allocating the buffer */ ++ flags = (gfp & ~GFP_KERNEL) | GFP_ATOMIC | GFP_DMA32; ++ page_table = iommu_alloc_pages_sz(flags, PAGE_SIZE); ++ if (!page_table) ++ return ERR_PTR(-ENOMEM); ++ ++ pt_dma = dma_map_single(vsi_domain->dev, page_table, PAGE_SIZE, DMA_TO_DEVICE); ++ if (dma_mapping_error(vsi_domain->dev, pt_dma)) { ++ dev_err(vsi_domain->dev, "DMA mapping error while allocating page table\n"); ++ iommu_free_pages(page_table); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ dte = vsi_mk_dte(pt_dma); ++ *dte_addr = dte; ++ ++ vsi_table_flush(vsi_domain, ++ vsi_domain->dt_dma + dte_index * sizeof(u32), 1); ++done: ++ pt_phys = vsi_dte_pt_address(dte); ++ return (u32 *)phys_to_virt(pt_phys); ++} ++ ++static int vsi_iommu_map(struct iommu_domain *domain, unsigned long _iova, ++ phys_addr_t paddr, size_t size, size_t count, ++ int prot, gfp_t gfp, size_t *mapped) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ dma_addr_t pte_dma, iova = (dma_addr_t)_iova; ++ u32 *page_table, *pte_addr; ++ u32 dte, pte_index; ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ ++ page_table = vsi_dte_get_page_table(vsi_domain, iova, gfp); ++ if (IS_ERR(page_table)) { ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ return PTR_ERR(page_table); ++ } ++ ++ dte = vsi_domain->dt[vsi_iova_dte_index(iova)]; ++ pte_index = vsi_iova_pte_index(iova); ++ 
pte_addr = &page_table[pte_index]; ++ pte_dma = vsi_dte_pt_address(dte) + pte_index * sizeof(u32); ++ ret = vsi_iommu_map_iova(vsi_domain, pte_addr, pte_dma, iova, ++ paddr, size, prot); ++ if (!ret) ++ *mapped = size; ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ ++ return ret; ++} ++ ++static void vsi_iommu_disable(struct vsi_iommu *iommu) ++{ ++ writel(0, iommu->regs + VSI_MMU_AHB_CONTROL_BASE); ++} ++ ++static int vsi_iommu_identity_attach(struct iommu_domain *domain, ++ struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_iommu_priv_get(dev); ++ unsigned long flags; ++ int ret; ++ ++ ret = pm_runtime_resume_and_get(iommu->dev); ++ if (ret < 0) ++ return ret; ++ ++ spin_lock_irqsave(&iommu->lock, flags); ++ if (iommu->domain == domain) ++ goto unlock; ++ ++ vsi_iommu_disable(iommu); ++ list_del_init(&iommu->node); ++ ++ iommu->domain = domain; ++ ++unlock: ++ spin_unlock_irqrestore(&iommu->lock, flags); ++ pm_runtime_put_autosuspend(iommu->dev); ++ return 0; ++} ++ ++static const struct iommu_domain_ops vsi_identity_ops = { ++ .attach_dev = vsi_iommu_identity_attach, ++}; ++ ++static struct iommu_domain vsi_identity_domain = { ++ .type = IOMMU_DOMAIN_IDENTITY, ++ .ops = &vsi_identity_ops, ++}; ++ ++static void vsi_iommu_enable(struct vsi_iommu *iommu, struct iommu_domain *domain) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ ++ if (domain == &vsi_identity_domain) ++ return; ++ ++ writel(vsi_domain->pta_dma, iommu->regs + VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE); ++ writel(VSI_MMU_OUT_OF_BOUND, iommu->regs + VSI_MMU_CONFIG1_BASE); ++ writel(VSI_MMU_BIT_ENABLE, iommu->regs + VSI_MMU_AHB_EXCEPTION_BASE); ++ writel(VSI_MMU_BIT_ENABLE, iommu->regs + VSI_MMU_AHB_CONTROL_BASE); ++} ++ ++void vsi_iommu_restore_ctx(struct iommu_domain *domain) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ struct list_head *pos; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ ++ list_for_each(pos, &vsi_domain->iommus) { ++ struct vsi_iommu *iommu; ++ ++ iommu = list_entry(pos, struct vsi_iommu, node); ++ if (!iommu) ++ continue; ++ ++ spin_lock(&iommu->lock); ++ ++ writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE); ++ writel(0, iommu->regs + VSI_MMU_FLUSH_BASE); ++ ++ spin_unlock(&iommu->lock); ++ } ++ ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++} ++EXPORT_SYMBOL_GPL(vsi_iommu_restore_ctx); ++ ++static int vsi_iommu_attach_device(struct iommu_domain *domain, ++ struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_iommu_priv_get(dev); ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ unsigned long flags, flags2; ++ int ret = 0; ++ ++ ret = pm_runtime_resume_and_get(iommu->dev); ++ if (ret < 0) ++ return ret; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ spin_lock_irqsave(&iommu->lock, flags2); ++ ++ vsi_iommu_enable(iommu, domain); ++ writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE); ++ writel(0, iommu->regs + VSI_MMU_FLUSH_BASE); ++ ++ list_del_init(&iommu->node); ++ list_add_tail(&iommu->node, &vsi_domain->iommus); ++ ++ iommu->domain = domain; ++ ++ spin_unlock_irqrestore(&iommu->lock, flags2); ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ pm_runtime_put_autosuspend(iommu->dev); ++ return ret; ++} ++ ++static void vsi_iommu_domain_free(struct iommu_domain *domain) ++{ ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain); ++ unsigned long flags; ++ int i; ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ ++ 
WARN_ON(!list_empty(&vsi_domain->iommus)); ++ ++ for (i = 0; i < NUM_DT_ENTRIES; i++) { ++ u32 dte = vsi_domain->dt[i]; ++ ++ if (vsi_dte_is_pt_valid(dte)) { ++ phys_addr_t pt_phys = vsi_dte_pt_address(dte); ++ u32 *page_table = phys_to_virt(pt_phys); ++ ++ dma_unmap_single(vsi_domain->dev, pt_phys, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++ iommu_free_pages(page_table); ++ } ++ } ++ ++ dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++ iommu_free_pages(vsi_domain->dt); ++ ++ dma_unmap_single(vsi_domain->dev, vsi_domain->pta_dma, ++ SPAGE_SIZE, DMA_TO_DEVICE); ++ iommu_free_pages(vsi_domain->pta); ++ ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ ++ kfree(vsi_domain); ++} ++ ++static struct iommu_device *vsi_iommu_probe_device(struct device *dev) ++{ ++ struct vsi_iommu *iommu = vsi_iommu_get_from_dev(dev); ++ struct device_link *link; ++ ++ link = device_link_add(dev, iommu->dev, ++ DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); ++ if (!link) ++ dev_err(dev, "Unable to link %s\n", dev_name(iommu->dev)); ++ ++ dev_iommu_priv_set(dev, iommu); ++ return &iommu->iommu; ++} ++ ++static void vsi_iommu_release_device(struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_iommu_priv_get(dev); ++ ++ device_link_remove(dev, iommu->dev); ++} ++ ++static int vsi_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args) ++{ ++ return iommu_fwspec_add_ids(dev, args->args, 1); ++} ++ ++static const struct iommu_ops vsi_iommu_ops = { ++ .identity_domain = &vsi_identity_domain, ++ .release_domain = &vsi_identity_domain, ++ .domain_alloc_paging = vsi_iommu_domain_alloc_paging, ++ .of_xlate = vsi_iommu_of_xlate, ++ .probe_device = vsi_iommu_probe_device, ++ .release_device = vsi_iommu_release_device, ++ .device_group = generic_single_device_group, ++ .owner = THIS_MODULE, ++ .default_domain_ops = &(const struct iommu_domain_ops) { ++ .attach_dev = vsi_iommu_attach_device, ++ .map_pages = vsi_iommu_map, ++ .unmap_pages = vsi_iommu_unmap, ++ .flush_iotlb_all = vsi_iommu_flush_tlb_all, ++ .iova_to_phys = vsi_iommu_iova_to_phys, ++ .free = vsi_iommu_domain_free, ++ } ++}; ++ ++static const struct of_device_id vsi_iommu_dt_ids[] = { ++ { ++ .compatible = "verisilicon,iommu-1.2", ++ }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, vsi_iommu_dt_ids); ++ ++static int vsi_iommu_probe(struct platform_device *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct vsi_iommu *iommu; ++ int err; ++ ++ iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); ++ if (!iommu) ++ return -ENOMEM; ++ ++ iommu->dev = dev; ++ spin_lock_init(&iommu->lock); ++ INIT_LIST_HEAD(&iommu->node); ++ ++ iommu->regs = devm_platform_ioremap_resource(pdev, 0); ++ if (IS_ERR(iommu->regs)) ++ return -ENOMEM; ++ ++ iommu->num_clocks = devm_clk_bulk_get_all(dev, &iommu->clocks); ++ if (iommu->num_clocks < 0) ++ return iommu->num_clocks; ++ ++ err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks); ++ if (err) ++ return err; ++ ++ iommu->irq = platform_get_irq(pdev, 0); ++ if (iommu->irq < 0) ++ return iommu->irq; ++ ++ err = devm_request_irq(iommu->dev, iommu->irq, vsi_iommu_irq, ++ IRQF_SHARED, dev_name(dev), iommu); ++ if (err) ++ goto err_unprepare_clocks; ++ ++ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); ++ platform_set_drvdata(pdev, iommu); ++ ++ pm_runtime_set_autosuspend_delay(dev, 100); ++ pm_runtime_use_autosuspend(dev); ++ pm_runtime_enable(dev); ++ ++ err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); ++ if (err) ++ goto err_runtime_disable; ++ ++ err = 
iommu_device_register(&iommu->iommu, &vsi_iommu_ops, dev); ++ if (err) ++ goto err_remove_sysfs; ++ ++ return 0; ++ ++err_remove_sysfs: ++ iommu_device_sysfs_remove(&iommu->iommu); ++err_runtime_disable: ++ pm_runtime_disable(dev); ++err_unprepare_clocks: ++ clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); ++ return err; ++} ++ ++static void vsi_iommu_shutdown(struct platform_device *pdev) ++{ ++ struct vsi_iommu *iommu = platform_get_drvdata(pdev); ++ ++ disable_irq(iommu->irq); ++ pm_runtime_force_suspend(&pdev->dev); ++} ++ ++static int __maybe_unused vsi_iommu_suspend(struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_get_drvdata(dev); ++ ++ vsi_iommu_disable(iommu); ++ ++ clk_bulk_disable(iommu->num_clocks, iommu->clocks); ++ ++ return 0; ++} ++ ++static int __maybe_unused vsi_iommu_resume(struct device *dev) ++{ ++ struct vsi_iommu *iommu = dev_get_drvdata(dev); ++ unsigned long flags, flags2; ++ int ret; ++ ++ ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks); ++ if (ret) ++ return ret; ++ ++ if (iommu->domain) { ++ struct vsi_iommu_domain *vsi_domain = to_vsi_domain(iommu->domain); ++ ++ spin_lock_irqsave(&vsi_domain->lock, flags); ++ spin_lock_irqsave(&iommu->lock, flags2); ++ vsi_iommu_enable(iommu, iommu->domain); ++ spin_unlock_irqrestore(&iommu->lock, flags2); ++ spin_unlock_irqrestore(&vsi_domain->lock, flags); ++ } ++ ++ return 0; ++} ++ ++static DEFINE_RUNTIME_DEV_PM_OPS(vsi_iommu_pm_ops, ++ vsi_iommu_suspend, vsi_iommu_resume, ++ NULL); ++ ++static struct platform_driver rockchip_vsi_iommu_driver = { ++ .probe = vsi_iommu_probe, ++ .shutdown = vsi_iommu_shutdown, ++ .driver = { ++ .name = "vsi_iommu", ++ .of_match_table = vsi_iommu_dt_ids, ++ .pm = pm_sleep_ptr(&vsi_iommu_pm_ops), ++ .suppress_bind_attrs = true, ++ }, ++}; ++module_platform_driver(rockchip_vsi_iommu_driver); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Benjamin Gaignard "); ++MODULE_DESCRIPTION("Verisilicon IOMMU driver"); +diff --git a/include/linux/vsi-iommu.h b/include/linux/vsi-iommu.h +new file mode 100644 +index 000000000000..111111111111 +--- /dev/null ++++ b/include/linux/vsi-iommu.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * verisilicon iommu: simple virtual address space management ++ * ++ * Copyright (c) 2025, Collabora ++ * ++ * Written by Benjamin Gaignard ++ */ ++ ++#ifndef _VSI_IOMMU_H_ ++#define _VSI_IOMMU_H_ ++ ++struct iommu_domain; ++ ++#if IS_ENABLED(CONFIG_VSI_IOMMU) ++void vsi_iommu_restore_ctx(struct iommu_domain *domain); ++#else ++static inline void vsi_iommu_restore_ctx(struct iommu_domain *domain) {} ++#endif ++ ++#endif +-- +Armbian + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Gaignard +Date: Wed, 7 Jan 2026 11:09:55 +0100 +Subject: media: verisilicon: AV1: Restore IOMMU context before decoding a + frame + +AV1 is a stateless decoder which means multiple AV1 bitstreams could be +decoded at the same time using the same hardware block. Before decoding +a frame it is needed to restore the iommu tables to avoid mixing decode +contexts. 
+ +Signed-off-by: Benjamin Gaignard +Reviewed-by: Nicolas Dufresne +--- + drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c | 15 ++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c +index 111111111111..222222222222 100644 +--- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c ++++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c +@@ -5,6 +5,9 @@ + * Author: Benjamin Gaignard + */ + ++#include ++#include ++ + #include + #include "hantro.h" + #include "hantro_v4l2.h" +@@ -2126,12 +2129,24 @@ rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx) + hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr); + } + ++static void rockchip_vpu981_av1_restore_iommu(struct hantro_ctx *ctx) ++{ ++ struct iommu_domain *domain; ++ ++ /* Before decoding any frame iommu context need to be restored */ ++ domain = iommu_get_domain_for_dev(ctx->dev->v4l2_dev.dev); ++ if (domain) ++ vsi_iommu_restore_ctx(domain); ++} ++ + int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx) + { + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_src; + int ret; + ++ rockchip_vpu981_av1_restore_iommu(ctx); ++ + hantro_start_prepare_run(ctx); + + ret = rockchip_vpu981_av1_dec_prepare_run(ctx); +-- +Armbian + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Gaignard +Date: Wed, 7 Jan 2026 11:09:56 +0100 +Subject: arm64: dts: rockchip: Add verisilicon IOMMU node on RK3588 + +Add the device tree node for the Verisilicon IOMMU present +in the RK3588 SoC. +This IOMMU handles address translation for the VPU hardware blocks. + +Signed-off-by: Benjamin Gaignard +--- + arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 11 ++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +index 111111111111..222222222222 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +@@ -1428,6 +1428,17 @@ av1d: video-codec@fdc70000 { + clock-names = "aclk", "hclk"; + power-domains = <&power RK3588_PD_AV1>; + resets = <&cru SRST_A_AV1>, <&cru SRST_P_AV1>, <&cru SRST_A_AV1_BIU>, <&cru SRST_P_AV1_BIU>; ++ iommus = <&av1d_mmu>; ++ }; ++ ++ av1d_mmu: iommu@fdca0000 { ++ compatible = "rockchip,rk3588-av1-iommu", "verisilicon,iommu-1.2"; ++ reg = <0x0 0xfdca0000 0x0 0x600>; ++ interrupts = ; ++ clocks = <&cru ACLK_AV1>, <&cru PCLK_AV1>; ++ clock-names = "core", "iface"; ++ #iommu-cells = <0>; ++ power-domains = <&power RK3588_PD_AV1>; + }; + + vop: vop@fdd90000 { +-- +Armbian +