From 25b2037af22f36400c9004481b9e44c02c30c8e1 Mon Sep 17 00:00:00 2001
From: Zhen Chen
Date: Fri, 10 Feb 2023 12:01:52 +0800
Subject: [PATCH] MALI: rockchip: upgrade bifrost DDK to g17p0-01eac0, from g15p0-01eac0

In addition, apply some further modifications according to commit
ccf3f0670c36 ("MALI: bifrost: from ARM: Remove references to PageMovable()").

Note: the corresponding mali_csffw.bin for DDK g17 MUST be used.

Change-Id: Ie233cd29d8d169202d5b80b00a97ccb90e6bd3f2
Signed-off-by: Zhen Chen
---
 .../sysfs-device-mali-coresight-source | 113 +++
 Documentation/csf_sync_state_dump.txt | 111 +++
 .../devicetree/bindings/arm/mali-bifrost.txt | 1 +
 .../bindings/arm/mali-coresight-source.txt | 160 +++
 Documentation/dma-buf-test-exporter.txt | 4 +-
 drivers/base/arm/Makefile | 9 +-
 drivers/base/arm/Mconfig | 64 --
 .../memory_group_manager.c | 4 +-
 drivers/gpu/arm/bifrost/Kbuild | 2 +-
 drivers/gpu/arm/bifrost/Kconfig | 71 +-
 drivers/gpu/arm/bifrost/Makefile | 42 +-
 drivers/gpu/arm/bifrost/Mconfig | 326 ------
 drivers/gpu/arm/bifrost/backend/gpu/Kbuild | 8 +-
 .../backend/gpu/mali_kbase_irq_linux.c | 10 +-
 .../bifrost/backend/gpu/mali_kbase_jm_as.c | 8 +-
 .../bifrost/backend/gpu/mali_kbase_jm_hw.c | 46 +-
 .../backend/gpu/mali_kbase_jm_internal.h | 41 +-
 .../bifrost/backend/gpu/mali_kbase_jm_rb.c | 113 +--
 .../bifrost/backend/gpu/mali_kbase_jm_rb.h | 14 +-
 .../backend/gpu/mali_kbase_js_backend.c | 5 +-
 .../backend/gpu/mali_kbase_model_dummy.c | 73 +-
 .../backend/gpu/mali_kbase_model_dummy.h | 29 +-
 .../gpu/mali_kbase_model_error_generator.c | 2 +-
 .../backend/gpu/mali_kbase_model_linux.c | 33 +-
 .../backend/gpu/mali_kbase_model_linux.h | 125 ++-
 .../bifrost/backend/gpu/mali_kbase_pm_ca.c | 4 +-
 .../backend/gpu/mali_kbase_pm_driver.c | 82 +-
 .../backend/gpu/mali_kbase_pm_internal.h | 23 +
 .../backend/gpu/mali_kbase_pm_mcu_states.h | 16 +-
 .../backend/gpu/mali_kbase_pm_metrics.c | 2 +-
 drivers/gpu/arm/bifrost/build.bp | 5 +-
 .../context/backend/mali_kbase_context_csf.c | 2 +
 .../arm/bifrost/context/mali_kbase_context.c | 11 -
 drivers/gpu/arm/bifrost/csf/Kbuild | 14 +-
 drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c | 588 +++++------
 drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h | 41 +-
 .../bifrost/csf/mali_kbase_csf_csg_debugfs.c | 24 +-
 .../bifrost/csf/mali_kbase_csf_csg_debugfs.h | 9 +-
 .../gpu/arm/bifrost/csf/mali_kbase_csf_defs.h | 105 +-
 .../arm/bifrost/csf/mali_kbase_csf_firmware.c | 288 +++++-
 .../arm/bifrost/csf/mali_kbase_csf_firmware.h | 56 +-
 .../csf/mali_kbase_csf_firmware_core_dump.c | 807 ++++++++++++++++
 .../csf/mali_kbase_csf_firmware_core_dump.h | 65 ++
 .../bifrost/csf/mali_kbase_csf_firmware_log.c | 4 +-
 .../bifrost/csf/mali_kbase_csf_firmware_log.h | 3 +
 .../csf/mali_kbase_csf_firmware_no_mali.c | 93 +-
 .../csf/mali_kbase_csf_heap_context_alloc.c | 82 +-
 .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c | 415 ++++++--
 .../gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h | 22 +-
 .../csf/mali_kbase_csf_mcu_shared_reg.c | 815 ++++++++++++++++
 .../csf/mali_kbase_csf_mcu_shared_reg.h | 139 +++
 .../bifrost/csf/mali_kbase_csf_registers.h | 87 +-
 .../bifrost/csf/mali_kbase_csf_scheduler.c | 212 +++-
 .../bifrost/csf/mali_kbase_csf_sync_debugfs.c | 788 +++++++++++++++
 .../bifrost/csf/mali_kbase_csf_sync_debugfs.h | 37 +
 .../bifrost/csf/mali_kbase_csf_tiler_heap.c | 82 +-
 .../csf/mali_kbase_csf_tiler_heap_reclaim.c | 4 +
 drivers/gpu/arm/bifrost/debug/Kbuild | 3 +-
 .../backend/mali_kbase_debug_coresight_csf.c | 851 ++++++++++++++++
 .../mali_kbase_debug_coresight_internal_csf.h | 182 ++++
.../device/backend/mali_kbase_device_csf.c | 24 +- .../device/backend/mali_kbase_device_hw_csf.c | 8 +- .../device/backend/mali_kbase_device_hw_jm.c | 4 +- .../device/backend/mali_kbase_device_jm.c | 20 +- .../arm/bifrost/device/mali_kbase_device.c | 8 + .../device/mali_kbase_device_internal.h | 12 +- .../mali_kbase_hwcnt_backend_csf_if_fw.c | 12 +- .../backend/mali_kbase_hwcnt_backend_jm.c | 5 +- .../ipa/backend/mali_kbase_ipa_counter_csf.c | 45 +- .../ipa/backend/mali_kbase_ipa_counter_jm.c | 5 +- drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c | 30 +- .../arm/bifrost/ipa/mali_kbase_ipa_simple.c | 6 +- .../gpu/arm/bifrost/jm/mali_kbase_jm_defs.h | 2 +- drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h | 23 +- .../arm/bifrost/mali_base_hwconfig_features.h | 10 - .../arm/bifrost/mali_base_hwconfig_issues.h | 164 +++- drivers/gpu/arm/bifrost/mali_kbase.h | 30 +- .../arm/bifrost/mali_kbase_as_fault_debugfs.c | 10 +- .../gpu/arm/bifrost/mali_kbase_core_linux.c | 59 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.c | 41 +- .../gpu/arm/bifrost/mali_kbase_ctx_sched.h | 12 +- drivers/gpu/arm/bifrost/mali_kbase_defs.h | 34 +- drivers/gpu/arm/bifrost/mali_kbase_fence.h | 91 +- .../gpu/arm/bifrost/mali_kbase_fence_ops.c | 50 +- drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c | 53 +- drivers/gpu/arm/bifrost/mali_kbase_hw.c | 12 +- .../gpu/arm/bifrost/mali_kbase_hwaccess_jm.h | 17 +- drivers/gpu/arm/bifrost/mali_kbase_jd.c | 21 +- drivers/gpu/arm/bifrost/mali_kbase_jm.c | 18 +- drivers/gpu/arm/bifrost/mali_kbase_js.c | 210 ++-- .../gpu/arm/bifrost/mali_kbase_kinstr_jm.c | 5 + .../arm/bifrost/mali_kbase_kinstr_prfcnt.c | 515 ++++------ drivers/gpu/arm/bifrost/mali_kbase_linux.h | 4 +- drivers/gpu/arm/bifrost/mali_kbase_mem.c | 407 ++++++-- drivers/gpu/arm/bifrost/mali_kbase_mem.h | 148 ++- .../gpu/arm/bifrost/mali_kbase_mem_linux.c | 241 ++++- .../gpu/arm/bifrost/mali_kbase_mem_linux.h | 2 +- .../gpu/arm/bifrost/mali_kbase_mem_migrate.c | 347 ++++++- .../gpu/arm/bifrost/mali_kbase_mem_migrate.h | 7 +- drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c | 84 +- drivers/gpu/arm/bifrost/mali_kbase_softjobs.c | 41 +- drivers/gpu/arm/bifrost/mali_kbase_vinstr.c | 5 + .../bifrost/mmu/backend/mali_kbase_mmu_csf.c | 13 +- .../bifrost/mmu/backend/mali_kbase_mmu_jm.c | 6 +- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c | 914 +++++++++++++++--- drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h | 109 ++- .../gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h | 45 +- .../bifrost/mmu/mali_kbase_mmu_hw_direct.c | 8 + drivers/gpu/arm/bifrost/tests/Mconfig | 73 -- .../bifrost/tests/include/kutf/kutf_helpers.h | 27 +- .../gpu/arm/bifrost/tests/kutf/kutf_helpers.c | 14 +- .../arm/bifrost/tl/mali_kbase_timeline_io.c | 5 + .../gpu/arm/bifrost/tl/mali_kbase_tlstream.h | 12 +- .../arm/bifrost/tl/mali_kbase_tracepoints.c | 32 + .../arm/bifrost/tl/mali_kbase_tracepoints.h | 37 + drivers/hwtracing/coresight/mali/Kbuild | 65 ++ drivers/hwtracing/coresight/mali/Kconfig | 47 + drivers/hwtracing/coresight/mali/Makefile | 101 ++ drivers/hwtracing/coresight/mali/build.bp | 100 ++ .../coresight/mali/coresight_mali_common.c | 62 ++ .../coresight/mali/coresight_mali_common.h | 133 +++ .../mali/sources/coresight_mali_sources.c | 168 ++++ .../mali/sources/coresight_mali_sources.h | 94 ++ .../mali/sources/ela/coresight-ela600.h | 129 +++ .../ela/coresight_mali_source_ela_core.c | 666 +++++++++++++ .../etm/coresight_mali_source_etm_core.c | 280 ++++++ .../itm/coresight_mali_source_itm_core.c | 265 +++++ .../linux/mali_kbase_debug_coresight_csf.h | 241 +++++ 
include/linux/version_compat_defs.h | 6 +- .../backend/gpu/mali_kbase_model_dummy.h | 13 + .../backend/gpu/mali_kbase_model_linux.h | 38 + .../arm/bifrost/csf/mali_base_csf_kernel.h | 18 +- .../arm/bifrost/csf/mali_kbase_csf_ioctl.h | 10 +- .../gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h | 1 - .../gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h | 10 +- .../gpu/arm/bifrost/mali_kbase_hwcnt_reader.h | 4 +- .../uapi/gpu/arm/bifrost/mali_kbase_ioctl.h | 276 +++--- 137 files changed, 11502 insertions(+), 2737 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-device-mali-coresight-source create mode 100644 Documentation/csf_sync_state_dump.txt create mode 100644 Documentation/devicetree/bindings/arm/mali-coresight-source.txt delete mode 100644 drivers/base/arm/Mconfig delete mode 100644 drivers/gpu/arm/bifrost/Mconfig create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c create mode 100644 drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h create mode 100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c create mode 100644 drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h delete mode 100644 drivers/gpu/arm/bifrost/tests/Mconfig create mode 100644 drivers/hwtracing/coresight/mali/Kbuild create mode 100644 drivers/hwtracing/coresight/mali/Kconfig create mode 100644 drivers/hwtracing/coresight/mali/Makefile create mode 100644 drivers/hwtracing/coresight/mali/build.bp create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.c create mode 100644 drivers/hwtracing/coresight/mali/coresight_mali_common.h create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c create mode 100644 drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h create mode 100644 drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c create mode 100644 drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c create mode 100644 include/linux/mali_kbase_debug_coresight_csf.h create mode 100644 include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h diff --git a/Documentation/ABI/testing/sysfs-device-mali-coresight-source b/Documentation/ABI/testing/sysfs-device-mali-coresight-source new file mode 100644 index 000000000000..a24a88a824e4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-device-mali-coresight-source @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation) and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program) and can also be obtained + * from Free Software Foundation) Inc.) 51 Franklin Street) Fifth Floor) + * Boston) MA 02110-1301) USA. 
+ * + */ + +What: /sys/bus/coresight/devices/mali-source-etm/enable_source +Description: + Attribute used to enable Coresight Source ETM. + +What: /sys/bus/coresight/devices/mali-source-etm/is_enabled +Description: + Attribute used to check if Coresight Source ITM is enabled. + +What: /sys/bus/coresight/devices/mali-source-etm/trcconfigr +Description: + Coresight Source ETM trace configuration to enable global + timestamping, and data value tracing. + +What: /sys/bus/coresight/devices/mali-source-etm/trctraceidr +Description: + Coresight Source ETM trace ID. + +What: /sys/bus/coresight/devices/mali-source-etm/trcvdarcctlr +Description: + Coresight Source ETM viewData include/exclude address + range comparators. + +What: /sys/bus/coresight/devices/mali-source-etm/trcviiectlr +Description: + Coresight Source ETM viewInst include and exclude control. + +What: /sys/bus/coresight/devices/mali-source-etm/trcstallctlr +Description: + Coresight Source ETM stall control register. + +What: /sys/bus/coresight/devices/mali-source-itm/enable_source +Description: + Attribute used to enable Coresight Source ITM. + +What: /sys/bus/coresight/devices/mali-source-itm/is_enabled +Description: + Attribute used to check if Coresight Source ITM is enabled. + +What: /sys/bus/coresight/devices/mali-source-itm/dwt_ctrl +Description: + Coresight Source DWT configuration: + [0] = 1, enable cycle counter + [4:1] = 4, set PC sample rate pf 256 cycles + [8:5] = 1, set initial post count value + [9] = 1, select position of post count tap on the cycle counter + [10:11] = 1, enable sync packets + [12] = 1, enable periodic PC sample packets + +What: /sys/bus/coresight/devices/mali-source-itm/itm_tcr +Description: + Coresight Source ITM configuration: + [0] = 1, Enable ITM + [1] = 1, Enable Time stamp generation + [2] = 1, Enable sync packet transmission + [3] = 1, Enable HW event forwarding + [11:10] = 1, Generate TS request approx every 128 cycles + [22:16] = 1, Trace bus ID + +What: /sys/bus/coresight/devices/mali-source-ela/enable_source +Description: + Attribute used to enable Coresight Source ELA. + +What: /sys/bus/coresight/devices/mali-source-ela/is_enabled +Description: + Attribute used to check if Coresight Source ELA is enabled. + +What: /sys/bus/coresight/devices/mali-source-ela/select +Description: + Coresight Source ELA select trace mode: + [0], NONE + [1], JCN + [2], CEU_EXEC + [3], CEU_CMDS + [4], MCU_AHBP + [5], HOST_AXI + [6], NR_TRACEMODE + + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask0 +Description: + Coresight Source ELA SIGMASK0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigmask4 +Description: + Coresight Source ELA SIGMASK4 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp0 +Description: + Coresight Source ELA SIGCOMP0 register set/get. + Refer to specification for more details. + +What: /sys/bus/coresight/devices/mali-source-ela/sigcomp4 +Description: + Coresight Source ELA SIGCOMP4 register set/get. + Refer to specification for more details. diff --git a/Documentation/csf_sync_state_dump.txt b/Documentation/csf_sync_state_dump.txt new file mode 100644 index 000000000000..dc1e48774377 --- /dev/null +++ b/Documentation/csf_sync_state_dump.txt @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +DebugFS interface: +------------------ + +A new per-kbase-context debugfs file called csf_sync has been implemented +which captures the current KCPU & GPU queue state of the not-yet-completed +operations and displayed through the debugfs file. +This file is at: +======================================================= +/sys/kernel/debug/mali0/ctx/_/csf_sync +======================================================= + +Output Format: +---------------- + +The csf_sync file contains important data for the currently active queues. +This data is formatted into two segments, which are separated by a +pipe character: the common properties and the operation-specific properties. + +Common Properties: +------------------ + +* Queue type: GPU or KCPU. +* kbase context id and the queue id. +* If the queue type is a GPU queue then the group handle is also noted, +in the middle of the other two IDs. The slot value is also dumped. +* Execution status, which can either be 'P' for pending or 'S' for started. +* Command type is then output which indicates the type of dependency +(i.e. wait or signal). +* Object address which is a pointer to the sync object that the +command operates on. +* The live value, which is the value of the synchronization object +at the time of dumping. This could help to determine why wait +operations might be blocked. + +Operation-Specific Properties: +------------------------------ + +The operation-specific values for KCPU queue fence operations +are as follows: a unique timeline name, timeline context, and a fence +sequence number. The CQS WAIT and CQS SET are denoted in the sync dump +as their OPERATION counterparts, and therefore show the same operation +specific values; the argument value to wait on or set to, and operation type, +being (by definition) op:gt and op:set for CQS_WAIT and CQS_SET respectively. + +There are only two operation-specific values for operations in GPU queues +which are always shown; the argument value to wait on or set/add to, +and the operation type (set/add) or wait condition (e.g. LE, GT, GE). + +Examples +-------- +GPU Queue Example +------------------ + +The following output is of a GPU queue, from a process that has a KCTX ID of 52, +is in Queue Group (CSG) 0, and has Queue ID 0. It has started and is waiting on +the object at address 0x0000007f81ffc800. The live value is 0, +as is the arg value. 
However, the operation "op" is GT, indicating it's waiting +for the live value to surpass the arg value: + +====================================================================================================================================== +queue:GPU-52-0-0 exec:S cmd:SYNC_WAIT slot:4 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +====================================================================================================================================== + +The following is an example of GPU queue dump, where the SYNC SET operation +is blocked by the preceding SYNC WAIT operation. This shows two GPU queues, +with the same KCTX ID of 8, Queue Group (CSG) 0, and Queue ID 0. The SYNC WAIT +operation has started, while the SYNC SET is pending, blocked by the SYNC WAIT. +Both operations are on the same slot, 2 and have live value of 0. The SYNC WAIT +is waiting on the object at address 0x0000007f81ffc800, while the SYNC SET will +set the object at address 0x00000000a3bad4fb when it is unblocked. +The operation "op" is GT for the SYNC WAIT, indicating it's waiting for the +live value to surpass the arg value, while the operation and arg value for the +SYNC SET is "set" and "1" respectively: + +====================================================================================================================================== +queue:GPU-8-0-0 exec:S cmd:SYNC_WAIT slot:2 obj:0x0000007f81ffc800 live_value:0x0000000000000000 | op:gt arg_value:0x0000000000000000 +queue:GPU-8-0-0 exec:P cmd:SYNC_SET slot:2 obj:0x00000000a3bad4fb live_value:0x0000000000000000 | op:set arg_value:0x0000000000000001 +====================================================================================================================================== + +KCPU Queue Example +------------------ + +The following is an example of a KCPU queue, from a process that has +a KCTX ID of 0 and has Queue ID 1. It has started and is waiting on the +object at address 0x0000007fbf6f2ff8. The live value is currently 0 with +the "op" being GT indicating it is waiting on the live value to +surpass the arg value. + +=============================================================================================================================== +queue:KCPU-0-1 exec:S cmd:CQS_WAIT_OPERATION obj:0x0000007fbf6f2ff8 live_value:0x0000000000000000 | op:gt arg_value: 0x00000000 +=============================================================================================================================== diff --git a/Documentation/devicetree/bindings/arm/mali-bifrost.txt b/Documentation/devicetree/bindings/arm/mali-bifrost.txt index 2b3b1d028ccd..caf2de5e47be 100644 --- a/Documentation/devicetree/bindings/arm/mali-bifrost.txt +++ b/Documentation/devicetree/bindings/arm/mali-bifrost.txt @@ -235,6 +235,7 @@ gpu@0xfc010000 { ... pbha { int_id_override = <2 0x32>, <9 0x05>, <16 0x32>; + propagate_bits = <0x03>; }; ... }; diff --git a/Documentation/devicetree/bindings/arm/mali-coresight-source.txt b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt new file mode 100644 index 000000000000..87a1ce3b3e85 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mali-coresight-source.txt @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# +===================================== +ARM CoreSight Mali Source integration +===================================== + +See Documentation/trace/coresight/coresight.rst for detailed information +about Coresight. + +This documentation will cover Mali specific devicetree integration. + +References to Sink ports are given as examples. Access to Sink is specific +to an implementation and would require dedicated kernel modules. + +ARM Coresight Mali Source ITM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-itm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-itm { + compatible = "arm,coresight-mali-source-itm"; + gpu = <&gpu>; + port { + mali_source_itm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port0>; + }; + }; +}; + +ARM Coresight Mali Source ETM +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-etm" +- gpu : phandle to a Mali GPU definition +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example +------- + +mali-source-etm { + compatible = "arm,coresight-mali-source-etm"; + gpu = <&gpu>; + port { + mali_source_etm_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; + +ARM Coresight Mali Source ELA +============================= + +Required properties +------------------- + +- compatible: Has to be "arm,coresight-mali-source-ela" +- gpu : phandle to a Mali GPU definition +- signal-groups: Signal groups indexed from 0 to 5. + Used to configure the signal channels. + - sgN: Types of signals attached to one channel. + It can be more than one type in the case of + JCN request/response. + + Types: + - "jcn-request": Can share the channel with "jcn-response" + - "jcn-response": Can share the channel with "jcn-request" + - "ceu-execution": Cannot share the channel with other types + - "ceu-commands": Cannot share the channel with other types + - "mcu-ahbp": Cannot share the channel with other types + - "host-axi": Cannot share the channel with other types + + + If the HW implementation shares a common channel + for JCN response and request (total of 4 channels), + Refer to: + - "Example: Shared JCN request/response channel" + Otherwise (total of 5 channels), refer to: + - "Example: Split JCN request/response channel" +- port: + - endpoint: + - remote-endpoint: phandle to a Coresight sink port + +Example: Split JCN request/response channel +-------------------------------------------- + +This examples applies to implementations with a total of 5 signal groups, +where JCN request and response are assigned to independent channels. 
+ +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request"; + sg1 = "jcn-response"; + sg2 = "ceu-execution"; + sg3 = "ceu-commands"; + sg4 = "mcu-ahbp"; + sg5 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port2>; + }; + }; +}; + +Example: Shared JCN request/response channel +-------------------------------------------- + +This examples applies to implementations with a total of 4 signal groups, +where JCN request and response are assigned to the same channel. + +mali-source-ela { + compatible = "arm,coresight-mali-source-ela"; + gpu = <&gpu>; + signal-groups { + sg0 = "jcn-request", "jcn-response"; + sg1 = "ceu-execution"; + sg2 = "ceu-commands"; + sg3 = "mcu-ahbp"; + sg4 = "host-axi"; + }; + port { + mali_source_ela_out_port0: endpoint { + remote-endpoint = <&mali_sink_in_port1>; + }; + }; +}; diff --git a/Documentation/dma-buf-test-exporter.txt b/Documentation/dma-buf-test-exporter.txt index b01020c06751..70a92f7d3e28 100644 --- a/Documentation/dma-buf-test-exporter.txt +++ b/Documentation/dma-buf-test-exporter.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2020-2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2020-2022 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -38,5 +38,5 @@ The buffers support all of the dma_buf API, including mmap. It supports being compiled as a module both in-tree and out-of-tree. -See include/linux/dma-buf-test-exporter.h for the ioctl interface. +See include/uapi/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.h for the ioctl interface. See Documentation/dma-buf-sharing.txt for details on dma_buf. diff --git a/drivers/base/arm/Makefile b/drivers/base/arm/Makefile index c1a61a1106d0..cc4bde71d3e6 100644 --- a/drivers/base/arm/Makefile +++ b/drivers/base/arm/Makefile @@ -90,6 +90,12 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ KBUILD_CFLAGS += -Wall -Werror +ifeq ($(CONFIG_GCOV_KERNEL), y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter @@ -120,7 +126,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) diff --git a/drivers/base/arm/Mconfig b/drivers/base/arm/Mconfig deleted file mode 100644 index f7787f0ccd34..000000000000 --- a/drivers/base/arm/Mconfig +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BASE_MODULES - bool "Mali Base extra modules" - default y if BACKEND_KERNEL - help - Enable this option to build support for a Arm Mali base modules. - Those modules provide extra features or debug interfaces and, - are optional for the use of the Mali GPU modules. - -config DMA_SHARED_BUFFER_TEST_EXPORTER - bool "Build dma-buf framework test exporter module" - depends on MALI_BASE_MODULES - default y - help - This option will build the dma-buf framework test exporter module. - Usable to help test importers. - - Modules: - - dma-buf-test-exporter.ko - -config MALI_MEMORY_GROUP_MANAGER - bool "Build Mali Memory Group Manager module" - depends on MALI_BASE_MODULES - default y - help - This option will build the memory group manager module. - This is an example implementation for allocation and release of pages - for memory pools managed by Mali GPU device drivers. - - Modules: - - memory_group_manager.ko - -config MALI_PROTECTED_MEMORY_ALLOCATOR - bool "Build Mali Protected Memory Allocator module" - depends on MALI_BASE_MODULES && GPU_HAS_CSF - default y - help - This option will build the protected memory allocator module. - This is an example implementation for allocation and release of pages - of secure memory intended to be used by the firmware - of Mali GPU device drivers. - - Modules: - - protected_memory_allocator.ko - diff --git a/drivers/base/arm/memory_group_manager/memory_group_manager.c b/drivers/base/arm/memory_group_manager/memory_group_manager.c index 825893e3cf8e..2acb9faf12d0 100644 --- a/drivers/base/arm/memory_group_manager/memory_group_manager.c +++ b/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -228,8 +228,8 @@ static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) #define ORDER_SMALL_PAGE 0 #define ORDER_LARGE_PAGE 9 -static void update_size(struct memory_group_manager_device *mgm_dev, int - group_id, int order, bool alloc) +static void update_size(struct memory_group_manager_device *mgm_dev, unsigned int group_id, + int order, bool alloc) { struct mgm_groups *data = mgm_dev->data; diff --git a/drivers/gpu/arm/bifrost/Kbuild b/drivers/gpu/arm/bifrost/Kbuild index 70f3997b2bd3..398e102a0af5 100644 --- a/drivers/gpu/arm/bifrost/Kbuild +++ b/drivers/gpu/arm/bifrost/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"g15p0-01eac0"' +MALI_RELEASE_NAME ?= '"g17p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_BIFROST_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/drivers/gpu/arm/bifrost/Kconfig b/drivers/gpu/arm/bifrost/Kconfig index 1bfb59ca14e2..e530e8c85b17 100644 --- a/drivers/gpu/arm/bifrost/Kconfig +++ b/drivers/gpu/arm/bifrost/Kconfig @@ -41,9 +41,30 @@ config MALI_PLATFORM_NAME include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must exist. 
-config MALI_REAL_HW +choice + prompt "Mali HW backend" depends on MALI_BIFROST - def_bool !MALI_BIFROST_NO_MALI + default MALI_REAL_HW + +config MALI_REAL_HW + bool "Enable build of Mali kernel driver for real HW" + depends on MALI_BIFROST + help + This is the default HW backend. + +config MALI_BIFROST_NO_MALI + bool "Enable build of Mali kernel driver for No Mali" + depends on MALI_BIFROST && MALI_BIFROST_EXPERT + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + + +endchoice menu "Platform specific options" source "drivers/gpu/arm/bifrost/platform/Kconfig" @@ -91,6 +112,21 @@ config MALI_BIFROST_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_BIFROST && MALI_CSF_SUPPORT + default n + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_BIFROST && !MALI_CSF_SUPPORT @@ -127,6 +163,11 @@ config MALI_DMA_BUF_LEGACY_COMPAT flushes in other drivers. This only has an effect for clients using UK 11.18 or older. For later UK versions it is not possible. +config MALI_CORESIGHT + depends on MALI_BIFROST && MALI_CSF_SUPPORT && !MALI_BIFROST_NO_MALI + bool "Enable Kbase CoreSight tracing support" + default n + menuconfig MALI_BIFROST_EXPERT depends on MALI_BIFROST bool "Enable Expert Settings" @@ -174,18 +215,6 @@ config MALI_CORESTACK comment "Platform options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_BIFROST_NO_MALI - bool "Enable No Mali" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This can be used to test the driver in a simulated environment - whereby the hardware is not physically present. If the hardware is physically - present it will not be used. This can be used to test the majority of the - driver without needing actual hardware or for software benchmarking. - All calls to the simulated hardware will complete immediately as if the hardware - completed the task. 
- config MALI_BIFROST_ERROR_INJECT bool "Enable No Mali error injection" depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_BIFROST_NO_MALI @@ -204,20 +233,6 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_BIFROST && MALI_BIFROST_EXPERT -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_BIFROST_DEBUG bool "Enable debug build" depends on MALI_BIFROST && MALI_BIFROST_EXPERT diff --git a/drivers/gpu/arm/bifrost/Makefile b/drivers/gpu/arm/bifrost/Makefile index 3fb736d7950e..dfe96d8c37e7 100644 --- a/drivers/gpu/arm/bifrost/Makefile +++ b/drivers/gpu/arm/bifrost/Makefile @@ -39,17 +39,10 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_ARBITRATION ?= n CONFIG_MALI_PARTITION_MANAGER ?= n - ifeq ($(origin CONFIG_MALI_ABITER_MODULES), undefined) - CONFIG_MALI_ARBITER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - - ifeq ($(origin CONFIG_MALI_GPU_POWER_MODULES), undefined) - CONFIG_MALI_GPU_POWER_MODULES := $(CONFIG_MALI_ARBITRATION) - endif - ifneq ($(CONFIG_MALI_BIFROST_NO_MALI),y) # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n endif ifeq ($(CONFIG_MALI_BIFROST_DVFS),y) @@ -64,10 +57,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n endif - ifeq ($(CONFIG_XEN),y) - ifneq ($(CONFIG_MALI_ARBITER_SUPPORT), n) - CONFIG_MALI_XEN ?= m - endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_FW_CORE_DUMP ?= y + CONFIG_MALI_CORESIGHT ?= n + else + CONFIG_MALI_FW_CORE_DUMP ?= n endif # @@ -76,12 +70,14 @@ ifeq ($(CONFIG_MALI_BIFROST),m) ifeq ($(CONFIG_MALI_BIFROST_EXPERT), y) ifeq ($(CONFIG_MALI_BIFROST_NO_MALI), y) CONFIG_MALI_REAL_HW = n + else # Prevent misuse when CONFIG_MALI_BIFROST_NO_MALI=n CONFIG_MALI_REAL_HW = y CONFIG_MALI_BIFROST_ERROR_INJECT = n endif + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n @@ -143,12 +139,11 @@ ifeq ($(CONFIG_MALI_BIFROST),m) else # Prevent misuse when CONFIG_MALI_BIFROST=n CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_ARBITER_MODULES = n - CONFIG_MALI_GPU_POWER_MODULES = n CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -158,8 +153,6 @@ CONFIGS := \ CONFIG_MALI_BIFROST_GATOR_SUPPORT \ CONFIG_MALI_ARBITER_SUPPORT \ CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_ARBITER_MODULES \ - CONFIG_MALI_GPU_POWER_MODULES \ CONFIG_MALI_PARTITION_MANAGER \ CONFIG_MALI_REAL_HW \ CONFIG_MALI_GEM5_BUILD \ @@ -189,10 +182,14 @@ CONFIGS := \ CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN + CONFIG_MALI_XEN \ + CONFIG_MALI_FW_CORE_DUMP \ + CONFIG_MALI_CORESIGHT -# +THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) +-include $(THIS_DIR)/../arbitration/Makefile + # MAKE_ARGS to pass the custom CONFIGs on out-of-tree build # # Generate the list of CONFIGs and values. 
@@ -254,7 +251,8 @@ KBUILD_CFLAGS += -Wdisabled-optimization # global variables. KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) KBUILD_CFLAGS += -Wmissing-field-initializers -KBUILD_CFLAGS += -Wtype-limits +# -Wtype-limits must be disabled due to build failures on kernel 5.x +KBUILD_CFLAGS += -Wno-type-limit KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) @@ -263,6 +261,12 @@ KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions KBUILD_CFLAGS += -Wno-ignored-qualifiers +ifeq ($(CONFIG_GCOV_KERNEL),y) + KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) + KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + EXTRA_CFLAGS += -DGCOV_PROFILE=1 +endif + all: $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules diff --git a/drivers/gpu/arm/bifrost/Mconfig b/drivers/gpu/arm/bifrost/Mconfig deleted file mode 100644 index f812bcad639c..000000000000 --- a/drivers/gpu/arm/bifrost/Mconfig +++ /dev/null @@ -1,326 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_BIFROST - bool "Mali Midgard series support" - default y - help - Enable this option to build support for a ARM Mali Midgard GPU. - - To compile this driver as a module, choose M here: - this will generate a single module, called mali_kbase. - -config MALI_PLATFORM_NAME - depends on MALI_BIFROST - string "Platform name" - default "hisilicon" if PLATFORM_HIKEY960 - default "hisilicon" if PLATFORM_HIKEY970 - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - - When PLATFORM_CUSTOM is set, this needs to be set manually to - pick up the desired platform files. - -config MALI_REAL_HW - bool - depends on MALI_BIFROST - default y - default n if NO_MALI - -config MALI_PLATFORM_DT_PIN_RST - bool "Enable Juno GPU Pin reset" - depends on MALI_BIFROST - default n - default y if BUSLOG - help - Enables support for GPUs pin reset on Juno platforms. - -config MALI_CSF_SUPPORT - bool "Enable Mali CSF based GPU support" - depends on MALI_BIFROST - default y if GPU_HAS_CSF - help - Enables support for CSF based GPUs. - -config MALI_BIFROST_DEVFREQ - bool "Enable devfreq support for Mali" - depends on MALI_BIFROST - default y - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simple on-demand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. 
- -config MALI_BIFROST_DVFS - bool "Enable legacy DVFS" - depends on MALI_BIFROST && !MALI_BIFROST_DEVFREQ - default n - help - Choose this option to enable legacy DVFS in the Mali Midgard DDK. - -config MALI_BIFROST_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_BIFROST && !BACKEND_USER - default y - help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. - -config MALI_BIFROST_ENABLE_TRACE - bool "Enable kbase tracing" - depends on MALI_BIFROST - default y if MALI_BIFROST_DEBUG - default n - help - Enables tracing in kbase. Trace log available through - the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled - -config MALI_ARBITER_SUPPORT - bool "Enable arbiter support for Mali" - depends on MALI_BIFROST && !MALI_CSF_SUPPORT - default n - help - Enable support for the arbiter interface in the driver. - This allows an external arbiter to manage driver access - to GPU hardware in a virtualized environment - - If unsure, say N. - -config DMA_BUF_SYNC_IOCTL_SUPPORTED - bool "Enable Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" - depends on MALI_BIFROST && BACKEND_KERNEL - default y - -config MALI_DMA_BUF_MAP_ON_DEMAND - bool "Enable map imported dma-bufs on demand" - depends on MALI_BIFROST - default n - default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED - help - This option will cause kbase to set up the GPU mapping of imported - dma-buf when needed to run atoms. This is the legacy behavior. - - This is intended for testing and the option will get removed in the - future. - -config MALI_DMA_BUF_LEGACY_COMPAT - bool "Enable legacy compatibility cache flush on dma-buf map" - depends on MALI_BIFROST && !MALI_DMA_BUF_MAP_ON_DEMAND - default n - help - This option enables compatibility with legacy dma-buf mapping - behavior, then the dma-buf is mapped on import, by adding cache - maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, - including a cache flush. - - This option might work-around issues related to missing cache - flushes in other drivers. This only has an effect for clients using - UK 11.18 or older. For later UK versions it is not possible. - -menuconfig MALI_BIFROST_EXPERT - depends on MALI_BIFROST - bool "Enable Expert Settings" - default y - help - Enabling this option and modifying the default settings may produce - a driver with performance or other limitations. - -config MALI_MEMORY_FULLY_BACKED - bool "Enable memory fully physically-backed" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option enables full physical backing of all virtual - memory allocations in the kernel. Notice that this build - option only affects allocations of grow-on-GPU-page-fault - memory. - -config MALI_CORESTACK - bool "Enable support of GPU core stack power control" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. - This feature is currently only supported on t-Six and t-HEx GPUs. - - If unsure, say N. 
- -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - -choice - prompt "Error injection level" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default MALI_ERROR_INJECT_NONE - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_ERROR_INJECT_NONE - bool "disabled" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - help - Error injection is disabled. - -config MALI_ERROR_INJECT_TRACK_LIST - bool "error track list" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Errors to inject are pre-configured by the user. - -config MALI_ERROR_INJECT_RANDOM - bool "random error injection" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && NO_MALI - help - Injected errors are random, rather than user-driven. - -endchoice - -config MALI_ERROR_INJECT_ON - string - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default "0" if MALI_ERROR_INJECT_NONE - default "1" if MALI_ERROR_INJECT_TRACK_LIST - default "2" if MALI_ERROR_INJECT_RANDOM - -config MALI_BIFROST_ERROR_INJECT - bool - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if !MALI_ERROR_INJECT_NONE - -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - -config MALI_BIFROST_DEBUG - bool "Enable debug build" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if DEBUG - default n - help - Select this option for increased checking and reporting of errors. - -config MALI_BIFROST_FENCE_DEBUG - bool "Enable debug sync fence usage" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - -config MALI_BIFROST_SYSTEM_TRACE - bool "Enable system event tracing support" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default y if MALI_BIFROST_DEBUG - default n - help - Choose this option to enable system trace events for each - kbase event. This is typically used for debugging but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -# Instrumentation options. - -# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig. -# config MALI_BIFROST_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. -# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig. 
-# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig. - -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_PWRSOFT_765 - bool "Enable workaround for PWRSOFT-765" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - help - PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged - in kernel v4.10, however if backported into the kernel then this - option must be manually selected. - - If using kernel >= v4.10 then say N, otherwise if devfreq cooling - changes have been backported say Y to avoid compilation errors. - - -config MALI_HW_ERRATA_1485982_NOT_AFFECTED - bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT - default n - default y if PLATFORM_JUNO - help - This option disables the default workaround for GPU2017-1336. The - workaround keeps the L2 cache powered up except for powerdown and reset. - - The workaround introduces a limitation that will prevent the running of - protected mode content on fully coherent platforms, as the switch to IO - coherency mode requires the L2 to be turned off. - -config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE - bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" - depends on MALI_BIFROST && MALI_BIFROST_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED - default n - help - This option uses an alternative workaround for GPU2017-1336. Lowering - the GPU clock to a, platform specific, known good frequeuncy before - powering down the L2 cache. The clock can be specified in the device - tree using the property, opp-mali-errata-1485982. Otherwise the - slowest clock will be selected. 
- - -source "kernel/drivers/gpu/arm/arbitration/Mconfig" -source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild index 7eec91ff6631..efebc8a544d1 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/Kbuild +++ b/drivers/gpu/arm/bifrost/backend/gpu/Kbuild @@ -47,8 +47,12 @@ endif bifrost_kbase-$(CONFIG_MALI_BIFROST_DEVFREQ) += \ backend/gpu/mali_kbase_devfreq.o -# Dummy model +ifneq ($(CONFIG_MALI_REAL_HW),y) + bifrost_kbase-y += backend/gpu/mali_kbase_model_linux.o +endif + +# NO_MALI Dummy model interface bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_linux.o # HW error simulation bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o + diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c index 15999cbc9126..eb63b2c56c3d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_irq_linux.c @@ -25,12 +25,12 @@ #include -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) /* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 static void *kbase_tag(void *ptr, u32 tag) { @@ -500,4 +500,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c index 888aa59421a7..258dc6dac6c5 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_as.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -67,9 +67,8 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, kbase_js_runpool_inc_context_count(kbdev, kctx); } -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { int i; @@ -240,4 +239,3 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, return true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c index e17014e45f6b..ab27e8bde40e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_hw.c @@ -44,9 +44,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, const u64 affinity, const u64 limited_core_mask); -static u64 kbase_job_write_affinity(struct kbase_device *kbdev, - base_jd_core_req core_req, - int js, const u64 limited_core_mask) +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, + unsigned int js, const u64 limited_core_mask) { u64 affinity; bool skip_affinity_check = false; @@ -191,7 +190,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) return jc; } -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js) +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js) { struct kbase_context *kctx; u32 cfg; @@ -344,10 +343,8 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * work out the best estimate (which might still result in an over-estimate to * the calculated time spent) */ -static void kbasep_job_slot_update_head_start_timestamp( - struct kbase_device *kbdev, - int js, - ktime_t end_timestamp) +static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js, + ktime_t end_timestamp) { ktime_t timestamp_diff; struct kbase_jd_atom *katom; @@ -377,8 +374,7 @@ static void kbasep_job_slot_update_head_start_timestamp( * Make a tracepoint call to the instrumentation module informing that * softstop happened on given lpu (job slot). */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js) { KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( kbdev, @@ -387,7 +383,6 @@ static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, void kbase_job_done(struct kbase_device *kbdev, u32 done) { - int i; u32 count = 0; ktime_t end_timestamp; @@ -398,6 +393,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) end_timestamp = ktime_get_raw(); while (done) { + unsigned int i; u32 failed = done >> 16; /* treat failed slots as finished slots */ @@ -407,8 +403,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * numbered interrupts before the higher numbered ones. 
*/ i = ffs(finished) - 1; - if (WARN(i < 0, "%s: called without receiving any interrupts\n", __func__)) - break; do { int nr_done; @@ -607,11 +601,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom) +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) { #if KBASE_KTRACE_ENABLE u32 status_reg_before; @@ -669,6 +661,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, struct kbase_context *head_kctx; head = kbase_gpu_inspect(kbdev, js, 0); + if (unlikely(!head)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } head_kctx = head->kctx; if (status_reg_before == BASE_JD_EVENT_ACTIVE) @@ -737,7 +733,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; - int i; + unsigned int i; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -749,7 +745,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; - int target_js = target_katom->slot_nr; + unsigned int target_js = target_katom->slot_nr; int i; bool stop_sent = false; @@ -927,8 +923,8 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); * * Where possible any job in the next register is evicted before the soft-stop. */ -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags) +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) { dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); @@ -948,8 +944,8 @@ void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); } -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev = kctx->kbdev; bool stopped; @@ -1258,7 +1254,7 @@ static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) { - int i; + unsigned int i; int pending_jobs = 0; /* Count the number of jobs */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h index 1ebb8434046c..e4cff1f1e59c 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_internal.h @@ -33,21 +33,6 @@ #include #include -/** - * kbase_job_submit_nolock() - Submit a job to a certain job-slot - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. 
- * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_submit_nolock(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js); - /** * kbase_job_done_slot() - Complete the head job on a particular job-slot * @kbdev: Device pointer @@ -60,17 +45,16 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) -static inline char *kbasep_make_job_slot_string(int js, char *js_string, - size_t js_size) +static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%i", js); + snprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif #if !MALI_USE_CSF -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js, + struct kbase_context *kctx) { return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); } @@ -90,7 +74,7 @@ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, * * Return: 0 if the job was successfully submitted to hardware, an error otherwise. */ -int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js); #if !MALI_USE_CSF /** @@ -106,11 +90,9 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, * The following locking conditions are made on the caller: * - it must hold the hwaccess_lock */ -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom); +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js, + u32 action, base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); #endif /* !MALI_USE_CSF */ /** @@ -134,11 +116,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * * Return: true if an atom was stopped, false otherwise */ -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action); +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action); /** * kbase_job_slot_init - Initialise job slot framework diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c index e5af4ca8fc43..388b37f36a9d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.c @@ -93,9 +93,8 @@ static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, * * Return: Atom removed from ringbuffer */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, unsigned int js, + ktime_t *end_timestamp) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; struct kbase_jd_atom *katom; @@ -118,8 +117,7 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, return katom; } -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - 
int idx) +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -131,8 +129,7 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js) { struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; @@ -144,12 +141,13 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) { - int js; - int i; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -160,7 +158,7 @@ bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) return false; } -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -178,7 +176,7 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) return nr; } -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js) { int nr = 0; int i; @@ -193,8 +191,8 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) return nr; } -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum kbase_atom_gpu_rb_state min_rb_state) +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, unsigned int js, + enum kbase_atom_gpu_rb_state min_rb_state) { int nr = 0; int i; @@ -244,9 +242,11 @@ static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, bool secure) { - int js, i; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int i; + for (i = 0; i < SLOT_RB_SIZE; i++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); @@ -261,7 +261,7 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, return false; } -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) { lockdep_assert_held(&kbdev->hwaccess_lock); @@ -430,9 +430,9 @@ static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, * * Return: true if any slots other than @js are busy, false otherwise */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +static inline bool other_slots_busy(struct kbase_device *kbdev, unsigned int js) { - int slot; + unsigned int slot; for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { if (slot == js) @@ -844,7 +844,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, void kbase_backend_slot_update(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1013,25 +1013,25 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); - if (!kbase_job_hw_submit(kbdev, katom[idx], js)) + if (!kbase_job_hw_submit(kbdev, katom[idx], js)) { katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED; + + /* 
Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. + */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_submit(katom[idx]); + } else break; /* ***TRANSITION TO HIGHER STATE*** */ fallthrough; case KBASE_ATOM_GPU_RB_SUBMITTED: - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. - */ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - - /* Inform platform at start/finish of atom */ - kbasep_platform_event_atom_submit(katom[idx]); - break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: @@ -1111,8 +1111,7 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, * * Return: true if an atom was evicted, false otherwise. */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code) { struct kbase_jd_atom *katom; struct kbase_jd_atom *next_katom; @@ -1120,6 +1119,10 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, lockdep_assert_held(&kbdev->hwaccess_lock); katom = kbase_gpu_inspect(kbdev, js, 0); + if (!katom) { + dev_err(kbdev->dev, "Can't get a katom from js(%u)\n", js); + return false; + } next_katom = kbase_gpu_inspect(kbdev, js, 1); if (next_katom && @@ -1184,13 +1187,18 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * on the HW and returned to the JS. */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; + struct kbase_context *kctx = NULL; + + if (unlikely(!katom)) { + dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js); + return; + } + + kctx = katom->kctx; dev_dbg(kbdev->dev, "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", @@ -1243,7 +1251,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } } else if (completion_code != BASE_JD_EVENT_DONE) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; + unsigned int i; if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", @@ -1388,7 +1396,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1416,7 +1424,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) kbase_gpu_in_protected_mode(kbdev)); WARN(!(kbase_jd_katom_is_protected(katom) && js == 0) && kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); + "Protected atom on JS%u not supported", js); } if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && !kbase_ctx_flag(katom->kctx, KCTX_DYING)) @@ -1512,10 +1520,8 @@ static bool should_stop_next_atom(struct kbase_device *kbdev, return ret; } -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *katom, u32 action) { struct 
kbase_context *kctx = katom->kctx; u32 hw_action = action & JS_COMMAND_MASK; @@ -1559,11 +1565,8 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js, struct kbase_jd_atom *katom, u32 action) { struct kbase_jd_atom *katom_idx0; struct kbase_context *kctx_idx0 = NULL; @@ -1816,7 +1819,7 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, void kbase_gpu_dump_slots(struct kbase_device *kbdev) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1831,12 +1834,10 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) idx); if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%pK gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); + dev_info(kbdev->dev, " js%u idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); + dev_info(kbdev->dev, " js%u idx%d : empty\n", js, idx); } } @@ -1845,7 +1846,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) void kbase_backend_slot_kctx_purge_locked(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; bool tracked = false; lockdep_assert_held(&kbdev->hwaccess_lock); diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h index d3ff203762f9..32be0bf44655 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_jm_rb.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,8 +40,7 @@ * * Return: true if job evicted from NEXT registers, false otherwise */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code); +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 completion_code); /** * kbase_gpu_complete_hw - Complete an atom on job slot js @@ -53,10 +52,8 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, * completed * @end_timestamp: Time of completion */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp); +void kbase_gpu_complete_hw(struct kbase_device *kbdev, unsigned int js, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); /** * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer @@ -68,8 +65,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * Return: The atom at that position in the ringbuffer * or NULL if no atom present */ -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx); +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, unsigned int js, int idx); /** * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c index a2f824da5e04..cbc88f91a400 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,7 +91,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; struct kbase_backend_data *backend; - int s; + unsigned int s; bool reset_needed = false; KBASE_DEBUG_ASSERT(timer != NULL); @@ -375,4 +375,3 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev) backend->timeouts_updated = true; } - diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c index 1a0209f702ac..19c345341ea9 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.c @@ -62,8 +62,9 @@ * document */ #include +#include #include -#include +#include #include #if MALI_USE_CSF @@ -319,7 +320,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -337,21 +338,6 @@ static const struct control_reg_values_t all_control_reg_values[] = { .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, - { - .name = "tDUx", - .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0), - .as_present = 0xFF, - .thread_max_threads = 0x180, - .thread_max_workgroup_size = 0x180, - .thread_max_barrier_size = 0x180, - .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0), - .tiler_features = 0x809, - .mmu_features = 0x2830, - .gpu_features_lo = 0, - .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, - .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, - }, { .name = "tODx", .gpu_id = GPU_ID2_MAKE(10, 8, 0, 2, 0, 0, 0), @@ -364,7 +350,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -412,7 +398,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, .stack_present = 0xF, }, { @@ -428,7 +414,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, .stack_present = 0xF, }, }; @@ -530,17 +516,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. 
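A minimal sketch of the index handling that the named constants replace here: the first few dwords of a counter block are header pseudo-counters (PRFCNT_EN, TIMESTAMP_LO/HI) that carry no IPA value, so a selected event index is only usable once it clears the header and stays within the per-core block. The constant values and the function name below are assumptions for illustration, not the driver's definitions.

/* Sketch only: validate an event index and convert it to a block offset. */
#define EXAMPLE_HEADER_DWORDS		4u	/* assumed header size */
#define EXAMPLE_COUNTERS_PER_CORE	60u	/* assumed payload size */

static bool example_event_index_to_offset(u32 event_index, u32 *offset)
{
	/* Header pseudo-counters read back as zero for IPA purposes. */
	if (event_index < EXAMPLE_HEADER_DWORDS)
		return false;

	/* Only the primary counter block is modelled. */
	if (event_index >= EXAMPLE_HEADER_DWORDS + EXAMPLE_COUNTERS_PER_CORE)
		return false;

	*offset = event_index - EXAMPLE_HEADER_DWORDS;
	return true;
}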
*/ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); @@ -736,7 +723,7 @@ void gpu_model_glb_request_job_irq(void *model) spin_lock_irqsave(&hw_error_status.access_lock, flags); hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF; spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - gpu_device_raise_irq(model, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ); } #endif /* !MALI_USE_CSF */ @@ -768,7 +755,7 @@ static void init_register_statuses(struct dummy_model_t *dummy) performance_counters.time = 0; } -static void update_register_statuses(struct dummy_model_t *dummy, int job_slot) +static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot) { lockdep_assert_held(&hw_error_status.access_lock); @@ -1101,7 +1088,7 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp return ret; } -void *midgard_model_create(const void *config) +void *midgard_model_create(struct kbase_device *kbdev) { struct dummy_model_t *dummy = NULL; @@ -1118,7 +1105,12 @@ void *midgard_model_create(const void *config) GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values); performance_counters.shader_present = get_implementation_register( GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values); + + gpu_device_set_data(dummy, kbdev); + + dev_info(kbdev->dev, "Using Dummy Model"); } + return dummy; } @@ -1134,7 +1126,7 @@ static void midgard_model_get_outputs(void *h) lockdep_assert_held(&hw_error_status.access_lock); if (hw_error_status.job_irq_status) - gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ); if ((dummy->power_changed && dummy->power_changed_mask) || (dummy->reset_completed & dummy->reset_completed_mask) || @@ -1145,10 +1137,10 @@ static void midgard_model_get_outputs(void *h) (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) || #endif (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled)) - gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ); if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask) - gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ); + gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ); } static void midgard_model_update(void *h) @@ -1215,7 +1207,7 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy) } } -u8 midgard_model_write_reg(void *h, u32 addr, u32 value) +void midgard_model_write_reg(void *h, u32 addr, u32 value) { unsigned long flags; struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -1225,7 +1217,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) #if !MALI_USE_CSF if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) && (addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) { - int slot_idx = (addr >> 7) & 0xf; + unsigned int slot_idx = (addr >> 7) & 0xf; KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS); if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) { @@ -1607,11 +1599,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value) midgard_model_update(dummy); midgard_model_get_outputs(dummy); spin_unlock_irqrestore(&hw_error_status.access_lock, flags); - - return 1; } -u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) +void midgard_model_read_reg(void *h, u32 addr, u32 *const value) { unsigned long flags; 
struct dummy_model_t *dummy = (struct dummy_model_t *)h; @@ -2051,8 +2041,6 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value) spin_unlock_irqrestore(&hw_error_status.access_lock, flags); CSTD_UNUSED(dummy); - - return 1; } static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset, @@ -2228,3 +2216,16 @@ int gpu_model_control(void *model, return 0; } + +/** + * kbase_is_gpu_removed - Has the GPU been removed. + * @kbdev: Kbase device pointer + * + * This function would return true if the GPU has been removed. + * It is stubbed here + * Return: Always false + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + return false; +} diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 7d370de9f601..84842291c0f7 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -21,11 +21,24 @@ /* * Dummy Model interface + * + * Support for NO_MALI dummy Model interface. + * + * +-----------------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------------+ + * | Model Linux Framework | + * +-----------------------------------+ + * | Model Dummy interface definitions | + * +-----------------+-----------------+ + * | Fake R/W | Fake IRQ | + * +-----------------+-----------------+ */ #ifndef _KBASE_MODEL_DUMMY_H_ #define _KBASE_MODEL_DUMMY_H_ +#include #include #define model_error_log(module, ...) pr_err(__VA_ARGS__) @@ -154,11 +167,6 @@ struct gpu_model_prfcnt_en { u32 shader; }; -void *midgard_model_create(const void *config); -void midgard_model_destroy(void *h); -u8 midgard_model_write_reg(void *h, u32 addr, u32 value); -u8 midgard_model_read_reg(void *h, u32 addr, - u32 * const value); void midgard_set_error(int job_slot); int job_atom_inject_error(struct kbase_error_params *params); int gpu_model_control(void *h, @@ -211,17 +219,6 @@ void gpu_model_prfcnt_dump_request(uint32_t *sample_buf, struct gpu_model_prfcnt void gpu_model_glb_request_job_irq(void *model); #endif /* MALI_USE_CSF */ -enum gpu_dummy_irq { - GPU_DUMMY_JOB_IRQ, - GPU_DUMMY_GPU_IRQ, - GPU_DUMMY_MMU_IRQ -}; - -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq); -void gpu_device_set_data(void *model, void *data); -void *gpu_device_get_data(void *model); - extern struct error_status_t hw_error_status; #endif diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c index 972d1c87fb1a..75b1e7e656c0 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_error_generator.c @@ -21,7 +21,7 @@ #include #include -#include "backend/gpu/mali_kbase_model_dummy.h" +#include "backend/gpu/mali_kbase_model_linux.h" static struct kbase_error_atom *error_track_list; diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c index 7887cb240d43..b37680ddb29b 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.c @@ -20,12 +20,12 @@ */ /* - * Model interface + * Model Linux Framework interfaces. 
*/ #include #include -#include + #include "backend/gpu/mali_kbase_model_linux.h" #include "device/mali_kbase_device.h" #include "mali_kbase_irq_internal.h" @@ -105,8 +105,7 @@ static void serve_mmu_irq(struct work_struct *work) kmem_cache_free(kbdev->irq_slab, data); } -void gpu_device_raise_irq(void *model, - enum gpu_dummy_irq irq) +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq) { struct model_irq_data *data; struct kbase_device *kbdev = gpu_device_get_data(model); @@ -120,15 +119,15 @@ void gpu_device_raise_irq(void *model, data->kbdev = kbdev; switch (irq) { - case GPU_DUMMY_JOB_IRQ: + case MODEL_LINUX_JOB_IRQ: INIT_WORK(&data->work, serve_job_irq); atomic_set(&kbdev->serving_job_irq, 1); break; - case GPU_DUMMY_GPU_IRQ: + case MODEL_LINUX_GPU_IRQ: INIT_WORK(&data->work, serve_gpu_irq); atomic_set(&kbdev->serving_gpu_irq, 1); break; - case GPU_DUMMY_MMU_IRQ: + case MODEL_LINUX_MMU_IRQ: INIT_WORK(&data->work, serve_mmu_irq); atomic_set(&kbdev->serving_mmu_irq, 1); break; @@ -165,22 +164,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } - KBASE_EXPORT_TEST_API(kbase_reg_read); -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} - int kbase_install_interrupts(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); @@ -239,16 +224,12 @@ KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); int kbase_gpu_device_create(struct kbase_device *kbdev) { - kbdev->model = midgard_model_create(NULL); + kbdev->model = midgard_model_create(kbdev); if (kbdev->model == NULL) return -ENOMEM; - gpu_device_set_data(kbdev->model, kbdev); - spin_lock_init(&kbdev->reg_op_lock); - dev_warn(kbdev->dev, "Using Dummy Model"); - return 0; } diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h index dcb2e7cf7c70..a1c480eaf49d 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,13 +20,132 @@ */ /* - * Model interface + * Model Linux Framework interfaces. + * + * This framework is used to provide generic Kbase Models interfaces. + * Note: Backends cannot be used together; the selection is done at build time. 
+ * + * - Without Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | HW interface definitions | + * +-----------------------------+ + * + * - With Model Linux Framework: + * +-----------------------------+ + * | Kbase read/write/IRQ | + * +-----------------------------+ + * | Model Linux Framework | + * +-----------------------------+ + * | Model interface definitions | + * +-----------------------------+ */ #ifndef _KBASE_MODEL_LINUX_H_ #define _KBASE_MODEL_LINUX_H_ +/* + * Include Model definitions + */ + +#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#include +#endif /* IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ + +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) +/** + * kbase_gpu_device_create() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_create() + * + * Return: 0 on success, error code otherwise. + */ int kbase_gpu_device_create(struct kbase_device *kbdev); + +/** + * kbase_gpu_device_destroy() - Generic create function. + * + * @kbdev: Kbase device. + * + * Specific model hook is implemented by midgard_model_destroy() + */ void kbase_gpu_device_destroy(struct kbase_device *kbdev); -#endif /* _KBASE_MODEL_LINUX_H_ */ +/** + * midgard_model_create() - Private create function. + * + * @kbdev: Kbase device. + * + * This hook is specific to the model built in Kbase. + * + * Return: Model handle. + */ +void *midgard_model_create(struct kbase_device *kbdev); + +/** + * midgard_model_destroy() - Private destroy function. + * + * @h: Model handle. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_destroy(void *h); + +/** + * midgard_model_write_reg() - Private model write function. + * + * @h: Model handle. + * @addr: Address at which to write. + * @value: value to write. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_write_reg(void *h, u32 addr, u32 value); + +/** + * midgard_model_read_reg() - Private model read function. + * + * @h: Model handle. + * @addr: Address from which to read. + * @value: Pointer where to store the read value. + * + * This hook is specific to the model built in Kbase. + */ +void midgard_model_read_reg(void *h, u32 addr, u32 *const value); + +/** + * gpu_device_raise_irq() - Private IRQ raise function. + * + * @model: Model handle. + * @irq: IRQ type to raise. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_raise_irq(void *model, enum model_linux_irqs irq); + +/** + * gpu_device_set_data() - Private model set data function. + * + * @model: Model handle. + * @data: Data carried by model. + * + * This hook is global to the model Linux framework. + */ +void gpu_device_set_data(void *model, void *data); + +/** + * gpu_device_get_data() - Private model get data function. + * + * @model: Model handle. + * + * This hook is global to the model Linux framework. + * + * Return: Pointer to the data carried by model. 
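A minimal sketch of what a backend behind these hooks can look like, assuming nothing more than a flat register array; it is not the shipped dummy model, and the struct, array size and bounds checks are illustrative only. The gpu_device_set_data() call mirrors what the in-tree model's create hook does.

/* Sketch only: a trivial model backing the hooks declared above. */
struct example_model {
	u32 regs[64];			/* toy register file */
};

void *midgard_model_create(struct kbase_device *kbdev)
{
	struct example_model *m = kzalloc(sizeof(*m), GFP_KERNEL);

	if (m)
		gpu_device_set_data(m, kbdev);	/* let the framework find kbdev */
	return m;
}

void midgard_model_destroy(void *h)
{
	kfree(h);
}

void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
	struct example_model *m = h;

	if (addr / sizeof(u32) < ARRAY_SIZE(m->regs))
		m->regs[addr / sizeof(u32)] = value;
	/* A real model decodes @addr and raises MODEL_LINUX_*_IRQ via
	 * gpu_device_raise_irq() when a write completes an operation.
	 */
}

void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
	struct example_model *m = h;

	*value = (addr / sizeof(u32) < ARRAY_SIZE(m->regs)) ?
			m->regs[addr / sizeof(u32)] : 0;
}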
+ */ +void *gpu_device_get_data(void *model); +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ + +#endif /* _KBASE_MODEL_LINUX_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c index 9e38b904b459..e2b0a919282e 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_ca.c @@ -26,9 +26,7 @@ #include #include #include -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include #include int kbase_pm_ca_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c index 8173cf6ba7d7..d86a388c64fb 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_driver.c @@ -804,6 +804,17 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) KBASE_MCU_HCTL_SHADERS_PEND_ON; } else backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } else if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_CORESIGHT_ENABLE; + } +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } break; @@ -832,8 +843,7 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) unsigned long flags; kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); kbase_csf_scheduler_spin_unlock(kbdev, flags); backend->hwcnt_disabled = false; } @@ -854,9 +864,19 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) backend->mcu_state = KBASE_MCU_HCTL_MCU_ON_RECHECK; } - } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) { + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) backend->mcu_state = KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + else if (kbdev->csf.coresight.disable_on_pmode_enter) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE; + } else if (kbdev->csf.coresight.enable_on_pmode_exit) { + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + backend->mcu_state = KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE; } +#endif break; case KBASE_MCU_HCTL_MCU_ON_RECHECK: @@ -947,12 +967,46 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) #ifdef KBASE_PM_RUNTIME if (backend->gpu_sleep_mode_active) backend->mcu_state = KBASE_MCU_ON_SLEEP_INITIATE; - else + else { #endif backend->mcu_state = KBASE_MCU_ON_HALT; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED); + backend->mcu_state = KBASE_MCU_CORESIGHT_DISABLE; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + } } break; +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + case KBASE_MCU_ON_PMODE_ENTER_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.disable_on_pmode_enter = false; + } + break; + case 
KBASE_MCU_ON_PMODE_EXIT_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) { + backend->mcu_state = KBASE_MCU_ON; + kbdev->csf.coresight.enable_on_pmode_exit = false; + } + break; + case KBASE_MCU_CORESIGHT_DISABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_DISABLED)) + backend->mcu_state = KBASE_MCU_ON_HALT; + break; + + case KBASE_MCU_CORESIGHT_ENABLE: + if (kbase_debug_coresight_csf_state_check( + kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + case KBASE_MCU_ON_HALT: if (!kbase_pm_is_mcu_desired(kbdev)) { kbase_csf_firmware_trigger_mcu_halt(kbdev); @@ -1045,6 +1099,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) /* Reset complete */ if (!backend->in_reset) backend->mcu_state = KBASE_MCU_OFF; + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ break; default: @@ -1142,13 +1201,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. */ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1220,7 +1288,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) /* Enable HW timer of IPA control before * L2 cache is powered-up. diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h index 115cd3c34d90..e66ce57d3120 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. 
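A sketch of the caller-side pattern this helper implies, assuming the migration work itself is done by a hypothetical example_do_mmu_update(); only kbase_pm_l2_allow_mmu_page_migration(), hwaccess_lock and mmu_page_migrate_in_progress are taken from the patch, everything else is illustrative.

/* Sketch only: gate a page migration on the L2 state, then block L2
 * transitions for its duration.
 */
static int example_migrate_one_page(struct kbase_device *kbdev)
{
	unsigned long flags;
	int err;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	if (!kbase_pm_l2_allow_mmu_page_migration(kbdev)) {
		/* L2 is mid power transition: retry the migration later. */
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
		return -EAGAIN;
	}
	/* Checked and flagged without dropping the lock, so the L2 state
	 * machine cannot start a transition in between.
	 */
	kbdev->mmu_page_migrate_in_progress = true;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	err = example_do_mmu_update(kbdev);	/* hypothetical MMU work */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbdev->mmu_page_migrate_in_progress = false;
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return err;
}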
+ * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h index 5e57c9d0c0e1..3b448e397e72 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_mcu_states.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,6 +66,13 @@ * is being put to sleep. * @ON_PEND_SLEEP: MCU sleep is in progress. * @IN_SLEEP: Sleep request is completed and MCU has halted. + * @ON_PMODE_ENTER_CORESIGHT_DISABLE: The MCU is on, protected mode enter is about to + * be requested, Coresight is being disabled. + * @ON_PMODE_EXIT_CORESIGHT_ENABLE : The MCU is on, protected mode exit has happened + * Coresight is being enabled. + * @CORESIGHT_DISABLE: The MCU is on and Coresight is being disabled. + * @CORESIGHT_ENABLE: The MCU is on, host does not have control and + * Coresight is being enabled. */ KBASEP_MCU_STATE(OFF) KBASEP_MCU_STATE(PEND_ON_RELOAD) @@ -92,3 +99,10 @@ KBASEP_MCU_STATE(HCTL_SHADERS_CORE_OFF_PEND) KBASEP_MCU_STATE(ON_SLEEP_INITIATE) KBASEP_MCU_STATE(ON_PEND_SLEEP) KBASEP_MCU_STATE(IN_SLEEP) +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/* Additional MCU states for Coresight */ +KBASEP_MCU_STATE(ON_PMODE_ENTER_CORESIGHT_DISABLE) +KBASEP_MCU_STATE(ON_PMODE_EXIT_CORESIGHT_ENABLE) +KBASEP_MCU_STATE(CORESIGHT_DISABLE) +KBASEP_MCU_STATE(CORESIGHT_ENABLE) +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ diff --git a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c index 29e945d00fbe..865f526f61f2 100644 --- a/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c +++ b/drivers/gpu/arm/bifrost/backend/gpu/mali_kbase_pm_metrics.c @@ -464,7 +464,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev) */ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->pm.backend.metrics.lock); diff --git a/drivers/gpu/arm/bifrost/build.bp b/drivers/gpu/arm/bifrost/build.bp index a17ff432398c..48c1fb44f494 100644 --- a/drivers/gpu/arm/bifrost/build.bp +++ b/drivers/gpu/arm/bifrost/build.bp @@ -28,7 +28,7 @@ bob_defaults { defaults: [ "kernel_defaults", ], - no_mali: { + mali_no_mali: { kbuild_options: [ "CONFIG_MALI_BIFROST_NO_MALI=y", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", @@ -140,6 +140,9 @@ bob_defaults { mali_fw_core_dump: { kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"], }, + mali_coresight: { + kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", diff --git a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c index 
3abc7a2a66f4..07d277b947d2 100644 --- a/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c +++ b/drivers/gpu/arm/bifrost/context/backend/mali_kbase_context_csf.c @@ -34,6 +34,7 @@ #if IS_ENABLED(CONFIG_DEBUG_FS) #include #include +#include #include #include #include @@ -50,6 +51,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) kbase_jit_debugfs_init(kctx); kbase_csf_queue_group_debugfs_init(kctx); kbase_csf_kcpu_debugfs_init(kctx); + kbase_csf_sync_debugfs_init(kctx); kbase_csf_tiler_heap_debugfs_init(kctx); kbase_csf_tiler_heap_total_debugfs_init(kctx); kbase_csf_cpu_queue_debugfs_init(kctx); diff --git a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c index b6abfc44d212..792f724f16e4 100644 --- a/drivers/gpu/arm/bifrost/context/mali_kbase_context.c +++ b/drivers/gpu/arm/bifrost/context/mali_kbase_context.c @@ -129,10 +129,6 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -251,15 +247,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, diff --git a/drivers/gpu/arm/bifrost/csf/Kbuild b/drivers/gpu/arm/bifrost/csf/Kbuild index 1474bdaacb0d..44217dba10c0 100644 --- a/drivers/gpu/arm/bifrost/csf/Kbuild +++ b/drivers/gpu/arm/bifrost/csf/Kbuild @@ -31,20 +31,24 @@ bifrost_kbase-y += \ csf/mali_kbase_csf_reset_gpu.o \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ + csf/mali_kbase_csf_sync_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ - csf/mali_kbase_csf_tiler_heap_reclaim.o + csf/mali_kbase_csf_firmware_core_dump.o \ + csf/mali_kbase_csf_tiler_heap_reclaim.o \ + csf/mali_kbase_csf_mcu_shared_reg.o -bifrost_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o - -bifrost_kbase-$(CONFIG_MALI_BIFROST_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o +ifeq ($(CONFIG_MALI_BIFROST_NO_MALI),y) +bifrost_kbase-y += csf/mali_kbase_csf_firmware_no_mali.o +else +bifrost_kbase-y += csf/mali_kbase_csf_firmware.o +endif bifrost_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o - ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c index b77007300c5c..5f4061b2ab62 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,15 @@ #include #include #include "mali_kbase_csf_event.h" -#include +#include +#include "mali_kbase_csf_mcu_shared_reg.h" #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -130,21 +133,6 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, return 0; } -static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va_region *reg, - struct tagged_addr *phys) -{ - size_t num_pages = 2; - - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, - num_pages, MCU_AS_NR); - - WARN_ON(reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); -} - static void init_user_io_pages(struct kbase_queue *queue) { u32 *input_addr = (u32 *)(queue->user_io_addr); @@ -162,76 +150,15 @@ static void init_user_io_pages(struct kbase_queue *queue) output_addr[CS_ACTIVE/4] = 0; } -/* Map the input/output pages in the shared interface segment of MCU firmware - * address space. - */ -static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, - struct tagged_addr *phys, struct kbase_va_region *reg) -{ - unsigned long mem_flags = KBASE_REG_GPU_RD; - const size_t num_pages = 2; - int ret; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - if (kbdev->system_coherency == COHERENCY_NONE) { - mem_flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); - } else { - mem_flags |= KBASE_REG_SHARE_BOTH | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); - } - - mutex_lock(&kbdev->csf.reg_lock); - ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (ret) - return ret; - - /* Map input page */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - &phys[0], 1, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_IO, mmu_sync_info); - if (ret) - goto bad_insert; - - /* Map output page, it needs rw access */ - mem_flags |= KBASE_REG_GPU_WR; - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, - reg->start_pfn + 1, &phys[1], 1, mem_flags, - MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, - mmu_sync_info); - if (ret) - goto bad_insert_output_page; - - return 0; - -bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); -bad_insert: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - - return ret; -} - static void kernel_unmap_user_io_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - kbase_gpu_vm_lock(kctx); vunmap(queue->user_io_addr); - WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages)); - atomic_sub(num_pages, &kctx->permanent_mapped_pages); + WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES); + atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages); kbase_gpu_vm_unlock(kctx); } @@ -312,63 +239,56 @@ static void release_queue(struct kbase_queue *queue); * If an explicit or implicit unbind was missed by the userspace then the * mapping will persist. On process exit kernel itself will remove the mapping. */ -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { - const size_t num_pages = 2; - - gpu_munmap_user_io_pages(kctx, queue->reg, &queue->phys[0]); kernel_unmap_user_io_pages(kctx, queue); kbase_mem_pool_free_pages( &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, true, false); + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false); + kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES); - kfree(queue->reg); - queue->reg = NULL; + /* The user_io_gpu_va should have been unmapped inside the scheduler */ + WARN_ONCE(queue->user_io_gpu_va, "Userio pages appears still have mapping"); /* If the queue has already been terminated by userspace * then the ref count for queue object will drop to 0 here. 
*/ release_queue(queue); } +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); -int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { struct kbase_device *kbdev = kctx->kbdev; - struct kbase_va_region *reg; - const size_t num_pages = 2; int ret; lockdep_assert_held(&kctx->csf.lock); - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - num_pages, KBASE_REG_ZONE_MCU_SHARED); - if (!reg) - return -ENOMEM; - - ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, + ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false); - - if (ret != num_pages) - goto phys_alloc_failed; + if (ret != KBASEP_NUM_CS_USER_IO_PAGES) { + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + queue->phys[1].tagged_addr = 0; + return -ENOMEM; + } ret = kernel_map_user_io_pages(kctx, queue); if (ret) goto kernel_map_failed; + kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES); init_user_io_pages(queue); - ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg); - if (ret) - goto gpu_mmap_failed; - - queue->reg = reg; + /* user_io_gpu_va is only mapped when scheduler decides to put the queue + * on slot at runtime. Initialize it to 0, signalling no mapping. + */ + queue->user_io_gpu_va = 0; mutex_lock(&kbdev->csf.reg_lock); - if (kbdev->csf.db_file_offsets > - (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) + if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) kbdev->csf.db_file_offsets = 0; queue->db_file_offset = kbdev->csf.db_file_offsets; @@ -388,19 +308,16 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, return 0; -gpu_mmap_failed: - kernel_unmap_user_io_pages(kctx, queue); - kernel_map_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], - num_pages, queue->phys, false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false); + /* Marking both the phys to zero for indicating there is no phys allocated */ + queue->phys[0].tagged_addr = 0; + queue->phys[1].tagged_addr = 0; -phys_alloc_failed: - kfree(reg); - - return -ENOMEM; + return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) @@ -418,6 +335,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, return NULL; } +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ + return find_queue_group(kctx, group_handle); +} +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle) { @@ -468,6 +391,17 @@ static void release_queue(struct kbase_queue *queue) "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); kbase_csf_event_remove_error(queue->kctx, &queue->error); + + /* After this the Userspace would be able to free the + * memory for GPU queue. 
In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where tear down of region tracker + * would free up the GPU queue memory. + */ + kbase_gpu_vm_lock(queue->kctx); + kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_gpu_vm_unlock(queue->kctx); + kfree(queue); } } @@ -521,7 +455,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, region = kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ret = -ENOENT; goto out_unlock_vm; } @@ -570,7 +505,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = region; + queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; @@ -608,7 +543,6 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->extract_ofs = 0; - region->flags |= KBASE_REG_NO_USER_FREE; region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -702,16 +636,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, unbind_queue(kctx, queue); kbase_gpu_vm_lock(kctx); - if (!WARN_ON(!queue->queue_reg)) { - /* After this the Userspace would be able to free the - * memory for GPU queue. In case the Userspace missed - * terminating the queue, the cleanup will happen on - * context termination where tear down of region tracker - * would free up the GPU queue memory. - */ - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + if (!WARN_ON(!queue->queue_reg)) queue->queue_reg->user_data = NULL; - } kbase_gpu_vm_unlock(kctx); release_queue(queue); @@ -875,6 +801,15 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, if (WARN_ON(slot_bitmap > allowed_bitmap)) return; + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request + * or 2 CSI requests overlap and FW ends up missing the 2nd request. + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. + * + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. + */ + dmb(osh); + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, @@ -913,6 +848,14 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, WARN_ON(csi_index >= ginfo->stream_num)) return; + /* The access to CSG_DB_REQ/ACK needs to be ordered with respect to + * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visibile to + * FW before CS_REQ/ACK is set. + * + * 'osh' is used as CPU and GPU would be in the same outer shareable domain. 
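A sketch of the publish-then-ring ordering the comment above requires: the request word must be visible to the firmware before the doorbell toggle, so an outer-shareable barrier separates the two updates, matching the dmb(osh) the patch inserts before the doorbell register accesses. The function and parameter names below are illustrative, not the driver's.

/* Sketch only: order a CS request update before the doorbell toggle. */
static void example_ring_doorbell(u32 *cs_req, u32 *db_req, u32 new_req, u32 db_bit)
{
	WRITE_ONCE(*cs_req, new_req);				/* publish the request */
	dmb(osh);						/* outer-shareable ordering */
	WRITE_ONCE(*db_req, READ_ONCE(*db_req) ^ db_bit);	/* ring the doorbell */
}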
+ */ + dmb(osh); + value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); value ^= (1 << csi_index); kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, @@ -930,6 +873,8 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_va_region *region; int err = 0; + KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr); + /* GPU work submission happening asynchronously to prevent the contention with * scheduler lock and as the result blocking application thread. For this reason, * the vm_lock is used here to get the reference to the queue based on its buffer_gpu_addr @@ -1018,6 +963,15 @@ static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) } } +static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue) +{ + /* The queue's phys are zeroed when allocation fails. Both of them being + * zero is an impossible condition for a successful allocated set of phy pages. + */ + + return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr); +} + void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) { struct kbase_context *kctx = queue->kctx; @@ -1043,8 +997,8 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit) unbind_queue(kctx, queue); } - /* Free the resources, if allocated for this queue. */ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1057,8 +1011,8 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); unbind_stopped_queue(kctx, queue); - /* Free the resources, if allocated for this queue. */ - if (queue->reg) + /* Free the resources, if allocated phys for this queue */ + if (kbase_csf_queue_phys_allocated(queue)) kbase_csf_free_command_stream_user_pages(kctx, queue); } @@ -1121,167 +1075,39 @@ static bool iface_has_enough_streams(struct kbase_device *const kbdev, * @kctx: Pointer to kbase context where the queue group is created at * @s_buf: Pointer to suspend buffer that is attached to queue group * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. + * Return: 0 if phy-pages for the suspend buffer is successfully allocated. + * Otherwise -ENOMEM or error code. */ static int create_normal_suspend_buffer(struct kbase_context *const kctx, struct kbase_normal_suspend_buffer *s_buf) { - struct kbase_va_region *reg = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int err; lockdep_assert_held(&kctx->csf.lock); - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; + /* The suspend buffer's mapping address is valid only when the CSG is to + * run on slot, initializing it 0, signalling the buffer is not mapped. 
+ */ + s_buf->gpu_va = 0; s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); - if (!s_buf->phy) { - err = -ENOMEM; - goto phy_alloc_failed; - } + if (!s_buf->phy) + return -ENOMEM; /* Get physical page for a normal suspend buffer */ err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, &s_buf->phy[0], false); - if (err < 0) - goto phy_pages_alloc_failed; - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kctx->kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, &s_buf->phy[0], nr_pages, - mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - - return 0; - -mmu_insert_failed: - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, - &s_buf->phy[0], false, false); - -phy_pages_alloc_failed: - kfree(s_buf->phy); -phy_alloc_failed: - kfree(reg); - - return err; -} - -/** - * create_protected_suspend_buffer() - Create protected-mode suspend buffer - * per queue group - * - * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to suspend buffer that is attached to queue group - * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. - */ -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) -{ - struct kbase_va_region *reg = NULL; - struct tagged_addr *phys = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - int err = 0; - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
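[Editor's note] create_normal_suspend_buffer() now only allocates the backing physical pages; the GPU mapping is established later, when the group is bound to an MCU shared region at on-slot time, so the error path collapses to a single kfree. A user-space sketch of the same allocate-array-then-pages shape, with PFN_UP re-derived from PAGE_SIZE (all names here are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
/* Round a byte count up to whole pages, as PFN_UP() does in the kernel. */
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

struct suspend_buf_sketch {
	uint64_t gpu_va;	/* 0 while the buffer is not mapped on the GPU */
	void   **phy;		/* one entry per backing page                  */
	size_t   nr_pages;
};

static int create_suspend_buf(struct suspend_buf_sketch *s, size_t suspend_size)
{
	size_t i, nr_pages = PFN_UP(suspend_size);

	s->gpu_va = 0;			/* not mapped yet */
	s->phy = calloc(nr_pages, sizeof(*s->phy));
	if (!s->phy)
		return -1;

	for (i = 0; i < nr_pages; i++) {
		s->phy[i] = calloc(1, PAGE_SIZE);
		if (!s->phy[i]) {
			while (i--)
				free(s->phy[i]);
			free(s->phy);	/* single unwind step, as in the patch */
			s->phy = NULL;
			return -1;
		}
	}
	s->nr_pages = nr_pages;
	return 0;
}

int main(void)
{
	struct suspend_buf_sketch s;

	/* Cleanup omitted for brevity. */
	if (create_suspend_buf(&s, 9300) == 0)
		printf("allocated %zu pages for a 9300-byte suspend buffer\n", s.nr_pages);
	return 0;
}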
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - /* Allocate and initialize Region Object */ - reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, - nr_pages, KBASE_REG_ZONE_MCU_SHARED); - - if (!reg) - return -ENOMEM; - - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); - if (!phys) { - err = -ENOMEM; - goto phy_alloc_failed; + if (err < 0) { + kfree(s_buf->phy); + return err; } - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, - nr_pages, true); - if (s_buf->pma == NULL) { - err = -ENOMEM; - goto pma_alloc_failed; - } - - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page - */ - mutex_lock(&kbdev->csf.reg_lock); - err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); - reg->flags &= ~KBASE_REG_FREE; - mutex_unlock(&kbdev->csf.reg_lock); - - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - phys, nr_pages, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - kfree(phys); + kbase_process_page_usage_inc(kctx, nr_pages); return 0; - -mmu_insert_failed: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); -pma_alloc_failed: - kfree(phys); -phy_alloc_failed: - kfree(reg); - - return err; } static void timer_event_worker(struct work_struct *data); @@ -1302,26 +1128,17 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, static int create_suspend_buffers(struct kbase_context *const kctx, struct kbase_queue_group * const group) { - int err = 0; - if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); return -ENOMEM; } - if (kctx->kbdev->csf.pma_dev) { - err = create_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); - if (err) { - term_normal_suspend_buffer(kctx, - &group->normal_suspend_buf); - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); - } - } else { - group->protected_suspend_buf.reg = NULL; - } + /* Protected suspend buffer, runtime binding so just initialize it */ + group->protected_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.pma = NULL; + group->protected_suspend_buf.alloc_retries = 0; - return err; + return 0; } /** @@ -1387,6 +1204,9 @@ static int create_queue_group(struct kbase_context *const kctx, group->cs_unrecoverable = false; group->reevaluate_idle_status = false; + group->csg_reg = NULL; + group->csg_reg_bind_retries = 0; + group->dvs_buf = create->in.dvs_buf; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -1518,65 +1338,39 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, * @s_buf: Pointer to queue group suspend buffer to be freed */ static void term_normal_suspend_buffer(struct kbase_context *const kctx, - struct kbase_normal_suspend_buffer *s_buf) + struct kbase_normal_suspend_buffer *s_buf) { - const size_t nr_pages = - PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); lockdep_assert_held(&kctx->csf.lock); - WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); + /* The group should not have a 
bind remaining on any suspend buf region */ + WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination"); - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kctx->kbdev->csf.reg_lock); - kbase_remove_va_region(kctx->kbdev, s_buf->reg); - mutex_unlock(&kctx->kbdev->csf.reg_lock); - - kbase_mem_pool_free_pages( - &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], - nr_pages, &s_buf->phy[0], false, false); + kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, + &s_buf->phy[0], false, false); + kbase_process_page_usage_dec(kctx, nr_pages); kfree(s_buf->phy); s_buf->phy = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; } /** - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of + * term_protected_suspend_buffer() - Free protected-mode suspend buffer of * queue group * * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to queue group suspend buffer to be freed + * @sbuf: Pointer to queue group suspend buffer to be freed */ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) + struct kbase_protected_suspend_buffer *sbuf) { - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - size_t i = 0; - - for (i = 0; phys && i < nr_pages; i++) - phys[i] = as_tagged(s_buf->pma[i]->pa); - - WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, - nr_pages, MCU_AS_NR)); - - kfree(phys); - - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); - - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, s_buf->reg); - mutex_unlock(&kbdev->csf.reg_lock); - - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); - s_buf->pma = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; + WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!"); + if (sbuf->pma) { + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); + sbuf->pma = NULL; + } } void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) @@ -1743,6 +1537,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, kfree(group); } +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, @@ -2022,12 +1817,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_read(&queue->refcount) != 1) + WARN_ON(atomic_read(&queue->refcount) != 1); #else - if (refcount_read(&queue->refcount) != 1) + WARN_ON(refcount_read(&queue->refcount) != 1); #endif - dev_warn(kctx->kbdev->dev, - "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); release_queue(queue); } @@ -2378,6 +2171,81 @@ static void handle_progress_timer_event(struct kbase_queue_group *const group) queue_work(group->kctx->csf.wq, &group->timer_event_work); } +/** + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected + * memory for the protected mode suspend buffer. + * @group: Pointer to the GPU queue group. + * + * Return: 0 if suspend buffer allocation is successful or if its already allocated, otherwise + * negative error value. 
+ */ +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct tagged_addr *phys = NULL; + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + size_t nr_pages; + int err = 0; + + if (likely(sbuf->pma)) + return 0; + + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); + if (unlikely(!phys)) { + err = -ENOMEM; + goto phys_free; + } + + mutex_lock(&kctx->csf.lock); + kbase_csf_scheduler_lock(kbdev); + + if (unlikely(!group->csg_reg)) { + /* The only chance of the bound csg_reg is removed from the group is + * that it has been put off slot by the scheduler and the csg_reg resource + * is contended by other groups. In this case, it needs another occasion for + * mapping the pma, which needs a bound csg_reg. Since the group is already + * off-slot, returning no error is harmless as the scheduler, when place the + * group back on-slot again would do the required MMU map operation on the + * allocated and retained pma. + */ + WARN_ON(group->csg_nr >= 0); + dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode", + group->kctx->tgid, group->kctx->id, group->handle); + goto unlock; + } + + /* Allocate the protected mode pages */ + sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); + if (unlikely(!sbuf->pma)) { + err = -ENOMEM; + goto unlock; + } + + /* Map the bound susp_reg to the just allocated pma pages */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + +unlock: + kbase_csf_scheduler_unlock(kbdev); + mutex_unlock(&kctx->csf.lock); +phys_free: + kfree(phys); + return err; +} + +static void report_group_fatal_error(struct kbase_queue_group *const group) +{ + struct base_gpu_queue_group_error const + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); +} + /** * protm_event_worker - Protected mode switch request event handler * called from a workqueue. 
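[Editor's note] alloc_grp_protected_suspend_buffer_pages() above makes the protected-mode suspend buffer a lazy, idempotent allocation: it returns immediately if the PMA already exists, and tolerates the group temporarily having no bound csg_reg. A small sketch of that allocate-once shape; locking and the csg_reg check are reduced to a flag, and all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct group_sketch {
	void *pma;		/* protected-memory allocation, NULL until first use    */
	int   has_csg_reg;	/* stands in for "group currently has a bound region"  */
};

/* Allocate the protected suspend buffer at most once.
 * - already allocated  -> success, nothing to do
 * - no bound region    -> success, allocation deferred to the next attempt
 * - allocation failure -> error, caller may retry
 */
static int alloc_pmode_buf_once(struct group_sketch *g, size_t nr_pages)
{
	if (g->pma)
		return 0;		/* idempotent: already allocated */

	if (!g->has_csg_reg)
		return 0;		/* harmless: retried when back on slot */

	g->pma = calloc(nr_pages, 4096);
	return g->pma ? 0 : -1;
}

int main(void)
{
	struct group_sketch g = { NULL, 1 };

	printf("first call:  %d\n", alloc_pmode_buf_once(&g, 4));
	printf("second call: %d (no re-allocation)\n", alloc_pmode_buf_once(&g, 4));
	free(g.pma);
	return 0;
}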
@@ -2390,10 +2258,26 @@ static void protm_event_worker(struct work_struct *data) { struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - kbase_csf_scheduler_group_protm_enter(group); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + queue_work(group->kctx->csf.wq, &group->protm_event_work); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } @@ -2517,7 +2401,10 @@ static void cs_error_worker(struct work_struct *const data) struct kbase_queue_group *group; u8 group_handle; bool reset_prevented = false; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); + int err; + + kbase_debug_csf_fault_wait_completion(kbdev); + err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) dev_warn( @@ -2526,7 +2413,6 @@ static void cs_error_worker(struct work_struct *const data) else reset_prevented = true; - kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&kctx->csf.lock); group = get_bound_queue_group(queue); @@ -2724,12 +2610,17 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, get_queue(queue); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, queue, cs_req ^ cs_ack); - if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { + if (!queue_work(wq, &queue->oom_event_work)) { /* The work item shall not have been * already queued, there can be only * one pending OoM event for a * queue. */ + dev_warn( + kbdev->dev, + "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", + queue->csi_index, group->handle, queue->kctx->tgid, + queue->kctx->id); release_queue(queue); } } @@ -2760,6 +2651,9 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, track->protm_grp = group; } + if (!group->protected_suspend_buf.pma) + queue_work(group->kctx->csf.wq, &group->protm_event_work); + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2801,8 +2695,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2811,7 +2703,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* There may not be any pending CSG/CS interrupts to process */ if ((req == ack) && (irqreq == irqack)) - goto out; + return; /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. 
This would ensure that Host @@ -2832,10 +2724,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * slot scheduler spinlock is required. */ if (!group) - goto out; + return; if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) - goto out; + return; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, @@ -2897,8 +2791,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c process_cs_interrupts(group, ginfo, irqreq, irqack, track); -out: - /* group may still be NULL here */ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); } @@ -3058,6 +2950,10 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack) kbase_ipa_control_protm_exited(kbdev); kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); } + +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_enable_pmode_exit(kbdev); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ } static inline void process_tracked_info_for_protm(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h index b2677405761f..9fbc932b7905 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -40,12 +40,15 @@ */ #define KBASEP_USER_DB_NR_INVALID ((s8)-1) +/* Number of pages used for GPU command queue's User input & output data */ +#define KBASEP_NUM_CS_USER_IO_PAGES (2) + /* Indicates an invalid value for the scan out sequence number, used to * signify there is no group that has protected mode execution pending. */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -123,6 +126,25 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term); +/** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. 
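[Editor's note] The reworked protm_event_worker() above retries the protected suspend buffer allocation by re-queuing its own work item, bounded by PROTM_ALLOC_MAX_RETRIES, and escalates to a group-fatal error once the budget is exhausted or a non-ENOMEM error is seen. A stand-alone sketch of that bounded-retry-then-fail pattern follows; the worker re-queue is modelled as a plain loop and the names are illustrative.

#include <errno.h>
#include <stdio.h>

#define PROTM_ALLOC_MAX_RETRIES_SKETCH 4

/* Pretend allocator: fails with -ENOMEM a few times before succeeding. */
static int try_alloc(int attempt)
{
	return (attempt < 3) ? -ENOMEM : 0;
}

static void report_fatal(void)
{
	fprintf(stderr, "fatal: could not allocate protected suspend buffer\n");
}

/* Each iteration models one run of the worker: on -ENOMEM the driver
 * would re-queue itself; here we just loop, counting attempts the same way.
 */
static void protm_worker_sketch(void)
{
	int retries = 0;

	for (;;) {
		int err = try_alloc(retries);

		if (!err) {
			printf("allocation succeeded after %d retries\n", retries);
			return;			/* enter protected mode */
		}
		if (err == -ENOMEM && retries < PROTM_ALLOC_MAX_RETRIES_SKETCH) {
			retries++;		/* re-queue the work item */
			continue;
		}
		report_fatal();			/* budget exhausted or hard error */
		return;
	}
}

int main(void)
{
	protm_worker_sketch();
	return 0;
}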
+ */ +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + /** * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. @@ -185,6 +207,20 @@ void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); +/** + * kbase_csf_queue_group_handle_is_valid - Find the queue group corresponding + * to the indicated handle. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created. + * + * This function is used to find the queue group when passed a handle. + * + * Return: Pointer to a queue group on success, NULL on failure + */ +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); + /** * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle * is valid. @@ -464,4 +500,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) return 0; #endif } + #endif /* _KBASE_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c index 3afbe6d4005e..e96044ae6239 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.c @@ -100,7 +100,7 @@ static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev, } } -static void update_active_groups_status(struct kbase_device *kbdev, struct seq_file *file) +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev) { u32 max_csg_slots = kbdev->csf.global_iface.group_num; DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 }; @@ -116,6 +116,8 @@ static void update_active_groups_status(struct kbase_device *kbdev, struct seq_f * status of all on-slot groups when MCU sleep request is sent to it. */ if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { + /* Wait for the MCU sleep request to complete. */ + kbase_pm_wait_for_desired_state(kbdev); bitmap_copy(csg_slots_status_updated, kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots); return; @@ -496,23 +498,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, { u32 gr; struct kbase_context *const kctx = file->private; - struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_device *kbdev; if (WARN_ON(!kctx)) return -EINVAL; + kbdev = kctx->kbdev; + seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", MALI_CSF_CSG_DEBUGFS_VERSION); mutex_lock(&kctx->csf.lock); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. - */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { struct kbase_queue_group *const group = kctx->csf.queue_groups[gr]; @@ -546,13 +544,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, MALI_CSF_CSG_DEBUGFS_VERSION); kbase_csf_scheduler_lock(kbdev); - if (kbdev->csf.scheduler.state == SCHED_SLEEPING) { - /* Wait for the MCU sleep request to complete. Please refer the - * update_active_groups_status() function for the explanation. 
- */ - kbase_pm_wait_for_desired_state(kbdev); - } - update_active_groups_status(kbdev, file); + kbase_csf_debugfs_update_active_groups_status(kbdev); for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { struct kbase_queue_group *const group = kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h index 397e657d2cb6..16a548bf8acb 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_csg_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,4 +44,11 @@ void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); */ void kbase_csf_debugfs_init(struct kbase_device *kbdev); +/** + * kbase_csf_debugfs_update_active_groups_status() - Update on-slot group statuses + * + * @kbdev: Pointer to the device + */ +void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev); + #endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h index 32a1c557e387..e4a69cb169c3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_defs.h @@ -33,6 +33,10 @@ #include "mali_kbase_csf_event.h" #include +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +#include +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /* Maximum number of KCPU command queues to be created per GPU address space. */ #define KBASEP_MAX_KCPU_QUEUES ((size_t)256) @@ -298,9 +302,9 @@ struct kbase_csf_notification { * * @kctx: Pointer to the base context with which this GPU command queue * is associated. - * @reg: Pointer to the region allocated from the shared - * interface segment for mapping the User mode - * input/output pages in MCU firmware address space. + * @user_io_gpu_va: The start GPU VA address of this queue's userio pages. Only + * valid (i.e. not 0 ) when the queue is enabled and its owner + * group has a runtime bound csg_reg (group region). * @phys: Pointer to the physical pages allocated for the * pair or User mode input/output page * @user_io_addr: Pointer to the permanent kernel mapping of User mode @@ -376,7 +380,7 @@ struct kbase_csf_notification { */ struct kbase_queue { struct kbase_context *kctx; - struct kbase_va_region *reg; + u64 user_io_gpu_va; struct tagged_addr phys[2]; char *user_io_addr; u64 handle; @@ -421,26 +425,33 @@ struct kbase_queue { /** * struct kbase_normal_suspend_buffer - Object representing a normal * suspend buffer for queue group. - * @reg: Memory region allocated for the normal-mode suspend buffer. + * @gpu_va: The start GPU VA address of the bound suspend buffer. Note, this + * field is only valid when the owner group has a region bound at + * runtime. * @phy: Array of physical memory pages allocated for the normal- * mode suspend buffer. */ struct kbase_normal_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct tagged_addr *phy; }; /** * struct kbase_protected_suspend_buffer - Object representing a protected * suspend buffer for queue group. - * @reg: Memory region allocated for the protected-mode suspend buffer. 
+ * @gpu_va: The start GPU VA address of the bound protected mode suspend buffer. + * Note, this field is only valid when the owner group has a region + * bound at runtime. * @pma: Array of pointer to protected mode allocations containing * information about memory pages allocated for protected mode * suspend buffer. + * @alloc_retries: Number of times we retried allocing physical pages + * for protected suspend buffers. */ struct kbase_protected_suspend_buffer { - struct kbase_va_region *reg; + u64 gpu_va; struct protected_memory_allocation **pma; + u8 alloc_retries; }; /** @@ -512,6 +523,13 @@ struct kbase_protected_suspend_buffer { * @deschedule_deferred_cnt: Counter keeping a track of the number of threads * that tried to deschedule the group and had to defer * the descheduling due to the dump on fault. + * @csg_reg: An opaque pointer to the runtime bound shared regions. It is + * dynamically managed by the scheduler and can be NULL if the + * group is off-slot. + * @csg_reg_bind_retries: Runtime MCU shared region map operation attempted counts. + * It is accumulated on consecutive mapping attempt failures. On + * reaching a preset limit, the group is regarded as suffered + * a fatal error and triggers a fatal error notification. */ struct kbase_queue_group { struct kbase_context *kctx; @@ -562,6 +580,8 @@ struct kbase_queue_group { #if IS_ENABLED(CONFIG_DEBUG_FS) u32 deschedule_deferred_cnt; #endif + void *csg_reg; + u8 csg_reg_bind_retries; }; /** @@ -623,6 +643,8 @@ struct kbase_csf_cpu_queue_context { * @lock: Lock preventing concurrent access to the @in_use bitmap. * @in_use: Bitmap that indicates which heap context structures are currently * allocated (in @region). + * @heap_context_size_aligned: Size of a heap context structure, in bytes, + * aligned to GPU cacheline size. * * Heap context structures are allocated by the kernel for use by the firmware. * The current implementation subdivides a single GPU memory region for use as @@ -634,6 +656,7 @@ struct kbase_csf_heap_context_allocator { u64 gpu_va; struct mutex lock; DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); + u32 heap_context_size_aligned; }; /** @@ -874,6 +897,33 @@ struct kbase_csf_sched_heap_reclaim_mgr { atomic_t unused_pages; }; +/** + * struct kbase_csf_mcu_shared_regions - Control data for managing the MCU shared + * interface segment regions for scheduler + * operations + * + * @array_csg_regs: Base pointer of an internally created array_csg_regs[]. + * @unused_csg_regs: List contains unused csg_regs items. When an item is bound to a + * group that is placed onto on-slot by the scheduler, it is dropped + * from the list (i.e busy active). The Scheduler will put an active + * item back when it's becoming off-slot (not in use). + * @dummy_phys: An array of dummy phys[nr_susp_pages] pages for use with normal + * and pmode suspend buffers, as a default replacement of a CSG's pages + * for the MMU mapping when the csg_reg is not bound to a group. + * @pma_phys: Pre-allocated array phy[nr_susp_pages] for transitional use with + * protected suspend buffer MMU map operations. + * @userio_mem_rd_flags: Userio input page's read access mapping configuration flags. + * @dummy_phys_allocated: Indicating the @p dummy_phy page is allocated when true. 
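[Editor's note] The defs changes above replace per-queue and per-buffer struct kbase_va_region pointers with plain GPU VAs that are only non-zero while the owning group holds a runtime-bound MCU shared region. A compact sketch of that convention, with a helper expressing the "0 means unbound" rule; types and names are simplified stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Runtime-bound mapping: gpu_va is valid only while the owner group is
 * bound to an MCU shared region; 0 means "currently unmapped".
 */
struct runtime_bound_buf {
	uint64_t gpu_va;
};

static bool buf_is_bound(const struct runtime_bound_buf *b)
{
	return b->gpu_va != 0;
}

static void bind_buf(struct runtime_bound_buf *b, uint64_t va)
{
	b->gpu_va = va;		/* scheduler maps the buffer when going on-slot */
}

static void unbind_buf(struct runtime_bound_buf *b)
{
	b->gpu_va = 0;		/* scheduler unmaps it when the group goes off-slot */
}

int main(void)
{
	struct runtime_bound_buf suspend_buf = { 0 };

	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	bind_buf(&suspend_buf, 0x6000000000ULL);
	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	unbind_buf(&suspend_buf);
	printf("bound? %d\n", buf_is_bound(&suspend_buf));
	return 0;
}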
+ */ +struct kbase_csf_mcu_shared_regions { + void *array_csg_regs; + struct list_head unused_csg_regs; + struct tagged_addr *dummy_phys; + struct tagged_addr *pma_phys; + unsigned long userio_mem_rd_flags; + bool dummy_phys_allocated; +}; + /** * struct kbase_csf_scheduler - Object representing the scheduler used for * CSF for an instance of GPU platform device. @@ -1008,6 +1058,9 @@ struct kbase_csf_sched_heap_reclaim_mgr { * @interrupt_lock is used to serialize the access. * @protm_enter_time: GPU protected mode enter time. * @reclaim_mgr: CSGs tiler heap manager object. + * @mcu_regs_data: Scheduler MCU shared regions data for managing the + * shared interface mappings for on-slot queues and + * CSG suspend buffers. */ struct kbase_csf_scheduler { struct mutex lock; @@ -1051,6 +1104,7 @@ struct kbase_csf_scheduler { u32 tick_protm_pending_seq; ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; + struct kbase_csf_mcu_shared_regions mcu_regs_data; }; /* @@ -1328,6 +1382,24 @@ struct kbase_csf_firmware_log { u32 func_call_list_va_end; }; +/** + * struct kbase_csf_firmware_core_dump - Object containing members for handling + * firmware core dump. + * + * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer + * in Firmware. + * @version: Version of the FW image header core dump data format. Bits + * 7:0 specify version minor and 15:8 specify version major. + * @available: Flag to identify if the FW core dump buffer is available. + * True if entry is available in the FW image header and version + * is supported, False otherwise. + */ +struct kbase_csf_firmware_core_dump { + u32 mcu_regs_addr; + u16 version; + bool available; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon @@ -1458,9 +1530,9 @@ struct kbase_csf_dump_on_fault { * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time - * window in unit of ms. The firmware does not use it - * directly. + * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time + * window in unit of microseconds. The firmware does not + * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. @@ -1470,6 +1542,8 @@ struct kbase_csf_dump_on_fault { * HW counters. * @fw: Copy of the loaded MCU firmware image. * @fw_log: Contain members required for handling firmware log. + * @fw_core_dump: Contain members required for handling the firmware + * core dump. * @dof: Structure for dump on fault. */ struct kbase_csf_device { @@ -1507,15 +1581,22 @@ struct kbase_csf_device { u32 mcu_core_pwroff_dur_us; u32 mcu_core_pwroff_dur_count; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_hysteresis_us; u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; + struct kbase_csf_firmware_core_dump fw_core_dump; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_dump_on_fault dof; #endif /* CONFIG_DEBUG_FS */ +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /** + * @coresight: Coresight device structure. 
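[Editor's note] The new kbase_csf_firmware_core_dump state documented above stores the dump-data format version packed with the major number in bits 15:8 and the minor number in bits 7:0. A tiny sketch of packing and unpacking that field; the macro names are mine, not the driver's.

#include <stdint.h>
#include <stdio.h>

/* Version layout used by the FW image header core dump entry:
 * bits 15:8 = major, bits 7:0 = minor.
 */
#define CORE_DUMP_VERSION_PACK(major, minor)	((uint16_t)(((major) << 8) | ((minor) & 0xff)))
#define CORE_DUMP_VERSION_MAJOR(v)		(((v) >> 8) & 0xff)
#define CORE_DUMP_VERSION_MINOR(v)		((v) & 0xff)

int main(void)
{
	uint16_t v = CORE_DUMP_VERSION_PACK(0, 1);	/* the only supported version: 0.1 */

	printf("packed 0x%04x -> %u.%u\n", v,
	       CORE_DUMP_VERSION_MAJOR(v), CORE_DUMP_VERSION_MINOR(v));
	return 0;
}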
+ */ + struct kbase_debug_coresight_device coresight; +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; /** diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c index 1f4a4d9b6876..548657bc0a38 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.c @@ -22,6 +22,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" #include "mali_kbase_csf_firmware_log.h" +#include "mali_kbase_csf_firmware_core_dump.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -38,7 +39,6 @@ #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include #include - #include #include #include @@ -81,7 +81,7 @@ MODULE_PARM_DESC(fw_debug, #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) #define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ @@ -93,12 +93,13 @@ MODULE_PARM_DESC(fw_debug, CSF_FIRMWARE_ENTRY_ZERO | \ CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -120,7 +121,6 @@ MODULE_PARM_DESC(fw_debug, (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -286,6 +286,13 @@ static void boot_csf_firmware(struct kbase_device *kbdev) { kbase_csf_firmware_enable_mcu(kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + kbase_debug_coresight_csf_state_request(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED); + + if (!kbase_debug_coresight_csf_state_wait(kbdev, KBASE_DEBUG_CORESIGHT_CSF_ENABLED)) + dev_err(kbdev->dev, "Timeout waiting for CoreSight to be enabled"); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + wait_for_firmware_boot(kbdev); } @@ -488,6 +495,7 @@ out: * @kbdev: Kbase device structure * @virtual_start: Start of the virtual address range required for an entry allocation * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found * @phys: Pointer to the array of physical (tagged) addresses making up the new * FW interface entry. It is an output parameter which would be made to * point to an already existing array allocated for the previously parsed @@ -508,10 +516,12 @@ out: * * Return: true if a large page can be reused, false otherwise. 
*/ -static inline bool entry_find_large_page_to_reuse( - struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end, - struct tagged_addr **phys, struct protected_memory_allocation ***pma, - u32 num_pages, u32 *num_pages_aligned, bool *is_small_page) +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -557,7 +567,7 @@ static inline bool entry_find_large_page_to_reuse( if (interface->virtual & (SZ_2M - 1)) continue; - if (virtual_diff < virtual_diff_min) { + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { target_interface = interface; virtual_diff_min = virtual_diff; } @@ -620,6 +630,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -662,9 +673,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -685,6 +696,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false); + ignore_page_migration = false; } } @@ -794,7 +806,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1023,20 +1036,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return parse_build_info_metadata_entry(kbdev, fw, entry, size); case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ - if (size < 2 * sizeof(*entry)) { + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); - break; - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ 
-1687,6 +1706,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
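[Editor's note] The new GLB_DEBUG plumbing above uses the CSF toggle-bit handshake: the host XORs the request bit against the last ACK value, rings the doorbell, and the request is complete once REQ and ACK agree under the mask. A minimal host-side model of that protocol, with two plain words standing in for the shared interface pages:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEBUG_RUN_MASK 0x1u

/* Shared-interface stand-ins: host writes REQ, firmware writes ACK. */
static uint32_t glb_debug_req;
static uint32_t glb_debug_ack;

/* Host: toggle the request bit relative to the current ACK value. */
static void set_debug_request(uint32_t mask)
{
	uint32_t req = glb_debug_ack ^ mask;

	glb_debug_req = (glb_debug_req & ~mask) | (req & mask);
}

/* Request is complete when REQ and ACK agree under the mask. */
static bool debug_request_complete(uint32_t mask)
{
	return (glb_debug_ack & mask) == (glb_debug_req & mask);
}

/* Firmware side (modelled): acknowledge by copying REQ into ACK. */
static void fw_acknowledge(uint32_t mask)
{
	glb_debug_ack = (glb_debug_ack & ~mask) | (glb_debug_req & mask);
}

int main(void)
{
	set_debug_request(DEBUG_RUN_MASK);
	printf("complete before ack: %d\n", debug_request_complete(DEBUG_RUN_MASK));
	fw_acknowledge(DEBUG_RUN_MASK);
	printf("complete after ack:  %d\n", debug_request_complete(DEBUG_RUN_MASK));
	return 0;
}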
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} /** * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core @@ -1714,7 +1798,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1740,6 +1824,14 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + /* Enable FW MCU read/write debug interfaces */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_DEBUG_ACK_IRQ_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK, + GLB_DEBUG_REQ_FW_AS_READ_MASK | GLB_DEBUG_REQ_FW_AS_WRITE_MASK); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1890,12 +1982,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -1913,9 +2005,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; @@ -1938,7 +2030,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -1955,7 +2047,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -1986,7 +2078,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2166,14 +2258,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -2353,6 +2445,10 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#ifdef CONFIG_MALI_FW_CORE_DUMP + kbase_csf_firmware_core_dump_init(kbdev); +#endif + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2470,6 +2566,119 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) kbdev->as_free |= MCU_AS_BITMASK; } +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address and value to write */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN1, reg_val); + + /* Set the Global Debug request for FW MCU write */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_WRITE_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_WRITE_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = 
wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "w: reg %08x val %08x", reg_addr, reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + u32 glb_req; + + if (WARN_ON(reg_val == NULL)) + return -EINVAL; + + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the address to read */ + kbase_csf_firmware_global_input(global_iface, GLB_DEBUG_ARG_IN0, reg_addr); + + /* Set the Global Debug request for FW MCU read */ + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_req ^= GLB_DEBUG_REQ_FW_AS_READ_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_req, + GLB_DEBUG_REQ_FW_AS_READ_MASK); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); + + /* Notify FW about the Global Debug request */ + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + + if (!err) { + kbase_csf_scheduler_spin_lock(kbdev, &flags); + *reg_val = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ARG_OUT0); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } + + mutex_unlock(&kbdev->csf.reg_lock); + + dev_dbg(kbdev->dev, "r: reg %08x val %08x", reg_addr, *reg_val); + + return err; +} + +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val) +{ + unsigned long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms) + jiffies; + u32 read_val; + + dev_dbg(kbdev->dev, "p: reg %08x val %08x mask %08x", reg_addr, reg_val, val_mask); + + while (time_before(jiffies, remaining)) { + int err = kbase_csf_firmware_mcu_register_read(kbdev, reg_addr, &read_val); + + if (err) { + dev_err(kbdev->dev, + "Error reading MCU register value (read_val = %u, expect = %u)\n", + read_val, reg_val); + return err; + } + + if ((read_val & val_mask) == reg_val) + return 0; + } + + dev_err(kbdev->dev, + "Timeout waiting for MCU register value to be set (read_val = %u, expect = %u)\n", + read_val, reg_val); + + return -ETIMEDOUT; +} +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) { struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; @@ -2848,7 +3057,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -2909,4 +3118,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h index 7560a298ac9c..714a14001189 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware.h @@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); - /** * struct kbase_csf_global_iface - Global CSF 
interface * provided by the firmware. @@ -450,6 +449,50 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev); */ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) +/** + * kbase_csf_firmware_mcu_register_write - Write to MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to write into + * @reg_val: Value to be written + * + * Write a desired value to a register in MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_write(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const reg_val); +/** + * kbase_csf_firmware_mcu_register_read - Read from MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @reg_val: Value as present in reg_addr register + * + * Read a value from MCU address space. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_read(struct kbase_device *const kbdev, u32 const reg_addr, + u32 *reg_val); + +/** + * kbase_csf_firmware_mcu_register_poll - Poll MCU register + * + * @kbdev: Instance of a gpu platform device that implements a csf interface. + * @reg_addr: Register address to read from + * @val_mask: Value to mask the read value for comparison + * @reg_val: Value to be compared against + * + * Continue to read a value from MCU address space until it matches given mask and value. + * + * return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_mcu_register_poll(struct kbase_device *const kbdev, u32 const reg_addr, + u32 const val_mask, u32 const reg_val); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + /** * kbase_csf_firmware_ping - Send the ping request to firmware. * @@ -858,5 +901,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +/** + * kbase_csf_firmware_req_core_dump - Request a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Request a firmware core dump and wait for for firmware to acknowledge. + * Firmware will enter infinite loop after the firmware core dump is created. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); #endif diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c new file mode 100644 index 000000000000..f0a10d197eec --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
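[Editor's note] kbase_csf_firmware_mcu_register_poll(), added above, repeatedly reads an MCU register until a masked comparison matches or a jiffies-based deadline expires. A user-space sketch of the same poll-until-match-or-timeout loop, using a monotonic clock in place of jiffies; the fake read function and timeout value are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Pretend register read: reports "busy" for the first few polls. */
static int mcu_register_read_sketch(uint32_t addr, uint32_t *val)
{
	static int calls;

	(void)addr;
	*val = (++calls < 3) ? 0x0 : 0x1;	/* bit 0 set once "ready" */
	return 0;
}

static double now_seconds(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

/* Poll until (value & mask) == expected, or the timeout expires. */
static int mcu_register_poll_sketch(uint32_t addr, uint32_t mask,
				    uint32_t expected, double timeout_s)
{
	const double deadline = now_seconds() + timeout_s;
	uint32_t val;

	while (now_seconds() < deadline) {
		int err = mcu_register_read_sketch(addr, &val);

		if (err)
			return err;
		if ((val & mask) == expected)
			return 0;
	}
	return -1;				/* timed out, like -ETIMEDOUT */
}

int main(void)
{
	int ret = mcu_register_poll_sketch(0x40000, 0x1, 0x1, 0.5);

	printf("poll result: %d\n", ret);
	return 0;
}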
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_core_dump.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/* Page size in bytes in use by MCU. */ +#define FW_PAGE_SIZE 4096 + +/* + * FW image header core dump data format supported. + * Currently only version 0.1 is supported. + */ +#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 +#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + +/* Full version of the image header core dump data format */ +#define FW_CORE_DUMP_DATA_VERSION \ + ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + +/* Validity flag to indicate if the MCU registers in the buffer are valid */ +#define FW_MCU_STATUS_MASK 0x1 +#define FW_MCU_STATUS_VALID (1 << 0) + +/* Core dump entry fields */ +#define FW_CORE_DUMP_VERSION_INDEX 0 +#define FW_CORE_DUMP_START_ADDR_INDEX 1 + +/* MCU registers stored by a firmware core dump */ +struct fw_core_dump_mcu { + u32 r0; + u32 r1; + u32 r2; + u32 r3; + u32 r4; + u32 r5; + u32 r6; + u32 r7; + u32 r8; + u32 r9; + u32 r10; + u32 r11; + u32 r12; + u32 sp; + u32 lr; + u32 pc; +}; + +/* Any ELF definitions used in this file are from elf.h/elfcore.h except + * when specific 32-bit versions are required (mainly for the + * ELF_PRSTATUS32 note that is used to contain the MCU registers). + */ + +/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ +struct prstatus32_timeval { + int tv_sec; + int tv_usec; +}; + +/* - Structure defining ELF32 PRSTATUS note contents, as defined by the + * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. + * Note: GDB checks for the size of this structure to be 0x94. + * Modified pr_reg (array containing the Arm 32-bit MCU registers) to + * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. + */ +struct elf_prstatus32 { + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. */ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. */ +}; + +/** + * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +struct fw_core_dump_data { + struct kbase_device *kbdev; +}; + +/* + * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @interface: current firmware memory interface + * @page_num: current page number (0..) within @interface + */ +struct fw_core_dump_seq_off { + struct kbase_csf_firmware_interface *interface; + u32 page_num; +}; + +/** + * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @regs: Pointer to a core dump mcu struct where the MCU registers are copied + * to. 
Should be allocated by the called. + * + * Return: 0 if successfully copied the MCU registers, negative error code otherwise. + */ +static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ + unsigned int i; + u32 status = 0; + u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; + u32 *data = (u32 *)regs; + + /* Check if the core dump entry exposed the buffer */ + if (!regs || !kbdev->csf.fw_core_dump.available) + return -EPERM; + + /* Check if the data in the buffer is valid, if not, return error */ + kbase_csf_read_firmware_memory(kbdev, data_addr, &status); + if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) + return -EPERM; + + /* According to image header documentation, the MCU registers core dump + * buffer is 32-bit aligned. + */ + for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) + kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); + + return 0; +} + +/** + * fw_core_dump_fill_elf_header - Initializes an ELF32 header + * @hdr: ELF32 header to initialize + * @sections: Number of entries in the ELF program header table + * + * Initializes an ELF32 header for an ARM 32-bit little-endian + * 'Core file' object file. + */ +static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ + /* Reset all members in header. */ + memset(hdr, 0, sizeof(*hdr)); + + /* Magic number identifying file as an ELF object. */ + memcpy(hdr->e_ident, ELFMAG, SELFMAG); + + /* Identify file as 32-bit, little-endian, using current + * ELF header version, with no OS or ABI specific ELF + * extensions used. + */ + hdr->e_ident[EI_CLASS] = ELFCLASS32; + hdr->e_ident[EI_DATA] = ELFDATA2LSB; + hdr->e_ident[EI_VERSION] = EV_CURRENT; + hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + + /* 'Core file' type of object file. */ + hdr->e_type = ET_CORE; + + /* ARM 32-bit architecture (AARCH32) */ + hdr->e_machine = EM_ARM; + + /* Object file version: the original format. */ + hdr->e_version = EV_CURRENT; + + /* Offset of program header table in file. */ + hdr->e_phoff = sizeof(struct elf32_hdr); + + /* No processor specific flags. */ + hdr->e_flags = 0; + + /* Size of the ELF header in bytes. */ + hdr->e_ehsize = sizeof(struct elf32_hdr); + + /* Size of the ELF program header entry in bytes. */ + hdr->e_phentsize = sizeof(struct elf32_phdr); + + /* Number of entries in the program header table. */ + hdr->e_phnum = sections; +} + +/** + * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header + * for holding auxiliary information + * @phdr: ELF32 program header + * @file_offset: Location of the note in the file in bytes + * @size: Size of the note in bytes. + * + * Initializes an ELF32 program header describing auxiliary information (containing + * one or more notes) of @size bytes alltogether located in the file at offset + * @file_offset. + */ +static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, + u32 size) +{ + /* Auxiliary information (note) in program header. */ + phdr->p_type = PT_NOTE; + + /* Location of first note in file in bytes. */ + phdr->p_offset = file_offset; + + /* Size of all notes combined in bytes. */ + phdr->p_filesz = size; + + /* Other members not relevant for a note. 
*/ + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_memsz = 0; + phdr->p_align = 0; + phdr->p_flags = 0; +} + +/** + * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment + * @phdr: ELF32 program header to initialize. + * @file_offset: Location of loadable segment in file in bytes + * (aligned to FW_PAGE_SIZE bytes) + * @vaddr: 32-bit virtual address where to write the segment + * (aligned to FW_PAGE_SIZE bytes) + * @size: Size of the segment in bytes. + * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. + * + * Initializes an ELF32 program header describing a loadable segment of + * @size bytes located in the file at offset @file_offset to be loaded + * at virtual address @vaddr with access permissions as described by + * CSF_FIRMWARE_ENTRY_* flags in @flags. + */ +static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, + u32 vaddr, u32 size, u32 flags) +{ + /* Loadable segment in program header. */ + phdr->p_type = PT_LOAD; + + /* Location of segment in file in bytes. Aligned to p_align bytes. */ + phdr->p_offset = file_offset; + + /* Virtual address of segment. Aligned to p_align bytes. */ + phdr->p_vaddr = vaddr; + + /* Physical address of segment. Not relevant. */ + phdr->p_paddr = 0; + + /* Size of segment in file and memory. */ + phdr->p_filesz = size; + phdr->p_memsz = size; + + /* Alignment of segment in the file and memory in bytes (integral power of 2). */ + phdr->p_align = FW_PAGE_SIZE; + + /* Set segment access permissions. */ + phdr->p_flags = 0; + if (flags & CSF_FIRMWARE_ENTRY_READ) + phdr->p_flags |= PF_R; + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + phdr->p_flags |= PF_W; + if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) + phdr->p_flags |= PF_X; +} + +/** + * fw_core_dump_get_prstatus_note_size - Calculates size of a ELF32 PRSTATUS note + * @name: Name given to the PRSTATUS note. + * + * Calculates the size of a 32-bit PRSTATUS note (which contains information + * about a process like the current MCU registers) taking into account + * @name must be padded to a 4-byte multiple. + * + * Return: size of 32-bit PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ + return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + + sizeof(struct elf_prstatus32); +} + +/** + * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure + * @prs: ELF32 PRSTATUS note to initialize + * @regs: MCU registers to copy into the PRSTATUS note + * + * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. + * Other process information is N/A for CSF Firmware. + */ +static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, + struct fw_core_dump_mcu *regs) +{ + /* Only fill in registers (32-bit) of PRSTATUS note. 
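+	 * The pr_reg layout follows the common Arm ELF core note ordering:
+	 * r0-r12, then sp, lr and pc. Any remaining pr_reg entries (for
+	 * example CPSR) are left at zero, as they are not captured in the
+	 * firmware core dump buffer.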
*/ + memset(prs, 0, sizeof(*prs)); + prs->pr_reg[0] = regs->r0; + prs->pr_reg[1] = regs->r1; + prs->pr_reg[2] = regs->r2; + prs->pr_reg[3] = regs->r3; + prs->pr_reg[4] = regs->r4; + prs->pr_reg[5] = regs->r5; + prs->pr_reg[6] = regs->r0; + prs->pr_reg[7] = regs->r7; + prs->pr_reg[8] = regs->r8; + prs->pr_reg[9] = regs->r9; + prs->pr_reg[10] = regs->r10; + prs->pr_reg[11] = regs->r11; + prs->pr_reg[12] = regs->r12; + prs->pr_reg[13] = regs->sp; + prs->pr_reg[14] = regs->lr; + prs->pr_reg[15] = regs->pc; +} + +/** + * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note + * @name: Name for the PRSTATUS note + * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note + * @created_prstatus_note: + * Pointer to the allocated ELF32 PRSTATUS note + * + * Creates an ELF32 note with one PRSTATUS entry containing the + * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in + * @created_prstatus_note. + * + * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, + struct elf32_note **created_prstatus_note) +{ + struct elf32_note *note; + unsigned int note_name_sz; + unsigned int note_sz; + + /* Allocate memory for ELF32 note containing a PRSTATUS note. */ + note_name_sz = strlen(name) + 1; + note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + + sizeof(struct elf_prstatus32); + note = kmalloc(note_sz, GFP_KERNEL); + if (!note) + return 0; + + /* Fill in ELF32 note with one entry for a PRSTATUS note. */ + note->n_namesz = note_name_sz; + note->n_descsz = sizeof(struct elf_prstatus32); + note->n_type = NT_PRSTATUS; + memcpy(note + 1, name, note_name_sz); + memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + + /* Return pointer and size of the created ELF32 note. */ + *created_prstatus_note = note; + return note_sz; +} + +/** + * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump + * @m: the seq_file handle + * + * Writes the ELF header of the core dump including program headers for + * memory sections and a note containing the current MCU register + * values. + * + * Excludes memory sections without read access permissions or + * are for protected memory. + * + * The data written is as follows: + * - ELF header + * - ELF PHDRs for memory sections + * - ELF PHDR for program header NOTE + * - ELF PRSTATUS note + * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE + * + * The actual memory section dumps should follow this (not written + * by this function). + * + * Retrieves the necessary information via the struct + * fw_core_dump_data stored in the private member of the seq_file + * handle. + * + * Return: + * * 0 - success + * * -ENOMEM - not enough memory for allocating ELF32 note + */ +static int fw_core_dump_write_elf_header(struct seq_file *m) +{ + struct elf32_hdr hdr; + struct elf32_phdr phdr; + struct fw_core_dump_data *dump_data = m->private; + struct kbase_device *const kbdev = dump_data->kbdev; + struct kbase_csf_firmware_interface *interface; + struct elf_prstatus32 elf_prs; + struct elf32_note *elf_prstatus_note; + unsigned int sections = 0; + unsigned int elf_prstatus_note_size; + u32 elf_prstatus_offset; + u32 elf_phdr_note_offset; + u32 elf_memory_sections_data_offset; + u32 total_pages = 0; + u32 padding_size, *padding; + struct fw_core_dump_mcu regs = { 0 }; + + /* Count number of memory sections. 
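+	 * Each readable, non-protected memory section becomes one PT_LOAD
+	 * program header in the dump; a single additional PT_NOTE program
+	 * header is emitted for the PRSTATUS note, hence "sections + 1" is
+	 * passed to fw_core_dump_fill_elf_header() below.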
*/ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + sections++; + } + + /* Prepare ELF header. */ + fw_core_dump_fill_elf_header(&hdr, sections + 1); + seq_write(m, &hdr, sizeof(struct elf32_hdr)); + + elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); + /* PHDRs of PT_LOAD type. */ + elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); + /* PHDR of PT_NOTE type. */ + elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); + elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + + /* Calculate padding size to page offset. */ + padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - + elf_memory_sections_data_offset; + elf_memory_sections_data_offset += padding_size; + + /* Prepare ELF program header table. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, + interface->virtual, + interface->num_pages * FW_PAGE_SIZE, + interface->flags); + + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; + total_pages += interface->num_pages; + } + + /* Prepare PHDR of PT_NOTE type. */ + fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, + elf_prstatus_note_size); + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + /* Prepare ELF note of PRSTATUS type. */ + if (fw_get_core_dump_mcu(kbdev, ®s)) + dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); + /* Even if MCU Registers are not available the ELF prstatus is still + * filled with the registers equal to zero. + */ + fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); + elf_prstatus_note_size = + fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); + if (elf_prstatus_note_size == 0) + return -ENOMEM; + + seq_write(m, elf_prstatus_note, elf_prstatus_note_size); + kfree(elf_prstatus_note); + + /* Pad file to page size. */ + padding = kzalloc(padding_size, GFP_KERNEL); + seq_write(m, padding, padding_size); + kfree(padding); + + return 0; +} + +/** + * fw_core_dump_create - Requests firmware to save state for a firmware core dump + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_create(struct kbase_device *kbdev) +{ + int err; + + /* Ensure MCU is active before requesting the core dump. */ + kbase_csf_scheduler_pm_active(kbdev); + err = kbase_csf_scheduler_wait_mcu_active(kbdev); + if (!err) + err = kbase_csf_firmware_req_core_dump(kbdev); + + kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + +/** + * fw_core_dump_seq_start - seq_file start operation for firmware core dump file + * @m: the seq_file handle + * @_pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 + * within the firmware interface memory sections. 
@_pos value 0 is used to indicate the + * position of the ELF header at the start of the file. + * + * Retrieves the necessary information via the struct fw_core_dump_data stored in + * the private member of the seq_file handle. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * SEQ_START_TOKEN - special iterator pointer indicating its is the start of the file + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data; + struct kbase_csf_firmware_interface *interface; + loff_t pos = *_pos; + + if (pos == 0) + return SEQ_START_TOKEN; + + /* Move iterator in the right position based on page number within + * available pages of firmware interface memory sections. + */ + pos--; /* ignore start token */ + list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + if (pos >= interface->num_pages) { + pos -= interface->num_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = interface; + data->page_num = pos; + return data; + } + } + + return NULL; +} + +/** + * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Closes the current session and frees any memory related. + */ +static void fw_core_dump_seq_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +/** + * fw_core_dump_seq_next - seq_file next operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * @pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Moves the iterator @v forward to the next page within the firmware interface + * memory sections and returns the updated position in @pos. + * @v value SEQ_START_TOKEN indicates the ELF header position. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * NULL - iterator could not be allocated + */ +static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data = v; + struct kbase_csf_firmware_interface *interface; + struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + + /* Is current position at the ELF header ? */ + if (v == SEQ_START_TOKEN) { + if (list_empty(interfaces)) + return NULL; + + /* Prepare iterator for starting at first page in firmware interface + * memory sections. + */ + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = + list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); + data->page_num = 0; + ++*pos; + return data; + } + + /* First attempt to satisfy from current firmware interface memory section. */ + interface = data->interface; + if (data->page_num + 1 < interface->num_pages) { + data->page_num++; + ++*pos; + return data; + } + + /* Need next firmware interface memory section. This could be the last one. 
*/ + if (list_is_last(&interface->node, interfaces)) { + kfree(data); + return NULL; + } + + /* Move to first page in next firmware interface memory section. */ + data->interface = list_next_entry(interface, node); + data->page_num = 0; + ++*pos; + + return data; +} + +/** + * fw_core_dump_seq_show - seq_file show operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Writes the current page in a firmware interface memory section indicated + * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF + * header is written. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ + struct fw_core_dump_seq_off *data = v; + struct page *page; + u32 *p; + + /* Either write the ELF header or current page. */ + if (v == SEQ_START_TOKEN) + return fw_core_dump_write_elf_header(m); + + /* Write the current page. */ + page = as_page(data->interface->phys[data->page_num]); + p = kmap_atomic(page); + seq_write(m, p, FW_PAGE_SIZE); + kunmap_atomic(p); + + return 0; +} + +/* Sequence file operations for firmware core dump file. */ +static const struct seq_operations fw_core_dump_seq_ops = { + .start = fw_core_dump_seq_start, + .next = fw_core_dump_seq_next, + .stop = fw_core_dump_seq_stop, + .show = fw_core_dump_seq_show, +}; + +/** + * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Prepares for servicing a write request to request a core dump from firmware and + * a read request to retrieve the core dump. + * + * Returns an error if the firmware is not initialized yet. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *const kbdev = inode->i_private; + struct fw_core_dump_data *dump_data; + int ret; + + /* Fail if firmware is not initialized yet. */ + if (!kbdev->csf.firmware_inited) { + ret = -ENODEV; + goto open_fail; + } + + /* Open a sequence file for iterating through the pages in the + * firmware interface memory pages. seq_open stores a + * struct seq_file * in the private_data field of @file. + */ + ret = seq_open(file, &fw_core_dump_seq_ops); + if (ret) + goto open_fail; + + /* Allocate a context for sequence file operations. */ + dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); + if (!dump_data) { + ret = -ENOMEM; + goto out; + } + + /* Kbase device will be shared with sequence file operations. */ + dump_data->kbdev = kbdev; + + /* Link our sequence file context. */ + ((struct seq_file *)file->private_data)->private = dump_data; + + return 0; +out: + seq_release(inode, file); +open_fail: + return ret; +} + +/** + * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file + * @file: file pointer + * @ubuf: user buffer containing data to store + * @count: number of bytes in user buffer + * @ppos: file position + * + * Any data written to the file triggers a firmware core dump request which + * subsequently can be retrieved by reading from the file. + * + * Return: @count if the function succeeded. An error code on failure. 
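+ *
+ * For example, writing any value requests a dump, which can then be read
+ * back from the same file:
+ *
+ *   echo 1 > /sys/kernel/debug/mali0/fw_core_dump
+ *   cat /sys/kernel/debug/mali0/fw_core_dump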
+ */ +static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + int err; + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + struct kbase_device *const kbdev = dump_data->kbdev; + + CSTD_UNUSED(ppos); + + err = fw_core_dump_create(kbdev); + + return err ? err : count; +} + +/** + * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + + kfree(dump_data); + + return 0; +} +/* Debugfs file operations for firmware core dump file. */ +static const struct file_operations kbase_csf_fw_core_dump_fops = { + .owner = THIS_MODULE, + .open = fw_core_dump_debugfs_open, + .read = seq_read, + .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, + .release = fw_core_dump_debugfs_release, +}; + +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_fw_core_dump_fops); +#endif /* CONFIG_DEBUG_FS */ +} + +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) +{ + /* Casting to u16 as version is defined by bits 15:0 */ + kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + + if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) + return -EPERM; + + kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; + kbdev->csf.fw_core_dump.available = true; + + return 0; +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 index 000000000000..0537dca4f37f --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_core_dump.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ +#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + +struct kbase_device; + +/** Offset of the last field of core dump entry from the image header */ +#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** + * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from + * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. 
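+ *         The entry is expected to hold the core dump data format version
+ *         in bits 15:0 of its first word, and the address in firmware
+ *         memory of the MCU registers buffer in its second word.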
+ * + * Read a "core dump" entry from the image header, check the version for + * compatibility and store the address pointer. + * + * Return: 0 if successfully parse entry, negative error code otherwise. + */ +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** + * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * Creates the fw_core_dump debugfs file through which to request a firmware + * core dump. The created debugfs file is cleaned up as part of kbdev debugfs + * cleanup. + * + * The fw_core_dump debugs file that case be used in the following way: + * + * To explicitly request core dump: + * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * + * To output current core dump (after explicitly requesting a core dump, or + * kernel driver reported an internal firmware error): + * cat /sys/kernel/debug/mali0/fw_core_dump + */ +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c index 20d8c0d4fdb1..6e0d3c2f5071 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.c @@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); enable_bits_count = 64; } - new_mask = val & ((1 << enable_bits_count) - 1); + new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); @@ -353,7 +353,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); - if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff && + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", calling_address); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h index 8d7a2210a457..1008320464a9 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_log.h @@ -24,6 +24,9 @@ #include +/** Offset of the last field of functions call list entry from the image header */ +#define FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + /* * Firmware log dumping buffer size. 
*/ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c index f414d8894306..ab25ed4429e3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_firmware_no_mali.c @@ -32,7 +32,8 @@ #include "mali_kbase_csf_scheduler.h" #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" -#include +#include +#include #include #include @@ -104,7 +105,6 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
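+	 * The GLB ack alone is not treated as proof of completion: the
+	 * DEBUG_RUN bit of GLB_DEBUG_ACK is also expected to match
+	 * GLB_DEBUG_REQ, which is what the WARN_ON below verifies.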
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} static void global_init(struct kbase_device *const kbdev, u64 core_mask) { @@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | - 0; + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1076,14 +1140,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -1166,8 +1230,6 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); - kbase_csf_scheduler_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -1197,6 +1259,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) * entry parsed from the firmware image. 
*/ kbase_mcu_shared_interface_region_tracker_term(kbdev); + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); } void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) @@ -1533,7 +1597,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1594,4 +1658,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c index 1876d505dd5b..42d19e1b6ad7 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_heap_context_alloc.c @@ -23,10 +23,7 @@ #include "mali_kbase_csf_heap_context_alloc.h" /* Size of one heap context structure, in bytes. */ -#define HEAP_CTX_SIZE ((size_t)32) - -/* Total size of the GPU memory region allocated for heap contexts, in bytes. */ -#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) +#define HEAP_CTX_SIZE ((u32)32) /** * sub_alloc - Sub-allocate a heap context from a GPU memory region @@ -38,8 +35,8 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) { struct kbase_context *const kctx = ctx_alloc->kctx; - int heap_nr = 0; - size_t ctx_offset = 0; + unsigned long heap_nr = 0; + u32 ctx_offset = 0; u64 heap_gpu_va = 0; struct kbase_vmap_struct mapping; void *ctx_ptr = NULL; @@ -55,29 +52,64 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) return 0; } - ctx_offset = heap_nr * HEAP_CTX_SIZE; + ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned; heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, - HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping); + ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping); if (unlikely(!ctx_ptr)) { dev_err(kctx->kbdev->dev, - "Failed to map tiler heap context %d (0x%llX)\n", + "Failed to map tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return 0; } - memset(ctx_ptr, 0, HEAP_CTX_SIZE); + memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned); kbase_vunmap(ctx_ptr, &mapping); bitmap_set(ctx_alloc->in_use, heap_nr, 1); - dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n", + dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va); return heap_gpu_va; } +/** + * evict_heap_context - Evict the data of heap context from GPU's L2 cache. + * + * @ctx_alloc: Pointer to the heap context allocator. + * @heap_gpu_va: The GPU virtual address of a heap context structure to free. + * + * This function is called when memory for the heap context is freed. It uses the + * FLUSH_PA_RANGE command to evict the data of heap context, so on older CSF GPUs + * there is nothing done. The whole GPU cache is anyways expected to be flushed + * on older GPUs when initial chunks of the heap are freed just before the memory + * for heap context is freed. 
+ */ +static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va); + u32 offset_within_page = offset_in_bytes & ~PAGE_MASK; + u32 page_index = offset_in_bytes >> PAGE_SHIFT; + struct tagged_addr page = + kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index]; + phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page; + + lockdep_assert_held(&ctx_alloc->lock); + + /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free + * refcounted. The region and the backing page can't disappear whilst this + * function is executing. + * Flush type is passed as FLUSH_PT to CLN+INV L2 only. + */ + kbase_mmu_flush_pa_range(kctx->kbdev, kctx, + heap_context_pa, ctx_alloc->heap_context_size_aligned, + KBASE_MMU_OP_FLUSH_PT); +} + /** * sub_free - Free a heap context sub-allocated from a GPU memory region * @@ -88,7 +120,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, u64 const heap_gpu_va) { struct kbase_context *const kctx = ctx_alloc->kctx; - u64 ctx_offset = 0; + u32 ctx_offset = 0; unsigned int heap_nr = 0; lockdep_assert_held(&ctx_alloc->lock); @@ -99,13 +131,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) return; - ctx_offset = heap_gpu_va - ctx_alloc->gpu_va; + ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va); - if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) || - WARN_ON(ctx_offset % HEAP_CTX_SIZE)) + if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) || + WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned)) return; - heap_nr = ctx_offset / HEAP_CTX_SIZE; + evict_heap_context(ctx_alloc, heap_gpu_va); + + heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned; dev_dbg(kctx->kbdev->dev, "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); @@ -116,12 +150,17 @@ int kbase_csf_heap_context_allocator_init( struct kbase_csf_heap_context_allocator *const ctx_alloc, struct kbase_context *const kctx) { + const u32 gpu_cache_line_size = + (1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + /* We cannot pre-allocate GPU memory here because the * custom VA zone may not have been created yet. */ ctx_alloc->kctx = kctx; ctx_alloc->region = NULL; ctx_alloc->gpu_va = 0; + ctx_alloc->heap_context_size_aligned = + (HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1); mutex_init(&ctx_alloc->lock); bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); @@ -142,7 +181,14 @@ void kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * We can't enforce (nor check) the no_user_free refcount + * to be 0 here as other code regions can take such a reference. + * Anyway, this isn't an issue as the region will eventually + * be freed by the region tracker if its refcount didn't drop + * to 0. 
+ */ + kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } @@ -156,7 +202,7 @@ u64 kbase_csf_heap_context_allocator_alloc( struct kbase_context *const kctx = ctx_alloc->kctx; u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD; - u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); + u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned); u64 heap_gpu_va = 0; /* Calls to this function are inherently asynchronous, with respect to diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c index 0b3f1334a9e6..f1727224b243 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.c @@ -80,7 +80,14 @@ static int kbase_kcpu_map_import_prepare( * on the physical pages tracking object. When the last * reference to the tracking object is dropped the pages * would be unpinned if they weren't unpinned before. + * + * Region should be CPU cached: abort if it isn't. */ + if (WARN_ON(!(reg->flags & KBASE_REG_CPU_CACHED))) { + ret = -EINVAL; + goto out; + } + ret = kbase_jd_user_buf_pin_pages(kctx, reg); if (ret) goto out; @@ -674,9 +681,8 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || - (reg->flags & KBASE_REG_NO_USER_FREE)) { + (kbase_is_region_shrinkable(reg)) || + (kbase_va_region_is_no_user_free(kctx, reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -784,13 +790,14 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return -EINVAL; } - sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; + sig_set = + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)] > cqs_wait->objs[i].val; if (sig_set) { bool error = false; bitmap_set(cqs_wait->signaled, i, 1); if ((cqs_wait->inherit_err_flags & (1U << i)) && - evt[BASEP_EVENT_ERR_INDEX] > 0) { + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] > 0) { queue->has_error = true; error = true; } @@ -800,7 +807,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, error); KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( - kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]); + kbdev, queue, evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); queue->command_started = false; } @@ -817,12 +824,34 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); } +static inline bool kbase_kcpu_cqs_is_data_type_valid(u8 data_type) +{ + return data_type == BASEP_CQS_DATA_TYPE_U32 || data_type == BASEP_CQS_DATA_TYPE_U64; +} + +static inline bool kbase_kcpu_cqs_is_aligned(u64 addr, u8 data_type) +{ + BUILD_BUG_ON(BASEP_EVENT32_ALIGN_BYTES != BASEP_EVENT32_SIZE_BYTES); + BUILD_BUG_ON(BASEP_EVENT64_ALIGN_BYTES != BASEP_EVENT64_SIZE_BYTES); + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(data_type)); + + switch (data_type) { + default: + return false; + case BASEP_CQS_DATA_TYPE_U32: + return (addr & (BASEP_EVENT32_ALIGN_BYTES - 1)) == 0; + case BASEP_CQS_DATA_TYPE_U64: + return (addr & (BASEP_EVENT64_ALIGN_BYTES - 1)) == 0; + } +} + static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, struct base_kcpu_command_cqs_wait_info *cqs_wait_info, struct 
kbase_kcpu_command *current_command) { struct base_cqs_wait_info *objs; unsigned int nr_objs = cqs_wait_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -842,6 +871,17 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -897,14 +937,13 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, "Sync memory %llx already freed", cqs_set->objs[i].addr); queue->has_error = true; } else { - evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)] = queue->has_error; /* Set to signaled */ - evt[BASEP_EVENT_VAL_INDEX]++; + evt[BASEP_EVENT32_VAL_OFFSET / sizeof(u32)]++; kbase_phy_alloc_mapping_put(queue->kctx, mapping); - KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, - queue, cqs_set->objs[i].addr, - evt[BASEP_EVENT_ERR_INDEX]); + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, KCPU_CQS_SET, queue, cqs_set->objs[i].addr, + evt[BASEP_EVENT32_ERR_OFFSET / sizeof(u32)]); } } @@ -921,6 +960,7 @@ static int kbase_kcpu_cqs_set_prepare( { struct base_cqs_set *objs; unsigned int nr_objs = cqs_set_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -940,6 +980,17 @@ static int kbase_kcpu_cqs_set_prepare( return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
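+	 * Because the required alignment equals the object size, an aligned
+	 * object can never straddle a page boundary; a misaligned address is
+	 * rejected here with -EINVAL rather than being detected at execution
+	 * time.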
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_aligned(objs[i].addr, BASEP_CQS_DATA_TYPE_U32)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; current_command->info.cqs_set.nr_objs = nr_objs; current_command->info.cqs_set.objs = objs; @@ -982,8 +1033,9 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, if (!test_bit(i, cqs_wait_operation->signaled)) { struct kbase_vmap_struct *mapping; bool sig_set; - u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, - cqs_wait_operation->objs[i].addr, &mapping); + uintptr_t evt = (uintptr_t)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_wait_operation->objs[i].addr, &mapping); + u64 val = 0; /* GPUCORE-28172 RDT to review */ if (!queue->command_started) @@ -996,12 +1048,29 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } + switch (cqs_wait_operation->objs[i].data_type) { + default: + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid( + cqs_wait_operation->objs[i].data_type)); + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + return -EINVAL; + case BASEP_CQS_DATA_TYPE_U32: + val = *(u32 *)evt; + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; + break; + case BASEP_CQS_DATA_TYPE_U64: + val = *(u64 *)evt; + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; + break; + } + switch (cqs_wait_operation->objs[i].operation) { case BASEP_CQS_WAIT_OPERATION_LE: - sig_set = *evt <= cqs_wait_operation->objs[i].val; + sig_set = val <= cqs_wait_operation->objs[i].val; break; case BASEP_CQS_WAIT_OPERATION_GT: - sig_set = *evt > cqs_wait_operation->objs[i].val; + sig_set = val > cqs_wait_operation->objs[i].val; break; default: dev_dbg(kbdev->dev, @@ -1013,24 +1082,10 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, return -EINVAL; } - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_wait_operation->objs[i].data_type) { - default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; - case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); - break; - case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); - break; - } - if (sig_set) { bitmap_set(cqs_wait_operation->signaled, i, 1); if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && - *evt > 0) { + *(u32 *)evt > 0) { queue->has_error = true; } @@ -1058,6 +1113,7 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue { struct base_cqs_wait_operation_info *objs; unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&queue->lock); @@ -1077,6 +1133,18 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. 
+ */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + if (++queue->cqs_wait_count == 1) { if (kbase_csf_event_wait_add(queue->kctx, event_cqs_callback, queue)) { @@ -1107,6 +1175,44 @@ static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue return 0; } +static void kbasep_kcpu_cqs_do_set_operation_32(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u32 *)evt += (u32)val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u32 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + +static void kbasep_kcpu_cqs_do_set_operation_64(struct kbase_kcpu_command_queue *queue, + uintptr_t evt, u8 operation, u64 val) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *(u64 *)evt += val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *(u64 *)evt = val; + break; + default: + dev_dbg(kbdev->dev, "Unsupported CQS set operation %d", operation); + queue->has_error = true; + break; + } +} + static void kbase_kcpu_cqs_set_operation_process( struct kbase_device *kbdev, struct kbase_kcpu_command_queue *queue, @@ -1121,9 +1227,9 @@ static void kbase_kcpu_cqs_set_operation_process( for (i = 0; i < cqs_set_operation->nr_objs; i++) { struct kbase_vmap_struct *mapping; - u64 *evt; + uintptr_t evt; - evt = (u64 *)kbase_phy_alloc_mapping_get( + evt = (uintptr_t)kbase_phy_alloc_mapping_get( queue->kctx, cqs_set_operation->objs[i].addr, &mapping); /* GPUCORE-28172 RDT to review */ @@ -1133,39 +1239,31 @@ static void kbase_kcpu_cqs_set_operation_process( "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); queue->has_error = true; } else { - switch (cqs_set_operation->objs[i].operation) { - case BASEP_CQS_SET_OPERATION_ADD: - *evt += cqs_set_operation->objs[i].val; - break; - case BASEP_CQS_SET_OPERATION_SET: - *evt = cqs_set_operation->objs[i].val; - break; - default: - dev_dbg(kbdev->dev, - "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); - queue->has_error = true; - break; - } + struct base_cqs_set_operation_info *obj = &cqs_set_operation->objs[i]; - /* Increment evt up to the error_state value depending on the CQS data type */ - switch (cqs_set_operation->objs[i].data_type) { + switch (obj->data_type) { default: - dev_dbg(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); - /* Fallthrough - hint to compiler that there's really only 2 options at present */ - fallthrough; + WARN_ON(!kbase_kcpu_cqs_is_data_type_valid(obj->data_type)); + queue->has_error = true; + goto skip_err_propagation; case BASEP_CQS_DATA_TYPE_U32: - evt = (u64 *)((u8 *)evt + sizeof(u32)); + kbasep_kcpu_cqs_do_set_operation_32(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT32_ERR_OFFSET - BASEP_EVENT32_VAL_OFFSET; break; case BASEP_CQS_DATA_TYPE_U64: - evt = (u64 *)((u8 *)evt + sizeof(u64)); + kbasep_kcpu_cqs_do_set_operation_64(queue, evt, obj->operation, + obj->val); + evt += BASEP_EVENT64_ERR_OFFSET - BASEP_EVENT64_VAL_OFFSET; break; } /* GPUCORE-28172 RDT to review */ /* Always propagate errors */ - *evt = queue->has_error; + *(u32 *)evt = queue->has_error; +skip_err_propagation: 
kbase_phy_alloc_mapping_put(queue->kctx, mapping); } } @@ -1183,6 +1281,7 @@ static int kbase_kcpu_cqs_set_operation_prepare( { struct base_cqs_set_operation_info *objs; unsigned int nr_objs = cqs_set_operation_info->nr_objs; + unsigned int i; lockdep_assert_held(&kcpu_queue->lock); @@ -1202,6 +1301,18 @@ static int kbase_kcpu_cqs_set_operation_prepare( return -ENOMEM; } + /* Check the CQS objects as early as possible. By checking their alignment + * (required alignment equals to size for Sync32 and Sync64 objects), we can + * prevent overrunning the supplied event page. + */ + for (i = 0; i < nr_objs; i++) { + if (!kbase_kcpu_cqs_is_data_type_valid(objs[i].data_type) || + !kbase_kcpu_cqs_is_aligned(objs[i].addr, objs[i].data_type)) { + kfree(objs); + return -EINVAL; + } + } + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; current_command->info.cqs_set_operation.nr_objs = nr_objs; current_command->info.cqs_set_operation.objs = objs; @@ -1234,9 +1345,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1410,15 +1520,14 @@ static int kbase_kcpu_fence_wait_process( */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; @@ -1429,8 +1538,7 @@ static int kbase_kcpu_fence_wait_prepare( lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = sync_file_get_fence(fence.basep.fd); @@ -1444,9 +1552,8 @@ static int kbase_kcpu_fence_wait_prepare( return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1467,37 +1574,37 @@ static int kbase_kcpu_fence_signal_process( fence_info->fence->seqno); /* dma_fence refcount needs to be decreased to release it. 
*/ - dma_fence_put(fence_info->fence); + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, @@ -1513,28 +1620,70 @@ static int kbase_kcpu_fence_signal_prepare( dma_fence_get(fence_out); #endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); +#endif + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + return 0; + +fd_flags_fail: + fput((*sync_file)->file); +file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; - goto fd_flags_fail; + goto fail; } /* 'sync_file' pointer can't be safely dereferenced once 'fd' is @@ -1544,21 +1693,34 @@ static int kbase_kcpu_fence_signal_prepare( fd_install(fd, sync_file->file); return 0; -fd_flags_fail: +fail: fput(sync_file->file); -file_create_fail: - /* - * Upon failure, dma_fence refcount that was increased by - * dma_fence_get() or sync_file_create() needs to be decreased - * to release it. - */ - dma_fence_put(fence_out); - + kbase_fence_put(current_command->info.fence.fence); current_command->info.fence.fence = NULL; - kfree(fence_out); return ret; } + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1595,6 +1757,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&queue->lock); + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); + /* Drain the remaining work for this queue first and go past * all the waits. */ @@ -1701,8 +1866,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) if (drain_queue) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1732,8 +1896,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -2103,14 +2266,30 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, return -EINVAL; } + /* There might be a race between one thread trying to enqueue commands to the queue + * and other thread trying to delete the same queue. + * This racing could lead to use-after-free problem by enqueuing thread if + * resources for the queue has already been freed by deleting thread. 
+ * + * To prevent the issue, two mutexes are acquired/release asymmetrically as follows. + * + * Lock A (kctx mutex) + * Lock B (queue mutex) + * Unlock A + * Unlock B + * + * With the kctx mutex being held, enqueuing thread will check the queue + * and will return error code if the queue had already been deleted. + */ mutex_lock(&kctx->csf.kcpu_queues.lock); queue = kctx->csf.kcpu_queues.array[enq->id]; - mutex_unlock(&kctx->csf.kcpu_queues.lock); - - if (queue == NULL) + if (queue == NULL) { + dev_dbg(kctx->kbdev->dev, "Invalid KCPU queue (id:%u)", enq->id); + mutex_unlock(&kctx->csf.kcpu_queues.lock); return -EINVAL; - + } mutex_lock(&queue->lock); + mutex_unlock(&kctx->csf.kcpu_queues.lock); if (kcpu_queue_get_space(queue) < enq->nr_commands) { ret = -EBUSY; @@ -2275,6 +2454,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2288,7 +2468,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_kcpu_command_queue *queue; int idx; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. @@ -2334,7 +2516,27 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id, + kctx->tgid, kctx->id, queue->fence_context); + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_set(&metadata->refcount, 1); +#else + refcount_set(&metadata->refcount, 1); #endif + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2360,3 +2562,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h index 5f9b8e0684bc..85db53867c06 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_kcpu.h @@ -22,6 +22,9 @@ #ifndef _KBASE_CSF_KCPU_H_ #define _KBASE_CSF_KCPU_H_ +#include +#include + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include #else @@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info { }; /** - * struct kbase_kcpu_command_fence_info - Structure which holds information - * about the fence object enqueued in the kcpu command queue + * struct kbase_kcpu_command_fence_info - Structure which holds information about the + * fence object enqueued in the kcpu command queue * * @fence_cb: Fence callback * @fence: Fence @@ -274,6 +277,8 @@ struct kbase_kcpu_command { * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. * @fence_timeout: Timer used to detect the fence wait timeout. 
+ * @metadata: Metadata structure containing basic information about + * this queue for any fence objects associated with this queue. */ struct kbase_kcpu_command_queue { struct mutex lock; @@ -295,6 +300,9 @@ struct kbase_kcpu_command_queue { #ifdef CONFIG_MALI_BIFROST_FENCE_DEBUG struct timer_list fence_timeout; #endif /* CONFIG_MALI_BIFROST_FENCE_DEBUG */ +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* CONFIG_SYNC_FILE */ }; /** @@ -359,4 +367,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); */ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); +#if IS_ENABLED(CONFIG_SYNC_FILE) +/* Test wrappers for dma fence operations. */ +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd); +#endif /* CONFIG_SYNC_FILE */ + #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c new file mode 100644 index 000000000000..77e19dba4262 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include "mali_kbase_csf.h" +#include "mali_kbase_csf_mcu_shared_reg.h" +#include + +/* Scaling factor in pre-allocating shared regions for suspend bufs and userios */ +#define MCU_SHARED_REGS_PREALLOCATE_SCALE (8) + +/* MCU shared region map attempt limit */ +#define MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT (4) + +/* Convert a VPFN to its start addr */ +#define GET_VPFN_VA(vpfn) ((vpfn) << PAGE_SHIFT) + +/* Macros for extract the corresponding VPFNs from a CSG_REG */ +#define CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn) +#define CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages) (reg->start_pfn + nr_susp_pages) +#define CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages) (reg->start_pfn + 2 * (nr_susp_pages + csi)) + +/* MCU shared segment dummy page mapping flags */ +#define DUMMY_PAGE_MAP_FLAGS (KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT) | KBASE_REG_GPU_NX) + +/* MCU shared segment suspend buffer mapping flags */ +#define SUSP_PAGE_MAP_FLAGS \ + (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | \ + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT)) + +/** + * struct kbase_csg_shared_region - Wrapper object for use with a CSG on runtime + * resources for suspend buffer pages, userio pages + * and their corresponding mapping GPU VA addresses + * from the MCU shared interface segment + * + * @link: Link to the managing list for the wrapper object. + * @reg: pointer to the region allocated from the shared interface segment, which + * covers the normal/P-mode suspend buffers, userio pages of the queues + * @grp: Pointer to the bound kbase_queue_group, or NULL if no binding (free). + * @pmode_mapped: Boolean for indicating the region has MMU mapped with the bound group's + * protected mode suspend buffer pages. + */ +struct kbase_csg_shared_region { + struct list_head link; + struct kbase_va_region *reg; + struct kbase_queue_group *grp; + bool pmode_mapped; +}; + +static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) +{ + unsigned long userio_map_flags; + + if (kbdev->system_coherency == COHERENCY_NONE) + userio_map_flags = + KBASE_REG_GPU_RD | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + else + userio_map_flags = KBASE_REG_GPU_RD | KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + + return (userio_map_flags | KBASE_REG_GPU_NX); +} + +static void set_page_meta_status_not_movable(struct tagged_addr phy) +{ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } +} + +static struct kbase_csg_shared_region *get_group_bound_csg_reg(struct kbase_queue_group *group) +{ + return (struct kbase_csg_shared_region *)group->csg_reg; +} + +static inline int update_mapping_with_dummy_pages(struct kbase_device *kbdev, u64 vpfn, + u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + + return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, shared_regs->dummy_phys, nr_pages, + mem_flags, KBASE_MEM_GROUP_CSF_FW); +} + +static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const unsigned long mem_flags = DUMMY_PAGE_MAP_FLAGS; + const enum 
kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info, NULL, false); +} + +/* Reset consecutive retry count to zero */ +static void notify_group_csg_reg_map_done(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + /* Just clear the internal map retry count */ + group->csg_reg_bind_retries = 0; +} + +/* Return true if a fatal group error has already been triggered */ +static bool notify_group_csg_reg_map_error(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (group->csg_reg_bind_retries < U8_MAX) + group->csg_reg_bind_retries++; + + /* Allow only one fatal error notification */ + if (group->csg_reg_bind_retries == MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT) { + struct base_gpu_queue_group_error const err_payload = { + .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { .status = GPU_EXCEPTION_TYPE_SW_FAULT_0 } } + }; + + dev_err(kbdev->dev, "Fatal: group_%d_%d_%d exceeded shared region map retry limit", + group->kctx->tgid, group->kctx->id, group->handle); + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); + } + + return group->csg_reg_bind_retries >= MCU_SHARED_REGS_BIND_ATTEMPT_LIMIT; +} + +/* Replace the given phys at vpfn (reflecting a queue's userio_pages) mapping. + * If phys is NULL, the internal dummy_phys is used, which effectively + * restores back to the initialized state for the given queue's userio_pages + * (i.e. mapped to the default dummy page). + * In case of CSF mmu update error on a queue, the dummy phy is used to restore + * back the default 'unbound' (i.e. mapped to dummy) condition. + * + * It's the caller's responsibility to ensure that the given vpfn is extracted + * correctly from a CSG_REG object, for example, using CSG_REG_USERIO_VPFN(). 
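/*
 * Editor's illustrative note (not part of the patch): how one pre-allocated
 * csg_reg region is partitioned by the CSG_REG_*_VPFN() macros above. For a
 * region of 2 * (nr_susp_pages + nr_csis) pages starting at start_pfn:
 *
 *   [ normal suspend buf ][ P-mode suspend buf ][ CSI0 in|out ][ CSI1 in|out ] ...
 *      nr_susp_pages          nr_susp_pages        2 pages         2 pages
 *
 * The helper below is hypothetical and only demonstrates the arithmetic that
 * callers of userio_pages_replace_phys() perform.
 */
static void example_dump_csg_reg_layout(struct kbase_device *kbdev, struct kbase_va_region *reg,
					u32 nr_susp_pages, u32 nr_csis)
{
	u32 csi;

	dev_dbg(kbdev->dev, "normal suspend buf VA: 0x%llx",
		GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)));
	dev_dbg(kbdev->dev, "P-mode suspend buf VA: 0x%llx",
		GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)));

	for (csi = 0; csi < nr_csis; csi++) {
		u64 vpfn = CSG_REG_USERIO_VPFN(reg, csi, nr_susp_pages);

		/* Input page first, output page immediately after it. */
		dev_dbg(kbdev->dev, "CSI%u userio input VA: 0x%llx output VA: 0x%llx", csi,
			GET_VPFN_VA(vpfn), GET_VPFN_VA(vpfn) + PAGE_SIZE);
	}
}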
+ */ +static int userio_pages_replace_phys(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (phys) { + unsigned long mem_flags_input = shared_regs->userio_mem_rd_flags; + unsigned long mem_flags_output = mem_flags_input | KBASE_REG_GPU_WR; + + /* Dealing with a queue's INPUT page */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, &phys[0], 1, mem_flags_input, + KBASE_MEM_GROUP_CSF_IO); + /* Dealing with a queue's OUTPUT page */ + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn + 1, &phys[1], 1, + mem_flags_output, KBASE_MEM_GROUP_CSF_IO); + if (unlikely(err1)) + err = err1; + } + + if (unlikely(err) || !phys) { + /* Restore back to dummy_userio_phy */ + update_mapping_with_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + } + + return err; +} + +/* Update a group's queues' mappings for a group with its runtime bound group region */ +static int csg_reg_update_on_csis(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_queue_group *prev_grp) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + struct tagged_addr *phy; + int err = 0, err1; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_userio pages: group has no bound csg_reg")) + return -EINVAL; + + for (i = 0; i < nr_csis; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + struct kbase_queue *prev_queue = prev_grp ? prev_grp->bound_queues[i] : NULL; + + /* Set the phy if the group's queue[i] needs mapping, otherwise NULL */ + phy = (queue && queue->enabled && !queue->user_io_gpu_va) ? queue->phys : NULL; + + /* Either phy is valid, or this update is for a transition change from + * prev_group, and the prev_queue was mapped, so an update is required. + */ + if (phy || (prev_queue && prev_queue->user_io_gpu_va)) { + u64 vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, i, nr_susp_pages); + + err1 = userio_pages_replace_phys(kbdev, vpfn, phy); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, + "%s: Error in update queue-%d mapping for csg_%d_%d_%d", + __func__, i, group->kctx->tgid, group->kctx->id, + group->handle); + err = err1; + } else if (phy) + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + + /* Mark prev_group's queue has lost its mapping */ + if (prev_queue) + prev_queue->user_io_gpu_va = 0; + } + } + + return err; +} + +/* Bind a group to a given csg_reg, any previous mappings with the csg_reg are replaced + * with the given group's phy pages, or, if no replacement, the default dummy pages. + * Note, the csg_reg's fields are in transition step-by-step from the prev_grp to its + * new binding owner in this function. At the end, the prev_grp would be completely + * detached away from the previously bound csg_reg. 
+ */ +static int group_bind_csg_reg(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_csg_shared_region *csg_reg) +{ + const unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + struct kbase_queue_group *prev_grp = csg_reg->grp; + struct kbase_va_region *reg = csg_reg->reg; + struct tagged_addr *phy; + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The csg_reg is expected still on the unused list so its link is not empty */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) { + dev_dbg(kbdev->dev, "csg_reg is marked in active use"); + return -EINVAL; + } + + if (WARN_ON_ONCE(prev_grp && prev_grp->csg_reg != csg_reg)) { + dev_dbg(kbdev->dev, "Unexpected bound lost on prev_group"); + prev_grp->csg_reg = NULL; + return -EINVAL; + } + + /* Replacing the csg_reg bound group to the newly given one */ + csg_reg->grp = group; + group->csg_reg = csg_reg; + + /* Resolving mappings, deal with protected mode first */ + if (group->protected_suspend_buf.pma) { + /* We are binding a new group with P-mode ready, the prev_grp's P-mode mapping + * status is now stale during this transition of ownership. For the new owner, + * its mapping would have been updated away when it lost its binding previously. + * So it needs an update to this pma map. By clearing here the mapped flag + * ensures it reflects the new owner's condition. + */ + csg_reg->pmode_mapped = false; + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + } else if (csg_reg->pmode_mapped) { + /* Need to unmap the previous one, use the dummy pages */ + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + + if (unlikely(err)) + dev_warn(kbdev->dev, "%s: Failed to update P-mode dummy for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + csg_reg->pmode_mapped = false; + } + + /* Unlike the normal suspend buf, the mapping of the protected mode suspend buffer is + * actually reflected by a specific mapped flag (due to phys[] is only allocated on + * in-need basis). So the GPU_VA is always updated to the bound region's corresponding + * VA, as a reflection of the binding to the csg_reg. 
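/*
 * Editor's illustrative sketch (not part of the patch) of the asymmetry the
 * comment above describes: for the normal suspend buffer a non-zero GPU VA
 * means "really mapped", while for the protected-mode suspend buffer the VA is
 * always set on binding and only csg_reg->pmode_mapped says whether real PMA
 * pages are behind it. The helper is hypothetical and, like the surrounding
 * code, assumes the scheduler lock is held.
 */
static bool example_group_suspend_bufs_mapped(struct kbase_queue_group *group)
{
	struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group);
	bool normal_mapped = group->normal_suspend_buf.gpu_va != 0;
	bool pmode_mapped = csg_reg && csg_reg->pmode_mapped;

	return normal_mapped && pmode_mapped;
}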
+ */ + group->protected_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages)); + + /* Deal with normal mode suspend buffer */ + phy = group->normal_suspend_buf.phy; + err1 = kbase_mmu_update_csf_mcu_pages(kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), phy, + nr_susp_pages, mem_flags, KBASE_MEM_GROUP_CSF_FW); + + if (unlikely(err1)) { + dev_warn(kbdev->dev, "%s: Failed to update suspend buffer for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, group->handle); + + /* Attempt a restore to default dummy for removing previous mapping */ + if (prev_grp) + update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + err = err1; + /* Marking the normal suspend buffer is not mapped (due to error) */ + group->normal_suspend_buf.gpu_va = 0; + } else { + /* Marking the normal suspend buffer is actually mapped */ + group->normal_suspend_buf.gpu_va = + GET_VPFN_VA(CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages)); + } + + /* Deal with queue uerio_pages */ + err1 = csg_reg_update_on_csis(kbdev, group, prev_grp); + if (likely(!err1)) + err = err1; + + /* Reset the previous group's suspend buffers' GPU_VAs as it has lost its bound */ + if (prev_grp) { + prev_grp->normal_suspend_buf.gpu_va = 0; + prev_grp->protected_suspend_buf.gpu_va = 0; + prev_grp->csg_reg = NULL; + } + + return err; +} + +/* Notify the group is placed on-slot, hence the bound csg_reg is active in use */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bounding", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By dropping out the csg_reg from the unused list, it becomes active and is tracked + * by its bound group that is on-slot. The design is that, when this on-slot group is + * moved to off-slot, the scheduler slot-clean up will add it back to the tail of the + * unused list. + */ + if (!WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_del_init(&csg_reg->link); +} + +/* Notify the group is placed off-slot, hence the bound csg_reg is not in active use + * anymore. Existing bounding/mappings are left untouched. These would only be dealt with + * if the bound csg_reg is to be reused with another group. + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg || csg_reg->grp != group, "Group_%d_%d_%d has no csg_reg bound", + group->kctx->tgid, group->kctx->id, group->handle)) + return; + + /* By adding back the csg_reg to the unused list, it becomes available for another + * group to break its existing binding and set up a new one. 
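/*
 * Editor's illustrative summary (not part of the patch) of how membership of
 * shared_regs->unused_csg_regs encodes a csg_reg's state in this lazy
 * unbinding scheme:
 *
 *   - off the list (link empty)   : bound to an on-slot group, in active use
 *   - on the list, grp != NULL    : bound to an off-slot group; mappings are
 *                                   kept and reused if the same group returns,
 *                                   otherwise lazily replaced when the region
 *                                   is handed to a new group
 *   - on the list, grp == NULL    : free (never bound, or cleared on eviction)
 *
 * The hypothetical helper below mirrors how
 * kbase_csf_mcu_shared_group_bind_csg_reg() picks a region to (re)bind.
 */
static struct kbase_csg_shared_region *
example_pick_csg_reg(struct kbase_csf_mcu_shared_regions *shared_regs,
		     struct kbase_queue_group *group)
{
	/* Prefer the group's existing binding so its mappings can be reused. */
	if (group->csg_reg)
		return (struct kbase_csg_shared_region *)group->csg_reg;

	/* Otherwise take the head of the unused list: evicted (already cleared)
	 * regions are queued at the front, recently vacated ones at the tail.
	 */
	return list_first_entry_or_null(&shared_regs->unused_csg_regs,
					struct kbase_csg_shared_region, link);
}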
+ */ + if (!list_empty(&csg_reg->link)) { + WARN_ONCE(group->csg_nr >= 0, "Group is assumed vacated from slot"); + list_move_tail(&csg_reg->link, &shared_regs->unused_csg_regs); + } else + list_add_tail(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +/* Adding a new queue to an existing on-slot group */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!group || group->csg_nr < 0, "No bound group, or group is not on-slot")) + return -EIO; + + csg_reg = get_group_bound_csg_reg(group); + if (WARN_ONCE(!csg_reg || !list_empty(&csg_reg->link), + "No bound csg_reg, or in wrong state")) + return -EIO; + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + err = userio_pages_replace_phys(kbdev, vpfn, queue->phys); + if (likely(!err)) { + /* Mark the queue has been successfully mapped */ + queue->user_io_gpu_va = GET_VPFN_VA(vpfn); + } else { + /* Mark the queue has no mapping on its phys[] */ + queue->user_io_gpu_va = 0; + dev_dbg(kbdev->dev, + "%s: Error in mapping userio pages for queue-%d of csg_%d_%d_%d", __func__, + queue->csi_index, group->kctx->tgid, group->kctx->id, group->handle); + + /* notify the error for the bound group */ + if (notify_group_csg_reg_map_error(group)) + err = -EIO; + } + + return err; +} + +/* Unmap a given queue's userio pages, when the queue is deleted */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue) +{ + struct kbase_queue_group *group; + struct kbase_csg_shared_region *csg_reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u64 vpfn; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* The queue has no existing mapping, nothing to do */ + if (!queue || !queue->user_io_gpu_va) + return; + + group = queue->group; + if (WARN_ONCE(!group || !group->csg_reg, "Queue/Group has no bound region")) + return; + + csg_reg = get_group_bound_csg_reg(group); + + vpfn = CSG_REG_USERIO_VPFN(csg_reg->reg, queue->csi_index, nr_susp_pages); + + WARN_ONCE(userio_pages_replace_phys(kbdev, vpfn, NULL), + "Unexpected restoring to dummy map update error"); + queue->user_io_gpu_va = 0; +} + +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + int err = 0, err1; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ONCE(!csg_reg, "Update_pmode_map: the bound csg_reg can't be NULL")) + return -EINVAL; + + /* If the pmode already mapped, nothing to do */ + if (csg_reg->pmode_mapped) + return 0; + + /* P-mode map not in place and the group has allocated P-mode pages, map it */ + if (group->protected_suspend_buf.pma) { + unsigned long mem_flags = SUSP_PAGE_MAP_FLAGS; + struct tagged_addr *phy = shared_regs->pma_phys; + struct kbase_va_region *reg = csg_reg->reg; + u64 vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + u32 i; + + /* Populate the protected phys from pma to phy[] */ + for (i = 0; i < nr_susp_pages; i++) + phy[i] = 
as_tagged(group->protected_suspend_buf.pma[i]->pa); + + /* Add the P-mode suspend buffer mapping */ + err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phy, nr_susp_pages, mem_flags, + KBASE_MEM_GROUP_CSF_FW); + + /* If error, restore to default dummpy */ + if (unlikely(err)) { + err1 = update_mapping_with_dummy_pages(kbdev, vpfn, nr_susp_pages); + if (unlikely(err1)) + dev_warn( + kbdev->dev, + "%s: Failed in recovering to P-mode dummy for csg_%d_%d_%d", + __func__, group->kctx->tgid, group->kctx->id, + group->handle); + + csg_reg->pmode_mapped = false; + } else + csg_reg->pmode_mapped = true; + } + + return err; +} + +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg = get_group_bound_csg_reg(group); + struct kbase_va_region *reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + int err = 0; + u32 i; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Nothing to do for clearing up if no bound csg_reg */ + if (!csg_reg) + return; + + reg = csg_reg->reg; + /* Restore mappings default dummy pages for any mapped pages */ + if (csg_reg->pmode_mapped) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(unlikely(err), "Restore dummy failed for clearing pmod buffer mapping"); + + csg_reg->pmode_mapped = false; + } + + if (group->normal_suspend_buf.gpu_va) { + err = update_mapping_with_dummy_pages( + kbdev, CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages), nr_susp_pages); + WARN_ONCE(err, "Restore dummy failed for clearing suspend buffer mapping"); + } + + /* Deal with queue uerio pages */ + for (i = 0; i < nr_csis; i++) + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, group->bound_queues[i]); + + group->normal_suspend_buf.gpu_va = 0; + group->protected_suspend_buf.gpu_va = 0; + + /* Break the binding */ + group->csg_reg = NULL; + csg_reg->grp = NULL; + + /* Put the csg_reg to the front of the unused list */ + if (WARN_ON_ONCE(list_empty(&csg_reg->link))) + list_add(&csg_reg->link, &shared_regs->unused_csg_regs); + else + list_move(&csg_reg->link, &shared_regs->unused_csg_regs); +} + +int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_csg_shared_region *csg_reg; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + csg_reg = get_group_bound_csg_reg(group); + if (!csg_reg) + csg_reg = list_first_entry_or_null(&shared_regs->unused_csg_regs, + struct kbase_csg_shared_region, link); + + if (!WARN_ON_ONCE(!csg_reg)) { + struct kbase_queue_group *prev_grp = csg_reg->grp; + + /* Deal with the previous binding and lazy unmap, i.e if the previous mapping not + * the required one, unmap it. 
+ */ + if (prev_grp == group) { + /* Update existing bindings, if there have been some changes */ + err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group); + if (likely(!err)) + err = csg_reg_update_on_csis(kbdev, group, NULL); + } else + err = group_bind_csg_reg(kbdev, group, csg_reg); + } else { + /* This should not have been possible if the code operates rightly */ + dev_err(kbdev->dev, "%s: Unexpected NULL csg_reg for group %d of context %d_%d", + __func__, group->handle, group->kctx->tgid, group->kctx->id); + return -EIO; + } + + if (likely(!err)) + notify_group_csg_reg_map_done(group); + else + notify_group_csg_reg_map_error(group); + + return err; +} + +static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + const size_t nr_csg_reg_pages = 2 * (nr_susp_pages + nr_csis); + struct kbase_va_region *reg; + u64 vpfn; + int err, i; + + INIT_LIST_HEAD(&csg_reg->link); + reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, + KBASE_REG_ZONE_MCU_SHARED); + + if (!reg) { + dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", + __func__, nr_csg_reg_pages); + return -ENOMEM; + } + + /* Insert the region into rbtree, so it becomes ready to use */ + mutex_lock(&kbdev->csf.reg_lock); + err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_csg_reg_pages, 1); + reg->flags &= ~KBASE_REG_FREE; + mutex_unlock(&kbdev->csf.reg_lock); + if (err) { + kfree(reg); + dev_err(kbdev->dev, "%s: Failed to add a region of %zu pages into rbtree", __func__, + nr_csg_reg_pages); + return err; + } + + /* Initialize the mappings so MMU only need to update the the corresponding + * mapped phy-pages at runtime. + * Map the normal suspend buffer pages to the prepared dummy phys[]. 
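/*
 * Editor's illustrative sketch (not part of the patch): the split between
 * init-time and runtime MMU work that the dummy-page pre-mapping above is
 * built around. At init the whole region is populated once with the shared
 * dummy page, so later (re)binding a group only swaps the physical pages
 * behind an existing mapping and never allocates VA space or page tables on a
 * hot path. The helper is hypothetical; the two kbase_mmu_* calls and flag
 * macros are the ones already used in this file.
 */
static int example_premap_then_rebind(struct kbase_device *kbdev, u64 vpfn, u32 nr_pages,
				      struct tagged_addr *dummy_phys,
				      struct tagged_addr *real_phys)
{
	int err;

	/* Init time: create the page tables, pointing every page at the dummy. */
	err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, dummy_phys, nr_pages,
				     DUMMY_PAGE_MAP_FLAGS, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
				     CALLER_MMU_ASYNC, NULL, false);
	if (err)
		return err;

	/* Runtime (scheduler lock held): only swap the physical pages in place. */
	return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, real_phys, nr_pages,
					      SUSP_PAGE_MAP_FLAGS, KBASE_MEM_GROUP_CSF_FW);
}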
+ */ + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_susp_map_fail; + + /* Map the protected suspend buffer pages to the prepared dummy phys[] */ + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, nr_susp_pages); + + if (unlikely(err)) + goto fail_pmod_map_fail; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + err = insert_dummy_pages(kbdev, vpfn, KBASEP_NUM_CS_USER_IO_PAGES); + + if (unlikely(err)) + goto fail_userio_pages_map_fail; + } + + /* Replace the previous NULL-valued field with the successully initialized reg */ + csg_reg->reg = reg; + + return 0; + +fail_userio_pages_map_fail: + while (i-- > 0) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); +fail_pmod_map_fail: + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); +fail_susp_map_fail: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); + + return err; +} + +/* Note, this helper can only be called on scheduler shutdown */ +static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, + struct kbase_csg_shared_region *csg_reg) +{ + struct kbase_csf_mcu_shared_regions *shared_regs = &kbdev->csf.scheduler.mcu_regs_data; + struct kbase_va_region *reg = csg_reg->reg; + const u32 nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_csis = kbdev->csf.global_iface.groups[0].stream_num; + u64 vpfn; + int i; + + for (i = 0; i < nr_csis; i++) { + vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true); + } + + vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, MCU_AS_NR, true); + + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, reg); + mutex_unlock(&kbdev->csf.reg_lock); + kfree(reg); +} + +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs; + const size_t nr_susp_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + const u32 nr_dummy_phys = MAX(nr_susp_pages, KBASEP_NUM_CS_USER_IO_PAGES); + u32 i; + int err; + + shared_regs->userio_mem_rd_flags = get_userio_mmu_flags(kbdev); + INIT_LIST_HEAD(&shared_regs->unused_csg_regs); + + shared_regs->dummy_phys = + kcalloc(nr_dummy_phys, sizeof(*shared_regs->dummy_phys), GFP_KERNEL); + if (!shared_regs->dummy_phys) + 
return -ENOMEM; + + if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, + &shared_regs->dummy_phys[0], false) <= 0) + return -ENOMEM; + + shared_regs->dummy_phys_allocated = true; + set_page_meta_status_not_movable(shared_regs->dummy_phys[0]); + + /* Replicate the allocated single shared_regs->dummy_phys[0] to the full array */ + for (i = 1; i < nr_dummy_phys; i++) + shared_regs->dummy_phys[i] = shared_regs->dummy_phys[0]; + + shared_regs->pma_phys = kcalloc(nr_susp_pages, sizeof(*shared_regs->pma_phys), GFP_KERNEL); + if (!shared_regs->pma_phys) + return -ENOMEM; + + array_csg_regs = kcalloc(nr_csg_regs, sizeof(*array_csg_regs), GFP_KERNEL); + if (!array_csg_regs) + return -ENOMEM; + shared_regs->array_csg_regs = array_csg_regs; + + /* All fields in scheduler->mcu_regs_data except the shared_regs->array_csg_regs + * are properly populated and ready to use. Now initialize the items in + * shared_regs->array_csg_regs[] + */ + for (i = 0; i < nr_csg_regs; i++) { + err = shared_mcu_csg_reg_init(kbdev, &array_csg_regs[i]); + if (err) + return err; + + list_add_tail(&array_csg_regs[i].link, &shared_regs->unused_csg_regs); + } + + return 0; +} + +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_csf_mcu_shared_regions *shared_regs = &scheduler->mcu_regs_data; + struct kbase_csg_shared_region *array_csg_regs = + (struct kbase_csg_shared_region *)shared_regs->array_csg_regs; + const u32 nr_groups = kbdev->csf.global_iface.group_num; + const u32 nr_csg_regs = MCU_SHARED_REGS_PREALLOCATE_SCALE * nr_groups; + + if (array_csg_regs) { + struct kbase_csg_shared_region *csg_reg; + u32 i, cnt_csg_regs = 0; + + for (i = 0; i < nr_csg_regs; i++) { + csg_reg = &array_csg_regs[i]; + /* There should not be any group mapping bindings */ + WARN_ONCE(csg_reg->grp, "csg_reg has a bound group"); + + if (csg_reg->reg) { + shared_mcu_csg_reg_term(kbdev, csg_reg); + cnt_csg_regs++; + } + } + + /* The nr_susp_regs counts should match the array_csg_regs' length */ + list_for_each_entry(csg_reg, &shared_regs->unused_csg_regs, link) + cnt_csg_regs--; + + WARN_ONCE(cnt_csg_regs, "Unmatched counts of susp_regs"); + kfree(shared_regs->array_csg_regs); + } + + if (shared_regs->dummy_phys_allocated) { + struct page *page = as_page(shared_regs->dummy_phys[0]); + + kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false); + } + + kfree(shared_regs->dummy_phys); + kfree(shared_regs->pma_phys); +} diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h new file mode 100644 index 000000000000..61943cbbf083 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_mcu_shared_reg.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_MCU_SHARED_REG_H_ +#define _KBASE_CSF_MCU_SHARED_REG_H_ + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_active - Notify that the group is active on-slot with + * scheduling action. Essential runtime resources + * are bound with the group for it to run + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed into active on-slot running by the scheduler. + * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_active(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_set_group_csg_reg_unused - Notify that the group is placed off-slot with + * scheduling action. Some of bound runtime + * resources can be reallocated for others to use + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that is placed off-slot by the scheduler. + * + */ +void kbase_csf_mcu_shared_set_group_csg_reg_unused(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_group_update_pmode_map - Request to update the given group's protected + * suspend buffer pages to be mapped for supporting + * protected mode operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group for attempting a protected mode suspend buffer binding/mapping. + * + * Return: 0 for success, the group has a protected suspend buffer region mapped. Otherwise an + * error code is returned. + */ +int kbase_csf_mcu_shared_group_update_pmode_map(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_clear_evicted_group_csg_reg - Clear any bound regions/mappings as the + * given group is evicted out of the runtime + * operations. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the group that has been evicted out of set of operational groups. + * + * This function will taken away any of the bindings/mappings immediately so the resources + * are not tied up to the given group, which has been evicted out of scheduling action for + * termination. + */ +void kbase_csf_mcu_shared_clear_evicted_group_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_add_queue - Request to add a newly activated queue for a group to be + * run on slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that requires some runtime resource to be bound for joining + * others that are already running on-slot with their bound group. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_mcu_shared_add_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_drop_stopped_queue - Request to drop a queue after it has been stopped + * from its operational state from a group. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue that has been stopped from operational state. 
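/*
 * Editor's illustrative sketch (not part of the header): the order in which
 * the CSF scheduler drives these interfaces, condensed from
 * mali_kbase_csf_scheduler.c. The function is hypothetical, assumes it lives
 * in a source file that includes this header, and ignores locking and most
 * error handling so that only the call sequence is visible.
 */
static int example_csg_reg_call_sequence(struct kbase_device *kbdev,
					 struct kbase_queue_group *group,
					 struct kbase_queue *queue)
{
	int err;

	/* Before programming the group on a CSG slot: bind/map its resources. */
	err = kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group);
	if (err)
		return err;

	/* Once the slot is programmed, the bound region is in active use. */
	kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);

	/* A queue started while its group is already on slot gets its own map. */
	err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
	if (err)
		return err;

	/* A stopped queue drops its userio mapping again. */
	kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);

	/* Off slot: the region becomes lazily reusable by other groups... */
	kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);

	/* ...and on eviction every binding/mapping is cleared immediately. */
	kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);

	return 0;
}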
+ * + */ +void kbase_csf_mcu_shared_drop_stopped_queue(struct kbase_device *kbdev, struct kbase_queue *queue); + +/** + * kbase_csf_mcu_shared_group_bind_csg_reg - Bind some required runtime resources to the given + * group for ready to run on-slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @group: Pointer to the queue group that requires the runtime resources. + * + * This function binds/maps the required suspend buffer pages and userio pages for the given + * group, readying it to run on-slot. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_mcu_shared_group_bind_csg_reg(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_mcu_shared_regs_data_init - Allocate and initialize the MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function allocate and initialize the MCU shared VA regions for runtime operations + * of the CSF scheduler. + * + * Return: 0 on success, or an error code. + */ +int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev); + +/** + * kbase_csf_mcu_shared_regs_data_term - Terminate the allocated MCU shared regions data for + * the given device. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function terminates the MCU shared VA regions allocated for runtime operations + * of the CSF scheduler. + */ +void kbase_csf_mcu_shared_regs_data_term(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_MCU_SHARED_REG_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h index 6dde56cb161a..82389e5bf2a3 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_registers.h @@ -229,22 +229,44 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ +#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ +#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ +#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + +/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ +#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + #define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ #define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ /* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ +#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ +#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 
*/ +#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ #define GLB_HALT_STATUS 0x0010 /* () Global halt status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ +#ifdef CONFIG_MALI_CORESIGHT +#define GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT 4 +#define GLB_DEBUG_REQ_FW_AS_WRITE_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_WRITE_SHIFT) +#define GLB_DEBUG_REQ_FW_AS_READ_SHIFT 5 +#define GLB_DEBUG_REQ_FW_AS_READ_MASK (0x1 << GLB_DEBUG_REQ_FW_AS_READ_SHIFT) +#define GLB_DEBUG_ARG_IN0 0x0FE0 +#define GLB_DEBUG_ARG_IN1 0x0FE4 +#define GLB_DEBUG_ARG_OUT0 0x0FE0 +#endif /* CONFIG_MALI_CORESIGHT */ + /* USER register offsets */ #define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ @@ -304,10 +326,17 @@ #define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 #define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ - (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) + (((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) #define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) +#define CS_REQ_IDLE_SHARED_SB_DEC_SHIFT 12 +#define CS_REQ_IDLE_SHARED_SB_DEC_MASK (0x1 << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_GET(reg_val) \ + (((reg_val) & CS_REQ_IDLE_SHARED_SB_DEC_MASK) >> CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) +#define CS_REQ_IDLE_SHARED_SB_DEC_REQ_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SHARED_SB_DEC_MASK) | \ + (((value) << CS_REQ_IDLE_SHARED_SB_DEC_SHIFT) & CS_REQ_IDLE_SHARED_SB_DEC_MASK)) #define CS_REQ_TILER_OOM_SHIFT 26 #define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) #define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) @@ -582,6 +611,13 @@ #define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT 30 +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(reg_val) \ + (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) +#define CS_STATUS_WAIT_SYNC_WAIT_SIZE_SET(reg_val, value) \ + (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK) | \ + (((value) << CS_STATUS_WAIT_SYNC_WAIT_SIZE_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_SIZE_MASK)) #define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 #define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) #define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ @@ -1590,4 +1626,43 @@ ((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) +/* GLB_DEBUG_REQ register */ +#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define 
GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + +/* GLB_DEBUG_ACK register */ +#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + +/* RUN_MODE values */ +#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 +#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 +/* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c index b3cdef7dae52..135d3b01a2ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c @@ -32,6 +32,7 @@ #include #include #include "mali_kbase_csf_tiler_heap_reclaim.h" +#include "mali_kbase_csf_mcu_shared_reg.h" /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -57,6 +58,9 @@ /* Time to wait for completion of PING req before considering MCU as hung */ #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) +/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ +#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -561,7 +565,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_us == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -1458,6 +1462,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) err = sched_halt_stream(queue); unassign_user_doorbell_from_queue(kbdev, queue); + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); } mutex_unlock(&kbdev->csf.scheduler.lock); @@ -1575,17 +1580,15 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input(stream, CS_SIZE, queue->size); - user_input = (queue->reg->start_pfn << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, - user_input & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, - user_input >> 32); + user_input = queue->user_io_gpu_va; + WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); - user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, - user_output & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, - user_output >> 32); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32); + + user_output = user_input + PAGE_SIZE; + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); kbase_csf_firmware_cs_input(stream, CS_CONFIG, (queue->doorbell_nr << 8) | (queue->priority & 0xF)); @@ -1616,8 +1619,10 @@ static void program_cs(struct kbase_device *kbdev, * or protected mode switch. */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK); /* Set state to START/STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, @@ -1632,6 +1637,20 @@ static void program_cs(struct kbase_device *kbdev, update_hw_active(queue, true); } +static int onslot_csg_add_new_queue(struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + err = kbase_csf_mcu_shared_add_queue(kbdev, queue); + if (!err) + program_cs(kbdev, queue, true); + + return err; +} + int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) { struct kbase_queue_group *group = queue->group; @@ -1687,8 +1706,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) * user door-bell on such a case. */ kbase_csf_ring_cs_user_doorbell(kbdev, queue); - } else - program_cs(kbdev, queue, true); + } else { + err = onslot_csg_add_new_queue(queue); + /* For an on slot CSG, the only error in adding a new + * queue to run is that the scheduler could not map + * the required userio pages due to likely some resource + * issues. In such a case, and if the group is yet + * to enter its fatal error state, we return a -EBUSY + * to the submitter for another kick. The queue itself + * has yet to be programmed hence needs to remain its + * previous (disabled) state. 
If the error persists, + * the group will eventually reports a fatal error by + * the group's error reporting mechanism, when the MCU + * shared region map retry limit of the group is + * exceeded. For such a case, the expected error value + * is -EIO. + */ + if (unlikely(err)) { + queue->enabled = cs_enabled; + mutex_unlock(&kbdev->csf.scheduler.lock); + return (err != -EIO) ? -EBUSY : err; + } + } } queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work, msecs_to_jiffies(kbase_get_timeout_ms( @@ -1899,9 +1938,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue) struct kbase_vmap_struct *mapping; bool updated = false; u32 *sync_ptr; + u32 sync_wait_size; + u32 sync_wait_align_mask; u32 sync_wait_cond; u32 sync_current_val; struct kbase_device *kbdev; + bool sync_wait_align_valid = false; bool sync_wait_cond_valid = false; if (WARN_ON(!queue)) @@ -1911,6 +1953,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue) lockdep_assert_held(&kbdev->csf.scheduler.lock); + sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait); + sync_wait_align_mask = + (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; + sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; + if (!sync_wait_align_valid) { + dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", + queue->sync_ptr); + goto out; + } + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); @@ -1995,7 +2047,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); - if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { + if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { queue->status_wait = status; queue->sync_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); @@ -2011,7 +2063,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); - if (!evaluate_sync_update(queue)) { + if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || + !evaluate_sync_update(queue)) { is_waiting = true; } else { /* Sync object already got updated & met the condition @@ -2305,7 +2358,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2442,9 +2495,14 @@ static void save_csg_slot(struct kbase_queue_group *group) if (!queue || !queue->enabled) continue; - if (save_slot_cs(ginfo, queue)) - sync_wait = true; - else { + if (save_slot_cs(ginfo, queue)) { + /* sync_wait is only true if the queue is blocked on + * a CQS and not a scoreboard. + */ + if (queue->blocked_reason != + CS_STATUS_BLOCKED_ON_SB_WAIT) + sync_wait = true; + } else { /* Need to confirm if ringbuffer of the GPU * queue is empty or not. 
A race can arise * between the flush of GPU queue and suspend @@ -2558,6 +2616,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot); + /* Notify the group is off-slot and the csg_reg might be available for + * resue with other groups in a 'lazy unbinding' style. + */ + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return as_fault; } @@ -2641,8 +2704,8 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, u32 state; int i; unsigned long flags; - const u64 normal_suspend_buf = - group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; + u64 normal_suspend_buf; + u64 protm_suspend_buf; struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; @@ -2654,6 +2717,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { + dev_warn(kbdev->dev, + "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", + group->handle, group->kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return; + } + + /* The suspend buf has already been mapped through binding to csg_reg */ + normal_suspend_buf = group->normal_suspend_buf.gpu_va; + protm_suspend_buf = group->protected_suspend_buf.gpu_va; + WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); + ginfo = &global_iface->groups[slot]; /* Pick an available address space for this context */ @@ -2666,6 +2742,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, if (kctx->as_nr == KBASEP_AS_NR_INVALID) { dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", group->handle, kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); return; } @@ -2716,15 +2793,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, normal_suspend_buf >> 32); - if (group->protected_suspend_buf.reg) { - const u64 protm_suspend_buf = - group->protected_suspend_buf.reg->start_pfn << - PAGE_SHIFT; - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, - protm_suspend_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, - protm_suspend_buf >> 32); - } + /* Note, we program the P-mode buffer pointer here, but actual runtime + * enter into pmode execution is controlled by the P-mode phy pages are + * allocated and mapped with the bound csg_reg, which has a specific flag + * for indicating this P-mode runnable condition before a group is + * granted its p-mode section entry. Without a P-mode entry, the buffer + * pointed is not going to be accessed at all. 
+ */ + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); if (group->dvs_buf) { kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, @@ -2777,6 +2854,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); + + /* Notify the group's bound csg_reg is now in active use */ + kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); } static void remove_scheduled_group(struct kbase_device *kbdev, @@ -2797,7 +2877,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2808,7 +2888,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2823,8 +2903,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -2851,6 +2934,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); + + /* Clear all the bound shared regions and unmap any in-place MMU maps */ + kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); } static int term_group_sync(struct kbase_queue_group *group) @@ -3230,8 +3316,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ -3421,8 +3506,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3882,11 +3965,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. 
This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -3945,6 +4033,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, 0u); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Coresight must be disabled before entering protected mode. */ + kbase_debug_coresight_csf_disable_pmode_enter(kbdev); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_enter_protected_mode(kbdev); /* Set the pending protm seq number to the next one */ protm_enter_set_next_pending_seq(kbdev); @@ -4057,8 +4154,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4337,6 +4433,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -5170,16 +5268,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5993,8 +6087,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) + if (group->run_state == KBASE_CSF_GROUP_IDLE) { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } /* Check if the group is now eligible for execution in protected mode. 
*/ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6262,6 +6359,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int priority; int err; + kbase_ctx_sched_init_ctx(kctx); + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); @@ -6278,7 +6377,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -6291,10 +6391,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -6302,6 +6408,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -6320,7 +6428,7 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return -ENOMEM; } - return 0; + return kbase_csf_mcu_shared_regs_data_init(kbdev); } int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) @@ -6420,6 +6528,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) } KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL, kbase_csf_scheduler_get_nr_active_csgs(kbdev)); + /* Terminating the MCU shared regions, following the release of slots */ + kbase_csf_mcu_shared_regs_data_term(kbdev); } void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c new file mode 100644 index 000000000000..a5e0ab5eaf17 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.c @@ -0,0 +1,788 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_sync_debugfs.h" +#include "mali_kbase_csf_csg_debugfs.h" +#include +#include + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#include "mali_kbase_sync.h" +#endif + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" + +/* GPU queue related values */ +#define GPU_CSF_MOVE_OPCODE ((u64)0x1) +#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) +#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) +#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) +#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) +#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) +#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) +#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) +#define GPU_CSF_CALL_OPCODE ((u64)0x20) + +#define MAX_NR_GPU_CALLS (5) +#define INSTR_OPCODE_MASK ((u64)0xFF << 56) +#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) +#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) +#define MOVE_DEST_MASK ((u64)0xFF << 48) +#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) +#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) +#define SYNC_SRC0_MASK ((u64)0xFF << 40) +#define SYNC_SRC1_MASK ((u64)0xFF << 32) +#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) +#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) +#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) +#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) + +/* Enumeration for types of GPU queue sync events for + * the purpose of dumping them through debugfs. + */ +enum debugfs_gpu_sync_type { + DEBUGFS_GPU_SYNC_WAIT, + DEBUGFS_GPU_SYNC_SET, + DEBUGFS_GPU_SYNC_ADD, + NUM_DEBUGFS_GPU_SYNC_TYPES +}; + +/** + * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live 32-bit value. + * @live_val: Pointer to the u32 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u32(struct kbase_context *kctx, u64 obj_addr, + u32 *live_val) +{ + struct kbase_vmap_struct *mapping; + u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_debugfs_get_cqs_live_u64() - Obtain live (u64) value for a CQS object. + * + * @kctx: The context of the queue. + * @obj_addr: Pointer to the CQS live value (32 or 64-bit). + * @live_val: Pointer to the u64 that will be set to the CQS object's current, live + * value. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 obj_addr, + u64 *live_val) +{ + struct kbase_vmap_struct *mapping; + u64 *cpu_ptr = (u64 *)kbase_phy_alloc_mapping_get(kctx, obj_addr, &mapping); + + if (!cpu_ptr) + return -1; + + *live_val = *cpu_ptr; + kbase_phy_alloc_mapping_put(kctx, mapping); + return 0; +} + +/** + * kbasep_csf_sync_print_kcpu_fence_wait_or_signal() - Print details of a CSF SYNC Fence Wait + * or Fence Signal command, contained in a + * KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
+ */ +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, + struct kbase_kcpu_command *cmd, + const char *cmd_name) +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = NULL; +#else + struct dma_fence *fence = NULL; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + + struct kbase_sync_fence_info info; + const char *timeline_name = NULL; + bool is_signaled = false; + + fence = cmd->info.fence.fence; + if (WARN_ON(!fence)) + return; + + kbase_sync_fence_info_get(cmd->info.fence.fence, &info); + timeline_name = fence->ops->get_timeline_name(fence); + is_signaled = info.status > 0; + + seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, + is_signaled); + + /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ + seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, + * contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_info *cqs_obj = &cmd->info.cqs_wait.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS + * Set command, contained in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *kctx = file->private; + size_t i; + + for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { + struct base_cqs_set *cqs_obj = &cmd->info.cqs_set.objs[i]; + + u32 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", (u64)live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:add arg_value:0x%.8x", 1); + } +} + +/** + * kbasep_csf_sync_get_wait_op_name() - Print the name of a CQS Wait Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. + */ +static const char *kbasep_csf_sync_get_wait_op_name(basep_cqs_wait_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_WAIT_OPERATION_LE: + string = "le"; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + string = "gt"; + break; + default: + string = "??"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_get_set_op_name() - Print the name of a CQS Set Operation. + * + * @op: The numerical value of operation. + * + * Return: const static pointer to the command name, or '??' if unknown. 
+ */ +static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op) +{ + const char *string; + + switch (op) { + case BASEP_CQS_SET_OPERATION_ADD: + string = "add"; + break; + case BASEP_CQS_SET_OPERATION_SET: + string = "set"; + break; + default: + string = "???"; + break; + } + return string; +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_wait_op() - Print details of a CSF SYNC CQS + * Wait Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + struct base_cqs_wait_operation_info *wait_op = + &cmd->info.cqs_wait_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_wait_op_name(wait_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, wait_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + } +} + +/** + * kbasep_csf_sync_print_kcpu_cqs_set_op() - Print details of a CSF SYNC CQS + * Set Operation command, contained + * in a KCPU queue. + * + * @file: The seq_file for printing to. + * @cmd: The KCPU Command to be printed. + */ +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, + struct kbase_kcpu_command *cmd) +{ + size_t i; + struct kbase_context *kctx = file->private; + + for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { + struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; + const char *op_name = kbasep_csf_sync_get_set_op_name( + (basep_cqs_set_operation_op)set_op->operation); + + u64 live_val; + int ret = kbasep_csf_debugfs_get_cqs_live_u64(kctx, set_op->addr, &live_val); + + bool live_val_valid = (ret >= 0); + + seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + + if (live_val_valid) + seq_printf(file, "0x%.16llx", live_val); + else + seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + + seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + } +} + +/** + * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue + * + * @file: The seq_file to print to. + * @queue: Pointer to the KCPU queue. + */ +static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, + struct kbase_kcpu_command_queue *queue) +{ + char started_or_pending; + struct kbase_kcpu_command *cmd; + struct kbase_context *kctx = file->private; + size_t i; + + if (WARN_ON(!queue)) + return; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + mutex_lock(&queue->lock); + + for (i = 0; i != queue->num_pending_cmds; ++i) { + started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; + seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, + started_or_pending); + + cmd = &queue->commands[queue->start_offset + i]; + switch (cmd->type) { +#if IS_ENABLED(CONFIG_SYNC_FILE) + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); + break; +#endif + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); + break; + default: + seq_puts(file, ", U, Unknown blocking command"); + break; + } + + seq_puts(file, "\n"); + } + + mutex_unlock(&queue->lock); +} + +/** + * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file) +{ + struct kbase_context *kctx = file->private; + unsigned long queue_idx; + + mutex_lock(&kctx->csf.kcpu_queues.lock); + seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id); + + queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + + while (queue_idx < KBASEP_MAX_KCPU_QUEUES) { + kbasep_csf_sync_kcpu_debugfs_print_queue(file, + kctx->csf.kcpu_queues.array[queue_idx]); + + queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES, + queue_idx + 1); + } + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + return 0; +} + +/** + * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations + * from a MOVE instruction. + * + * @move_cmd: Raw MOVE instruction. + * @sync_addr_reg: Register identifier from SYNC_* instruction. + * @compare_val_reg: Register identifier from SYNC_* instruction. + * @sync_val: Pointer to store CQS object address for sync operation. + * @compare_val: Pointer to store compare value for sync operation. + * + * Return: True if value is obtained by checking for correct register identifier, + * or false otherwise. + */ +static bool kbasep_csf_get_move_immediate_value(u64 move_cmd, u64 sync_addr_reg, + u64 compare_val_reg, u64 *sync_val, + u64 *compare_val) +{ + u64 imm_mask; + + /* Verify MOVE instruction and get immediate mask */ + if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE32_OPCODE) + imm_mask = MOVE32_IMM_MASK; + else if (INSTR_OPCODE_GET(move_cmd) == GPU_CSF_MOVE_OPCODE) + imm_mask = MOVE_IMM_MASK; + else + /* Error return */ + return false; + + /* Verify value from MOVE instruction and assign to variable */ + if (sync_addr_reg == MOVE_DEST_GET(move_cmd)) + *sync_val = move_cmd & imm_mask; + else if (compare_val_reg == MOVE_DEST_GET(move_cmd)) + *compare_val = move_cmd & imm_mask; + else + /* Error return */ + return false; + + return true; +} + +/** kbasep_csf_read_ringbuffer_value() - Reads a u64 from the ringbuffer at a provided + * offset. + * + * @queue: Pointer to the queue. + * @ringbuff_offset: Ringbuffer offset. + * + * Return: the u64 in the ringbuffer at the desired offset. 
+ */ +static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringbuff_offset) +{ + u64 page_off = ringbuff_offset >> PAGE_SHIFT; + u64 offset_within_page = ringbuff_offset & ~PAGE_MASK; + struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]); + u64 *ringbuffer = kmap_atomic(page); + u64 value = ringbuffer[offset_within_page / sizeof(u64)]; + + kunmap_atomic(ringbuffer); + return value; +} + +/** + * kbasep_csf_print_gpu_sync_op() - Print sync operation info for given sync command. + * + * @file: Pointer to debugfs seq_file file struct for writing output. + * @kctx: Pointer to kbase context. + * @queue: Pointer to the GPU command queue. + * @ringbuff_offset: Offset to index the ring buffer with, for the given sync command. + * (Useful for finding preceding MOVE commands) + * @sync_cmd: Entire u64 of the sync command, which has both sync address and + * comparison-value encoded in it. + * @type: Type of GPU sync command (e.g. SYNC_SET, SYNC_ADD, SYNC_WAIT). + * @is_64bit: Bool to indicate if operation is 64 bit (true) or 32 bit (false). + * @follows_wait: Bool to indicate if the operation follows at least one wait + * operation. Used to determine whether it's pending or started. + */ +static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_context *kctx, + struct kbase_queue *queue, u32 ringbuff_offset, + u64 sync_cmd, enum debugfs_gpu_sync_type type, + bool is_64bit, bool follows_wait) +{ + u64 sync_addr = 0, compare_val = 0, live_val = 0; + u64 move_cmd; + u8 sync_addr_reg, compare_val_reg, wait_condition = 0; + int err; + + static const char *const gpu_sync_type_name[] = { "SYNC_WAIT", "SYNC_SET", "SYNC_ADD" }; + static const char *const gpu_sync_type_op[] = { + "wait", /* This should never be printed, only included to simplify indexing */ + "set", "add" + }; + + if (type >= NUM_DEBUGFS_GPU_SYNC_TYPES) { + dev_warn(kctx->kbdev->dev, "Expected GPU queue sync type is unknown!"); + return; + } + + /* We expect there to be at least 2 preceding MOVE instructions, and + * Base will always arrange for the 2 MOVE + SYNC instructions to be + * contiguously located, and is therefore never expected to be wrapped + * around the ringbuffer boundary. + */ + if (unlikely(ringbuff_offset < (2 * sizeof(u64)))) { + dev_warn(kctx->kbdev->dev, + "Unexpected wraparound detected between %s & MOVE instruction", + gpu_sync_type_name[type]); + return; + } + + /* 1. Get Register identifiers from SYNC_* instruction */ + sync_addr_reg = SYNC_SRC0_GET(sync_cmd); + compare_val_reg = SYNC_SRC1_GET(sync_cmd); + + /* 2. Get values from first MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 3. Get values from next MOVE command */ + ringbuff_offset -= sizeof(u64); + move_cmd = kbasep_csf_read_ringbuffer_value(queue, ringbuff_offset); + if (!kbasep_csf_get_move_immediate_value(move_cmd, sync_addr_reg, compare_val_reg, + &sync_addr, &compare_val)) + return; + + /* 4. Get CQS object value */ + if (is_64bit) + err = kbasep_csf_debugfs_get_cqs_live_u64(kctx, sync_addr, &live_val); + else + err = kbasep_csf_debugfs_get_cqs_live_u32(kctx, sync_addr, (u32 *)(&live_val)); + + if (err) + return; + + /* 5. Print info */ + seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle, + queue->csi_index, queue->enabled && !follows_wait ? 
'S' : 'P', + gpu_sync_type_name[type]); + + if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID) + seq_puts(file, "slot:-"); + else + seq_printf(file, "slot:%d", (int)queue->group->csg_nr); + + seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val); + + if (type == DEBUGFS_GPU_SYNC_WAIT) { + wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd); + seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition)); + } else + seq_printf(file, "op:%s ", gpu_sync_type_op[type]); + + seq_printf(file, "arg_value:0x%.16llx\n", compare_val); +} + +/** + * kbasep_csf_dump_active_queue_sync_info() - Print GPU command queue sync information. + * + * @file: seq_file for printing to. + * @queue: Address of a GPU command queue to examine. + * + * This function will iterate through each command in the ring buffer of the given GPU queue from + * CS_EXTRACT, and if is a SYNC_* instruction it will attempt to decode the sync operation and + * print relevant information to the debugfs file. + * This function will stop iterating once the CS_INSERT address is reached by the cursor (i.e. + * when there are no more commands to view) or a number of consumed GPU CALL commands have + * been observed. + */ +static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue) +{ + struct kbase_context *kctx; + u32 *addr; + u64 cs_extract, cs_insert, instr, cursor; + bool follows_wait = false; + int nr_calls = 0; + + if (!queue) + return; + + kctx = queue->kctx; + + addr = (u32 *)queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32); + + addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32); + + cursor = cs_extract; + + if (!is_power_of_2(queue->size)) { + dev_warn(kctx->kbdev->dev, "GPU queue %u size of %u not a power of 2", + queue->csi_index, queue->size); + return; + } + + while ((cursor < cs_insert) && (nr_calls < MAX_NR_GPU_CALLS)) { + bool instr_is_64_bit = false; + /* Calculate offset into ringbuffer from the absolute cursor, + * by finding the remainder of the cursor divided by the + * ringbuffer size. The ringbuffer size is guaranteed to be + * a power of 2, so the remainder can be calculated without an + * explicit modulo. queue->size - 1 is the ringbuffer mask. 
+ */ + u32 cursor_ringbuff_offset = (u32)(cursor & (queue->size - 1)); + + /* Find instruction that cursor is currently on */ + instr = kbasep_csf_read_ringbuffer_value(queue, cursor_ringbuff_offset); + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD64_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + instr_is_64_bit = true; + default: + break; + } + + switch (INSTR_OPCODE_GET(instr)) { + case GPU_CSF_SYNC_ADD_OPCODE: + case GPU_CSF_SYNC_ADD64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_ADD, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_SET_OPCODE: + case GPU_CSF_SYNC_SET64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_SET, instr_is_64_bit, + follows_wait); + break; + case GPU_CSF_SYNC_WAIT_OPCODE: + case GPU_CSF_SYNC_WAIT64_OPCODE: + kbasep_csf_print_gpu_sync_op(file, kctx, queue, cursor_ringbuff_offset, + instr, DEBUGFS_GPU_SYNC_WAIT, instr_is_64_bit, + follows_wait); + follows_wait = true; /* Future commands will follow at least one wait */ + break; + case GPU_CSF_CALL_OPCODE: + nr_calls++; + /* Fallthrough */ + default: + /* Unrecognized command, skip past it */ + break; + } + + cursor += sizeof(u64); + } +} + +/** + * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of + * the provided queue group. + * + * @file: seq_file for printing to. + * @group: Address of a GPU command group to iterate through. + * + * This function will iterate through each queue in the provided GPU queue group and + * print its SYNC related commands. + */ +static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, + struct kbase_queue_group *const group) +{ + struct kbase_context *kctx = file->private; + unsigned int i; + + seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) + kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); +} + +/** + * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * + * @file: The seq_file for printing to. + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) +{ + u32 gr; + struct kbase_context *kctx = file->private; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + + kbdev = kctx->kbdev; + kbase_csf_scheduler_lock(kbdev); + kbase_csf_debugfs_update_active_groups_status(kbdev); + + for (gr = 0; gr < kbdev->csf.global_iface.group_num; gr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[gr].resident_group; + if (!group || group->kctx != kctx) + continue; + kbasep_csf_dump_active_group_sync_state(file, group); + } + + kbase_csf_scheduler_unlock(kbdev); + return 0; +} + +/** + * kbasep_csf_sync_debugfs_show() - Print CSF queue sync information + * + * @file: The seq_file for printing to. + * @data: The debugfs dentry private data, a pointer to kbase_context. + * + * Return: Negative error code or 0 on success. 
+ */ +static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) +{ + seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + + kbasep_csf_sync_kcpu_debugfs_show(file); + kbasep_csf_sync_gpu_debugfs_show(file); + return 0; +} + +static int kbasep_csf_sync_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_sync_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_sync_debugfs_fops = { + .open = kbasep_csf_sync_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * kbase_csf_sync_debugfs_init() - Initialise debugfs file. + * + * @kctx: Kernel context pointer. + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + const mode_t mode = 0444; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("csf_sync", mode, kctx->kctx_dentry, kctx, + &kbasep_csf_sync_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) + dev_warn(kctx->kbdev->dev, "Unable to create CSF Sync debugfs entry"); +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h new file mode 100644 index 000000000000..177e15d85341 --- /dev/null +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_sync_debugfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_ +#define _KBASE_CSF_SYNC_DEBUGFS_H_ + +/* Forward declaration */ +struct kbase_context; + +#define MALI_CSF_SYNC_DEBUGFS_VERSION 0 + +/** + * kbase_csf_sync_debugfs_init() - Create a debugfs entry for CSF queue sync info + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_sync_debugfs_init(struct kbase_context *kctx); + +#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c index 909362da0047..14d80970ff70 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c @@ -101,7 +101,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk( * @kctx: kbase context the chunk belongs to. * @chunk: The chunk whose external mappings are going to be removed. * - * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * This function marks the region as DONT NEED. 
Along with NO_USER_FREE, this indicates * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other * parts of kbase outside of tiler heap management should not take references on its physical * pages, and should not modify them. @@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT - * regions), and so we must clear that flag too before freeing + * regions), and so we must clear that flag too before freeing. + * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * no other code part within kbase can take a reference to it. */ + WARN_ON(chunk->region->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * kbase_gpu_vm_lock(kctx); - /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + /* Some checks done here as NO_USER_FREE still allows such things to be made * whilst we had dropped the region lock */ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { @@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * goto unroll_region; } + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another + * thread can have the "no user free" refcount increased between kbase_mem_alloc + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by + * remove_external_chunk_mappings (below). + * + * It should be fine and not a security risk if we let the region leak till + * region tracker termination in such a case. + */ + if (unlikely(chunk->region->no_user_free_refcnt > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + goto unroll_region; + } + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE * being requested, it's useful to document in code what those restrictions are, and ensure * they remain in place in future. 
*/ if (WARN(!chunk->region->gpu_alloc, - "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + "NO_USER_FREE chunks should not have had their alloc freed")) { goto unroll_region; } if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), - "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + "NO_USER_FREE chunks should not have been made ephemeral")) { goto unroll_region; } if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, - "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + "NO_USER_FREE chunks should not have been aliased")) { goto unroll_region; } @@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * remove_external_chunk_mappings(kctx, chunk); kbase_gpu_vm_unlock(kctx); + /* If page migration is enabled, we don't want to migrate tiler heap pages. + * This does not change if the constituent pages are already marked as isolated. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + return chunk; unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co return false; } - if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || + (reg->flags & KBASE_REG_PF_GROW)) { dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); return false; } @@ -719,14 +739,17 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ /* If we don't prevent userspace from unmapping this, we may run into * use-after-free, as we don't check for the existence of the region throughout. 
*/ - buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = buf_desc_reg; + heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_gpu_vm_unlock(kctx); if (unlikely(!vmap_ptr)) { @@ -811,7 +834,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -866,6 +889,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, return err; } +/** + * validate_allocation_request - Check whether the chunk allocation request + * received on tiler OOM should be handled at + * current time. + * + * @heap: The tiler heap the OOM is associated with + * @nr_in_flight: Number of fragment jobs in flight + * @pending_frag_count: Number of pending fragment jobs + * + * Context: must hold the tiler heap lock to guarantee its lifetime + * + * Return: + * * 0 - allowed to allocate an additional chunk + * * -EINVAL - invalid + * * -EBUSY - there are fragment jobs still in flight, which may free chunks + * after completing + * * -ENOMEM - the targeted number of in-flight chunks has been reached and + * no new ones will be allocated + */ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, u32 pending_frag_count) { diff --git a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c index bcab31d27945..069e827d16ff 100644 --- a/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->batch = HEAP_SHRINKER_BATCH; #if !defined(CONFIG_MALI_VECTOR_DUMP) +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(reclaim); +#else + register_shrinker(reclaim, "mali-csf-tiler-heap"); +#endif #endif } diff --git a/drivers/gpu/arm/bifrost/debug/Kbuild b/drivers/gpu/arm/bifrost/debug/Kbuild index 6e1f0f75c43e..ebf3ddb763a2 100644 --- a/drivers/gpu/arm/bifrost/debug/Kbuild +++ b/drivers/gpu/arm/bifrost/debug/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ bifrost_kbase-y += debug/mali_kbase_debug_ktrace.o ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o + bifrost_kbase-$(CONFIG_MALI_CORESIGHT) += debug/backend/mali_kbase_debug_coresight_csf.o else bifrost_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o endif diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c new file mode 100644 index 000000000000..ff5f947e2da5 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_csf.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static const char *coresight_state_to_string(enum kbase_debug_coresight_csf_state state) +{ + switch (state) { + case KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + return "DISABLED"; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + return "ENABLED"; + default: + break; + } + + return "UNKNOWN"; +} + +static bool validate_reg_addr(struct kbase_debug_coresight_csf_client *client, + struct kbase_device *kbdev, u32 reg_addr, u8 op_type) +{ + int i; + + if (reg_addr & 0x3) { + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not 32bit aligned", + op_type, reg_addr); + return false; + } + + for (i = 0; i < client->nr_ranges; i++) { + struct kbase_debug_coresight_csf_address_range *range = &client->addr_ranges[i]; + + if ((range->start <= reg_addr) && (reg_addr <= range->end)) + return true; + } + + dev_err(kbdev->dev, "Invalid operation %d: reg_addr (0x%x) not in client range", op_type, + reg_addr); + + return false; +} + +static bool validate_op(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_op *op) +{ + struct kbase_device *kbdev; + u32 reg; + + if (!op) + return false; + + if (!client) + return false; + + kbdev = (struct kbase_device *)client->drv_data; + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + if (validate_reg_addr(client, kbdev, op->op.write_imm.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + if (!validate_reg_addr(client, kbdev, reg, op->type)) + return false; + } + + return true; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + if (!op->op.write.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", 
op->type); + break; + } + + if (validate_reg_addr(client, kbdev, op->op.write.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + if (!op->op.read.ptr) { + dev_err(kbdev->dev, "Invalid operation %d: ptr not set", op->type); + break; + } + + if (validate_reg_addr(client, kbdev, op->op.read.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + if (validate_reg_addr(client, kbdev, op->op.poll.reg_addr, op->type)) + return true; + + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + fallthrough; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + if (op->op.bitw.ptr != NULL) + return true; + + dev_err(kbdev->dev, "Invalid bitwise operation pointer"); + + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return false; +} + +static bool validate_seq(struct kbase_debug_coresight_csf_client *client, + struct kbase_debug_coresight_csf_sequence *seq) +{ + struct kbase_debug_coresight_csf_op *ops = seq->ops; + int nr_ops = seq->nr_ops; + int i; + + for (i = 0; i < nr_ops; i++) { + if (!validate_op(client, &ops[i])) + return false; + } + + return true; +} + +static int execute_op(struct kbase_device *kbdev, struct kbase_debug_coresight_csf_op *op) +{ + int result = -EINVAL; + u32 reg; + + dev_dbg(kbdev->dev, "Execute operation %d", op->type); + + switch (op->type) { + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP: + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, + op->op.write_imm.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE: + for (reg = op->op.write_imm_range.reg_start; reg <= op->op.write_imm_range.reg_end; + reg += sizeof(u32)) { + result = kbase_csf_firmware_mcu_register_write(kbdev, reg, + op->op.write_imm_range.val); + if (!result) + break; + } + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE: + result = kbase_csf_firmware_mcu_register_write(kbdev, op->op.write.reg_addr, + *op->op.write.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ: + result = kbase_csf_firmware_mcu_register_read(kbdev, op->op.read.reg_addr, + op->op.read.ptr); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL: + result = kbase_csf_firmware_mcu_register_poll(kbdev, op->op.poll.reg_addr, + op->op.poll.mask, op->op.poll.val); + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND: + *op->op.bitw.ptr &= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR: + *op->op.bitw.ptr |= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR: + *op->op.bitw.ptr ^= op->op.bitw.val; + result = 0; + break; + case KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT: + *op->op.bitw.ptr = ~(*op->op.bitw.ptr); + result = 0; + break; + default: + dev_err(kbdev->dev, "Invalid operation %d", op->type); + break; + } + + return result; +} + +static int coresight_config_enable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_ENABLED) + return ret; + + for (i = 0; config->enable_seq && !ret && i < config->enable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->enable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config 
(0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_ENABLED; + } + + /* Always assign the return code during config enable. + * It gets propagated when calling config disable. + */ + config->error = ret; + + return ret; +} + +static int coresight_config_disable(struct kbase_device *kbdev, + struct kbase_debug_coresight_csf_config *config) +{ + int ret = 0; + int i; + + if (!config) + return -EINVAL; + + if (config->state == KBASE_DEBUG_CORESIGHT_CSF_DISABLED) + return ret; + + for (i = 0; config->disable_seq && !ret && i < config->disable_seq->nr_ops; i++) + ret = execute_op(kbdev, &config->disable_seq->ops[i]); + + if (!ret) { + dev_dbg(kbdev->dev, "Coresight config (0x%pK) state transition: %s to %s", config, + coresight_state_to_string(config->state), + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + } else { + /* Only assign the error if ret is not 0. + * As we don't want to overwrite an error from config enable + */ + if (!config->error) + config->error = ret; + } + + return ret; +} + +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges) +{ + struct kbase_debug_coresight_csf_client *client, *client_entry; + struct kbase_device *kbdev; + unsigned long flags; + int k; + + if (unlikely(!drv_data)) { + pr_err("NULL drv_data"); + return NULL; + } + + kbdev = (struct kbase_device *)drv_data; + + if (unlikely(!ranges)) { + dev_err(kbdev->dev, "NULL ranges"); + return NULL; + } + + if (unlikely(!nr_ranges)) { + dev_err(kbdev->dev, "nr_ranges is 0"); + return NULL; + } + + for (k = 0; k < nr_ranges; k++) { + if (ranges[k].end < ranges[k].start) { + dev_err(kbdev->dev, "Invalid address ranges 0x%08x - 0x%08x", + ranges[k].start, ranges[k].end); + return NULL; + } + } + + client = kzalloc(sizeof(struct kbase_debug_coresight_csf_client), GFP_KERNEL); + + if (!client) + return NULL; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(client_entry, &kbdev->csf.coresight.clients, link) { + struct kbase_debug_coresight_csf_address_range *client_ranges = + client_entry->addr_ranges; + int i; + + for (i = 0; i < client_entry->nr_ranges; i++) { + int j; + + for (j = 0; j < nr_ranges; j++) { + if ((ranges[j].start < client_ranges[i].end) && + (client_ranges[i].start < ranges[j].end)) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kfree(client); + dev_err(kbdev->dev, + "Client with range 0x%08x - 0x%08x already present at address range 0x%08x - 0x%08x", + client_ranges[i].start, client_ranges[i].end, + ranges[j].start, ranges[j].end); + + return NULL; + } + } + } + } + + client->drv_data = drv_data; + client->addr_ranges = ranges; + client->nr_ranges = nr_ranges; + list_add(&client->link, &kbdev->csf.coresight.clients); + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return client; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_register); + +void kbase_debug_coresight_csf_unregister(void *client_data) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config_entry; + struct kbase_device *kbdev; + unsigned long flags; + bool retry = true; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device 
*)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return; + } + + /* check for active config from client */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_del_init(&client->link); + + while (retry && !list_empty(&kbdev->csf.coresight.configs)) { + retry = false; + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry->client == client) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + kbase_debug_coresight_csf_config_free(config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + retry = true; + break; + } + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kfree(client); +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_unregister); + +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq) +{ + struct kbase_debug_coresight_csf_client *client; + struct kbase_debug_coresight_csf_config *config; + struct kbase_device *kbdev; + + if (unlikely(!client_data)) { + pr_err("NULL client"); + return NULL; + } + + client = (struct kbase_debug_coresight_csf_client *)client_data; + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return NULL; + } + + if (enable_seq) { + if (!validate_seq(client, enable_seq)) { + dev_err(kbdev->dev, "Invalid enable_seq"); + return NULL; + } + } + + if (disable_seq) { + if (!validate_seq(client, disable_seq)) { + dev_err(kbdev->dev, "Invalid disable_seq"); + return NULL; + } + } + + config = kzalloc(sizeof(struct kbase_debug_coresight_csf_config), GFP_KERNEL); + if (WARN_ON(!config)) + return NULL; + + config->client = client; + config->enable_seq = enable_seq; + config->disable_seq = disable_seq; + config->error = 0; + config->state = KBASE_DEBUG_CORESIGHT_CSF_DISABLED; + + INIT_LIST_HEAD(&config->link); + + return config; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_create); + +void kbase_debug_coresight_csf_config_free(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + kbase_debug_coresight_csf_config_disable(config); + + kfree(config); +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_free); + +int kbase_debug_coresight_csf_config_enable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check to prevent double entry of config */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + dev_err(kbdev->dev, "Config already enabled"); + return -EINVAL; + } + } +
spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when enabling coresight config"); + else + ret = coresight_config_enable(kbdev, config); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + } + + /* Add config to next enable sequence */ + if (!ret) { + spin_lock(&kbdev->csf.coresight.lock); + list_add(&config->link, &kbdev->csf.coresight.configs); + spin_unlock(&kbdev->csf.coresight.lock); + } + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_enable); + +int kbase_debug_coresight_csf_config_disable(void *config_data) +{ + struct kbase_debug_coresight_csf_config *config; + struct kbase_debug_coresight_csf_client *client; + struct kbase_device *kbdev; + struct kbase_debug_coresight_csf_config *config_entry; + bool found_in_list = false; + unsigned long flags; + int ret = 0; + + if (unlikely(!config_data)) { + pr_err("NULL config"); + return -EINVAL; + } + + config = (struct kbase_debug_coresight_csf_config *)config_data; + + /* Exit early if not enabled prior */ + if (list_empty(&config->link)) + return ret; + + client = (struct kbase_debug_coresight_csf_client *)config->client; + + if (unlikely(!client)) { + pr_err("NULL client in config"); + return -EINVAL; + } + + kbdev = (struct kbase_device *)client->drv_data; + if (unlikely(!kbdev)) { + pr_err("NULL drv_data in client"); + return -EINVAL; + } + + /* Check if the config is in the correct list */ + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (config_entry == config) { + found_in_list = true; + break; + } + } + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + if (!found_in_list) { + dev_err(kbdev->dev, "Config looks corrupted"); + return -EINVAL; + } + + kbase_csf_scheduler_lock(kbdev); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Check the state of Scheduler to confirm the desired state of MCU */ + if (((kbdev->csf.scheduler.state != SCHED_SUSPENDED) && + (kbdev->csf.scheduler.state != SCHED_SLEEPING) && + !kbase_csf_scheduler_protected_mode_in_use(kbdev)) || + kbase_pm_get_policy(kbdev) == &kbase_pm_always_on_policy_ops) { + kbase_csf_scheduler_spin_unlock(kbdev, flags); + /* Wait for MCU to reach the stable ON state */ + ret = kbase_pm_wait_for_desired_state(kbdev); + + if (ret) + dev_err(kbdev->dev, + "Wait for PM state failed when disabling coresight config"); + else + ret = coresight_config_disable(kbdev, config); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + } else if (kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { + /* MCU is OFF, so the disable sequence was already executed. + * + * Propagate any error that would have occurred during the enable + * or disable sequence. + * + * This is done as part of the disable sequence, since the call from + * client is synchronous. 
+ */ + ret = config->error; + } + + /* Remove config from next disable sequence */ + spin_lock(&kbdev->csf.coresight.lock); + list_del_init(&config->link); + spin_unlock(&kbdev->csf.coresight.lock); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_scheduler_unlock(kbdev); + + return ret; +} +EXPORT_SYMBOL(kbase_debug_coresight_csf_config_disable); + +static void coresight_config_enable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.enable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_enable(kbdev, config_entry)) + dev_err(kbdev->dev, "enable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +static void coresight_config_disable_all(struct work_struct *data) +{ + struct kbase_device *kbdev = + container_of(data, struct kbase_device, csf.coresight.disable_work); + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + if (coresight_config_disable(kbdev, config_entry)) + dev_err(kbdev->dev, "disable config (0x%pK) failed", config_entry); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + wake_up_all(&kbdev->csf.coresight.event_wait); +} + +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev) +{ + unsigned long flags; + + dev_dbg(kbdev->dev, "Coresight state %s before protected mode enter", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_ENABLED)); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_pm_lock(kbdev); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbdev->csf.coresight.disable_on_pmode_enter = true; + kbdev->csf.coresight.enable_on_pmode_exit = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_wait_for_desired_state(kbdev); + + kbase_pm_unlock(kbdev); +} + +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "Coresight state %s after protected mode exit", + coresight_state_to_string(KBASE_DEBUG_CORESIGHT_CSF_DISABLED)); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(kbdev->csf.coresight.disable_on_pmode_enter); + + kbdev->csf.coresight.enable_on_pmode_exit = true; + kbase_pm_update_state(kbdev); +} + +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + if (unlikely(!kbdev)) + return; + + if (unlikely(!kbdev->csf.coresight.workq)) + return; + + dev_dbg(kbdev->dev, "Coresight state %s requested", coresight_state_to_string(state)); + + switch (state) { + case 
KBASE_DEBUG_CORESIGHT_CSF_DISABLED: + queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.disable_work); + break; + case KBASE_DEBUG_CORESIGHT_CSF_ENABLED: + queue_work(kbdev->csf.coresight.workq, &kbdev->csf.coresight.enable_work); + break; + default: + dev_err(kbdev->dev, "Invalid Coresight state %d", state); + break; + } +} + +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + struct kbase_debug_coresight_csf_config *config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight check for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry(config_entry, &kbdev->csf.coresight.configs, link) { + if (state != config_entry->state) { + success = false; + break; + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_check); + +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state) +{ + const long wait_timeout = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + bool success = true; + + dev_dbg(kbdev->dev, "Coresight wait for state: %s", coresight_state_to_string(state)); + + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + const enum kbase_debug_coresight_csf_state prev_state = config_entry->state; + long remaining; + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + remaining = wait_event_timeout(kbdev->csf.coresight.event_wait, + state == config_entry->state, wait_timeout); + spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + if (!remaining) { + success = false; + dev_err(kbdev->dev, + "Timeout waiting for Coresight state transition %s to %s", + coresight_state_to_string(prev_state), + coresight_state_to_string(state)); + } + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); + + return success; +} +KBASE_EXPORT_TEST_API(kbase_debug_coresight_csf_state_wait); + +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev) +{ + kbdev->csf.coresight.workq = alloc_ordered_workqueue("Mali CoreSight workqueue", 0); + if (kbdev->csf.coresight.workq == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&kbdev->csf.coresight.clients); + INIT_LIST_HEAD(&kbdev->csf.coresight.configs); + INIT_WORK(&kbdev->csf.coresight.enable_work, coresight_config_enable_all); + INIT_WORK(&kbdev->csf.coresight.disable_work, coresight_config_disable_all); + init_waitqueue_head(&kbdev->csf.coresight.event_wait); + spin_lock_init(&kbdev->csf.coresight.lock); + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + return 0; +} + +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev) +{ + struct kbase_debug_coresight_csf_client *client_entry, *next_client_entry; + struct kbase_debug_coresight_csf_config *config_entry, *next_config_entry; + unsigned long flags; + + kbdev->csf.coresight.disable_on_pmode_enter = false; + kbdev->csf.coresight.enable_on_pmode_exit = false; + + cancel_work_sync(&kbdev->csf.coresight.enable_work); + cancel_work_sync(&kbdev->csf.coresight.disable_work); + destroy_workqueue(kbdev->csf.coresight.workq); + kbdev->csf.coresight.workq = NULL; + + 
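+	/* The workqueue is destroyed, so no enable or disable work can run
+	 * any more: release any configs and clients still left on the lists.
+	 */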
spin_lock_irqsave(&kbdev->csf.coresight.lock, flags); + + list_for_each_entry_safe(config_entry, next_config_entry, &kbdev->csf.coresight.configs, + link) { + list_del_init(&config_entry->link); + kfree(config_entry); + } + + list_for_each_entry_safe(client_entry, next_client_entry, &kbdev->csf.coresight.clients, + link) { + list_del_init(&client_entry->link); + kfree(client_entry); + } + + spin_unlock_irqrestore(&kbdev->csf.coresight.lock, flags); +} diff --git a/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h new file mode 100644 index 000000000000..06d62dc70182 --- /dev/null +++ b/drivers/gpu/arm/bifrost/debug/backend/mali_kbase_debug_coresight_internal_csf.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ +#define _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ + +#include +#include + +/** + * struct kbase_debug_coresight_csf_client - Coresight client definition + * + * @drv_data: Pointer to driver device data. + * @addr_ranges: Arrays of address ranges used by the registered client. + * @nr_ranges: Size of @addr_ranges array. + * @link: Link item of a Coresight client. + * Linked to &struct_kbase_device.csf.coresight.clients. + */ +struct kbase_debug_coresight_csf_client { + void *drv_data; + struct kbase_debug_coresight_csf_address_range *addr_ranges; + u32 nr_ranges; + struct list_head link; +}; + +/** + * enum kbase_debug_coresight_csf_state - Coresight configuration states + * + * @KBASE_DEBUG_CORESIGHT_CSF_DISABLED: Coresight configuration is disabled. + * @KBASE_DEBUG_CORESIGHT_CSF_ENABLED: Coresight configuration is enabled. + */ +enum kbase_debug_coresight_csf_state { + KBASE_DEBUG_CORESIGHT_CSF_DISABLED = 0, + KBASE_DEBUG_CORESIGHT_CSF_ENABLED, +}; + +/** + * struct kbase_debug_coresight_csf_config - Coresight configuration definition + * + * @client: Pointer to the client for which the configuration is created. + * @enable_seq: Array of operations for Coresight client enable sequence. Can be NULL. + * @disable_seq: Array of operations for Coresight client disable sequence. Can be NULL. + * @state: Current Coresight configuration state. + * @error: Error code used to know if an error occurred during the execution + * of the enable or disable sequences. + * @link: Link item of a Coresight configuration. + * Linked to &struct_kbase_device.csf.coresight.configs. 
+ */ +struct kbase_debug_coresight_csf_config { + void *client; + struct kbase_debug_coresight_csf_sequence *enable_seq; + struct kbase_debug_coresight_csf_sequence *disable_seq; + enum kbase_debug_coresight_csf_state state; + int error; + struct list_head link; +}; + +/** + * struct kbase_debug_coresight_device - Object representing the Coresight device + * + * @clients: List head to maintain Coresight clients. + * @configs: List head to maintain Coresight configs. + * @lock: A lock to protect client/config lists. + * Lists can be accessed concurrently by + * Coresight kernel modules and kernel threads. + * @workq: Work queue for Coresight enable/disable execution. + * @enable_work: Work item used to enable Coresight. + * @disable_work: Work item used to disable Coresight. + * @event_wait: Wait queue for Coresight events. + * @enable_on_pmode_exit: Flag used by the PM state machine to + * identify if Coresight enable is needed. + * @disable_on_pmode_enter: Flag used by the PM state machine to + * identify if Coresight disable is needed. + */ +struct kbase_debug_coresight_device { + struct list_head clients; + struct list_head configs; + spinlock_t lock; + struct workqueue_struct *workq; + struct work_struct enable_work; + struct work_struct disable_work; + wait_queue_head_t event_wait; + bool enable_on_pmode_exit; + bool disable_on_pmode_enter; +}; + +/** + * kbase_debug_coresight_csf_init - Initialize Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called once at device initialization. + * + * Return: 0 on success. + */ +int kbase_debug_coresight_csf_init(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_term - Terminate Coresight resources. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called at device termination to prevent any + * memory leaks if the Coresight module is removed without calling + * kbasep_debug_coresight_csf_trace_disable(). + */ +void kbase_debug_coresight_csf_term(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_disable_pmode_enter - Disable Coresight on Protected + * mode enter. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called just before requesting to enter protected mode. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_ENTER_CORESIGHT_DISABLE. + */ +void kbase_debug_coresight_csf_disable_pmode_enter(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_enable_pmode_exit - Enable Coresight on Protected + * mode exit. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function should be called after protected mode exit is acknowledged. + * It will trigger a PM state machine transition from MCU_ON + * to ON_PMODE_EXIT_CORESIGHT_ENABLE. + */ +void kbase_debug_coresight_csf_enable_pmode_exit(struct kbase_device *kbdev); + +/** + * kbase_debug_coresight_csf_state_request - Request Coresight state transition. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to transition to. + */ +void kbase_debug_coresight_csf_state_request(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_check - Check Coresight state. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ * @state: Coresight state to check for. + * + * Return: true if all states of configs are @state. + */ +bool kbase_debug_coresight_csf_state_check(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +/** + * kbase_debug_coresight_csf_state_wait - Wait for Coresight state transition to complete. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @state: Coresight state to wait for. + * + * Return: true if all configs become @state in pre-defined time period. + */ +bool kbase_debug_coresight_csf_state_wait(struct kbase_device *kbdev, + enum kbase_debug_coresight_csf_state state); + +#endif /* _KBASE_DEBUG_CORESIGHT_INTERNAL_CSF_H_ */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c index 277569381292..e123b3ac57ac 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_csf.c @@ -29,10 +29,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif #include #include @@ -92,13 +89,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_ipa_control_init(kbdev); @@ -142,9 +139,9 @@ fail_pm_metrics_init: kbase_ipa_control_term(kbdev); #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -283,12 +280,13 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, @@ -344,6 +342,10 @@ static const struct kbase_device_init dev_init[] = { { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, "GPU property population failed" }, { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" }, +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + { kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term, + "Coresight initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ }; static void kbase_device_term_partial(struct kbase_device *kbdev, diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c 
index 3b792968a7d7..2abd62aaa8b1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_csf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -149,9 +150,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#ifdef CONFIG_MALI_BIFROST_DEBUG - WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); @@ -189,7 +187,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) } #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -static bool kbase_is_register_accessible(u32 offset) +bool kbase_is_register_accessible(u32 offset) { #ifdef CONFIG_MALI_BIFROST_DEBUG if (((offset >= MCU_SUBSYSTEM_BASE) && (offset < IPA_CONTROL_BASE)) || @@ -201,7 +199,9 @@ static bool kbase_is_register_accessible(u32 offset) return true; } +#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { if (WARN_ON(!kbdev->pm.backend.gpu_powered)) diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c index 52063fb0f533..38223af213d1 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_hw_jm.c @@ -106,7 +106,7 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); } -#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if IS_ENABLED(CONFIG_MALI_REAL_HW) void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { WARN_ON(!kbdev->pm.backend.gpu_powered); @@ -140,4 +140,4 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) return val; } KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ diff --git a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c index 129b4e430c52..6f0ec7d933c4 100644 --- a/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c +++ b/drivers/gpu/arm/bifrost/device/backend/mali_kbase_device_jm.c @@ -30,10 +30,7 @@ #include #include #include - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include @@ -74,13 +71,13 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) goto fail_timer; #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); err = -EINVAL; goto fail_interrupt_test; } -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_BIFROST_DEBUG */ err = kbase_job_slot_init(kbdev); @@ -119,9 +116,9 @@ fail_devfreq_init: fail_job_slot: #ifdef CONFIG_MALI_BIFROST_DEBUG -#ifndef CONFIG_MALI_BIFROST_NO_MALI +#if IS_ENABLED(CONFIG_MALI_REAL_HW) fail_interrupt_test: -#endif /* !CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* 
CONFIG_MALI_BIFROST_DEBUG */ kbase_backend_timer_term(kbdev); @@ -213,12 +210,13 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd } static const struct kbase_device_init dev_init[] = { -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) - { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, -#else +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) + { kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed" }, +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, { registers_map, registers_unmap, "Register map failed" }, -#endif +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c index 053400bd63f0..fb3e4176395e 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device.c +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device.c @@ -328,6 +328,9 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) kbdev->num_of_atoms_hw_completed = 0; #endif +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&kbdev->live_fence_metadata, 0); +#endif return 0; term_as: @@ -351,6 +354,11 @@ void kbase_device_misc_term(struct kbase_device *kbdev) if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + if (atomic_read(&kbdev->live_fence_metadata) > 0) + dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); +#endif } #if !MALI_USE_CSF diff --git a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h index 36b4698ca2f8..2c1c6ecec15f 100644 --- a/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/bifrost/device/mali_kbase_device_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,3 +99,13 @@ int kbase_device_late_init(struct kbase_device *kbdev); * @kbdev: Device pointer */ void kbase_device_late_term(struct kbase_device *kbdev); + +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +/** + * kbase_is_register_accessible - Checks if register is accessible + * @offset: Register offset + * + * Return: true if the register is accessible, false otherwise. 
+ */ +bool kbase_is_register_accessible(u32 offset); +#endif /* MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) */ diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 9985752a3748..f412531ab03a 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -34,13 +34,11 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" #include "mali_kbase_hwaccess_time.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include #include #include "mali_kbase_ccswe.h" -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -103,6 +101,8 @@ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long *flags) + __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -117,6 +117,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_i static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) + __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx) + ctx->kbdev->csf.scheduler.interrupt_lock) { struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; struct kbase_device *kbdev; @@ -345,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info); + mmu_sync_info, NULL, false); if (ret) goto mmu_insert_failed; @@ -480,7 +482,7 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR)); + fw_ring_buf->num_pages, MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 9d9889a0e426..669701c29152 100644 --- a/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -27,10 +27,7 @@ #include "mali_kbase_hwaccess_instr.h" #include "mali_kbase_hwaccess_time.h" #include "mali_kbase_ccswe.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include "backend/gpu/mali_kbase_model_linux.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include "backend/gpu/mali_kbase_pm_internal.h" diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c index 43cdf18a5e3b..21b4e52884c5 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -23,10 +23,13 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ +#define 
L2_RD_MSG_IN_CU (13) #define L2_RD_MSG_IN (16) #define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) #define L2_RD_MSG_OUT (22) #define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) /* SC counter block offsets */ @@ -36,17 +39,23 @@ #define FULL_QUAD_WARPS (21) #define EXEC_INSTR_FMA (27) #define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) #define EXEC_INSTR_MSG (30) #define TEX_FILT_NUM_OPS (39) #define LS_MEM_READ_SHORT (45) #define LS_MEM_WRITE_SHORT (47) #define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) /* Tiler counter block offsets */ #define IDVS_POS_SHAD_STALL (23) #define PREFETCH_STALL (25) #define VFETCH_POS_READ_WAIT (29) #define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) #define IDVS_VAR_SHAD_STALL (38) #define ITER_STALL (40) #define PMGR_PTR_RD_STALL (48) @@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { + TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), + MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. */ @@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), }; +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { + SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), + SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), + SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), + SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), + SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), + SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), + SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), +}; + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ .name = "mali-" #gpu "-power-model", \ @@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { #define ALIAS_POWER_MODEL(gpu, as_gpu) \ IPA_POWER_MODEL_OPS(gpu, as_gpu) -/* Reference voltage value is 750 mV. - */ +/* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); STANDARD_POWER_MODEL(tgrx, 750); STANDARD_POWER_MODEL(tvax, 750); - STANDARD_POWER_MODEL(ttux, 750); +/* Reference voltage value is 550 mV. 
*/ +STANDARD_POWER_MODEL(ttix, 550); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx); /* Assuming LTUX is an alias of TTUX for IPA */ ALIAS_POWER_MODEL(ltux, ttux); +/* Assuming LTIX is an alias of TTIX for IPA */ +ALIAS_POWER_MODEL(ltix, ttix); + static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops + &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, }; const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( @@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) return "mali-ttux-power-model"; case GPU_ID2_PRODUCT_LTUX: return "mali-ltux-power-model"; + case GPU_ID2_PRODUCT_TTIX: + return "mali-ttix-power-model"; + case GPU_ID2_PRODUCT_LTIX: + return "mali-ltix-power-model"; default: return NULL; } diff --git a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c index a32a2c207163..cc61f642399c 100644 --- a/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c +++ b/drivers/gpu/arm/bifrost/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -23,10 +23,7 @@ #include "mali_kbase_ipa_counter_common_jm.h" #include "mali_kbase.h" - -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#include /* Performance counter blocks base offsets */ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c index d15e98a54c40..b2e6bc459f22 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -84,11 +84,11 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, bool dt_required) { - struct device_node *model_dt_node; + struct device_node *model_dt_node = NULL; char compat_string[64]; - snprintf(compat_string, sizeof(compat_string), "arm,%s", - model->ops->name); + if (unlikely(!scnprintf(compat_string, sizeof(compat_string), "arm,%s", model->ops->name))) + return NULL; /* of_find_compatible_node() will call of_node_put() on the root node, * so take a reference on it first. @@ -111,12 +111,12 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, const char *name, s32 *addr, size_t num_elems, bool dt_required) { - int err, i; + int err = -EINVAL, i; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); char *origin; - err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems); /* We're done with model_dt_node now, so drop the reference taken in * get_model_dt_node()/of_find_compatible_node(). 
*/ @@ -138,11 +138,17 @@ int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, for (i = 0; i < num_elems; ++i) { char elem_name[32]; - if (num_elems == 1) - snprintf(elem_name, sizeof(elem_name), "%s", name); - else - snprintf(elem_name, sizeof(elem_name), "%s.%d", - name, i); + if (num_elems == 1) { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) { + err = -ENOMEM; + goto exit; + } + } else { + if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%d", name, i))) { + err = -ENOMEM; + goto exit; + } + } dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", model->ops->name, elem_name, addr[i], origin); @@ -164,7 +170,7 @@ int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, int err; struct device_node *model_dt_node = get_model_dt_node(model, dt_required); - const char *string_prop_value; + const char *string_prop_value = ""; char *origin; err = of_property_read_string(model_dt_node, name, diff --git a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c index 57508eb24749..8557fe8723cf 100644 --- a/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa_simple.c @@ -236,14 +236,12 @@ static int add_params(struct kbase_ipa_model *model) (struct kbase_ipa_model_simple_data *)model->model_data; err = kbase_ipa_model_add_param_s32(model, "static-coefficient", - &model_data->static_coefficient, - 1, true); + (s32 *)&model_data->static_coefficient, 1, true); if (err) goto end; err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", - &model_data->dynamic_coefficient, - 1, true); + (s32 *)&model_data->dynamic_coefficient, 1, true); if (err) goto end; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h index e4316981e635..fe8995aefc37 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_defs.h @@ -578,7 +578,7 @@ struct kbase_jd_atom { #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) int work_id; #endif - int slot_nr; + unsigned int slot_nr; u32 atom_flags; diff --git a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h index d03bcc0f27d8..53819caaf616 100644 --- a/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h +++ b/drivers/gpu/arm/bifrost/jm/mali_kbase_jm_js.h @@ -132,15 +132,15 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * Atoms of higher priority might still be able to be pulled from the context * on @js. This helps with starting a high priority atom as soon as possible. 
*/ -static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, - int js, int sched_prio) +static inline void kbase_jsctx_slot_prio_blocked_set(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(!slot_tracking->atoms_pulled_pri[sched_prio], - "When marking slot %d as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", + "When marking slot %u as blocked for priority %d on a kctx, no atoms were pulled - the slot cannot become unblocked", js, sched_prio); slot_tracking->blocked |= ((kbase_js_prio_bitmap_t)1) << sched_prio; @@ -509,19 +509,6 @@ void kbasep_js_resume(struct kbase_device *kbdev); bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, struct kbase_jd_atom *katom); -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. - * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); - /** * kbase_js_pull - Pull an atom from a context in the job scheduler for * execution. @@ -536,7 +523,7 @@ void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); * Return: a pointer to an atom, or NULL if there are no atoms for this * slot that can be currently run. */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js); /** * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. @@ -617,7 +604,7 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); * been used. 
* */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask); /** * kbase_js_zap_context - Attempt to deschedule a context that is being diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h index ea143ab49642..c6fea791b8c9 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_features.h @@ -131,16 +131,6 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tBAx[ BASE_HW_FEATURE_END }; -__attribute__((unused)) static const enum base_hw_feature base_hw_features_tDUx[] = { - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER, - BASE_HW_FEATURE_END -}; - __attribute__((unused)) static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, diff --git a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h index a360984acca5..2dc0402197de 100644 --- a/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/bifrost/mali_base_hwconfig_issues.h @@ -64,6 +64,9 @@ enum base_hw_issue { BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -88,6 +91,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -108,6 +113,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -128,6 +135,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tMIx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -143,6 +152,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tMI BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -156,6 +167,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p0 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -169,6 +182,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p1 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -182,6 +197,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p2 BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -194,6 +211,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tHEx_r0p3 
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -204,6 +223,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tHE BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -217,6 +238,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -230,6 +253,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r0p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -242,6 +267,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -253,6 +280,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tSIx_r1p1 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -263,6 +292,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tSI BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -274,6 +305,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDVx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -284,6 +317,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDV BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -296,6 +331,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -306,6 +343,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -318,6 +357,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r0p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -330,6 +371,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGOx_r1p0 BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_GPU2017_1336, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -340,6 +383,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGO BASE_HW_ISSUE_TSIX_1116, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -356,6 
+401,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -372,6 +419,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p1 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -387,6 +436,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTRx_r0p2 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -399,6 +450,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTR BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -415,6 +468,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -430,6 +485,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tNAx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -442,6 +499,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tNA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -456,6 +515,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -469,6 +530,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r0p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -482,6 +545,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -495,6 +560,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -507,6 +574,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBE BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -521,6 +590,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p0 BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -534,6 +605,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_lBEx_r1p1 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + 
BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -547,6 +620,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -560,6 +635,8 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0 BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; @@ -572,90 +649,74 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBA BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, - BASE_HW_ISSUE_END -}; - -__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -663,70 +724,97 @@ __attribute__((unused)) static 
const enum base_hw_issue base_hw_issues_tTUx_r0p0 BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_TITANHW_2710, + BASE_HW_ISSUE_TITANHW_2679, + BASE_HW_ISSUE_GPU2022PRO_148, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/bifrost/mali_kbase.h b/drivers/gpu/arm/bifrost/mali_kbase.h index 8e4d36141368..542e8f63fb5b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase.h +++ b/drivers/gpu/arm/bifrost/mali_kbase.h @@ -339,21 +339,8 @@ int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom); -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags); - -/** - * kbase_job_slot_hardstop - Hard-stop the specified job slot - * @kctx: The kbase context that contains 
the job(s) that should - * be hard-stopped - * @js: The job slot to hard-stop - * @target_katom: The job that should be hard-stopped (or NULL for all - * jobs from the context) - * Context: - * The job slot lock must be held when calling this function. - */ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js, + struct kbase_jd_atom *target_katom, u32 sw_flags); /** * kbase_job_check_enter_disjoint - potentiall enter disjoint mode @@ -448,19 +435,6 @@ static inline void kbase_free_user_buffer( } } -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. - * - * Return: 0 on success, error code otherwise. - */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); - #if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c index 08a9a3cd0479..10dbeee02e40 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_as_fault_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -98,11 +98,9 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) "unable to create address_spaces debugfs directory"); } else { for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); - debugfs_create_file(as_name, 0444, - debugfs_directory, - (void *)(uintptr_t)i, - &as_fault_fops); + if (likely(scnprintf(as_name, ARRAY_SIZE(as_name), "as%u", i))) + debugfs_create_file(as_name, 0444, debugfs_directory, + (void *)(uintptr_t)i, &as_fault_fops); } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c index fa094ab36b1f..7eb6b5a798ce 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_core_linux.c @@ -31,10 +31,7 @@ #include #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_BIFROST_DEVFREQ */ -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) #include "backend/gpu/mali_kbase_model_linux.h" -#include -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ #include "uapi/gpu/arm/bifrost/mali_kbase_mem_profile_debugfs_buf_size.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" @@ -632,7 +629,8 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); #if IS_ENABLED(CONFIG_DEBUG_FS) - snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + if (unlikely(!scnprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id))) + return -ENOMEM; mutex_init(&kctx->mem_profile_lock); @@ -671,8 +669,10 @@ static int kbase_open(struct inode *inode, struct file *filp) if (!kbdev) return -ENODEV; - /* Set address space operation for page migration */ +#if 
(KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + /* Set address space operations for page migration */ kbase_mem_migrate_set_address_space_ops(kbdev, filp); +#endif /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. @@ -1467,6 +1467,9 @@ static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -1479,6 +1482,9 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, static int kbasep_cs_tiler_heap_init_1_13(struct kbase_context *kctx, union kbase_ioctl_cs_tiler_heap_init_1_13 *heap_init) { + if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + kctx->jit_group_id = heap_init->in.group_id; return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, @@ -4278,7 +4284,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) static int kbase_common_reg_map(struct kbase_device *kbdev) { return 0; @@ -4286,7 +4292,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { } -#else /* CONFIG_MALI_BIFROST_NO_MALI */ +#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -4322,7 +4328,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ int registers_map(struct kbase_device * const kbdev) { @@ -4585,8 +4591,18 @@ int power_control_init(struct kbase_device *kbdev) * from completing its initialization. 
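/*
 * Illustrative sketch, not part of the patch: the tiler-heap ioctl handlers
 * above now reject an out-of-range group_id before caching it in the context.
 * This standalone C fragment mirrors that "validate the untrusted index
 * first" pattern; NR_GROUPS and ctx_cache_group() are invented names that
 * only stand in for MEMORY_GROUP_MANAGER_NR_GROUPS and the real handlers.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define NR_GROUPS 16u  /* stands in for MEMORY_GROUP_MANAGER_NR_GROUPS */

struct ctx_cache { uint8_t group_id; };

static int ctx_cache_group(struct ctx_cache *ctx, uint8_t group_id_from_user)
{
	/* Reject before any state is modified, as the patched handlers do. */
	if (group_id_from_user >= NR_GROUPS)
		return -EINVAL;

	ctx->group_id = group_id_from_user;
	return 0;
}

int main(void)
{
	struct ctx_cache ctx = { 0 };

	printf("group 3  -> %d\n", ctx_cache_group(&ctx, 3));   /* 0 */
	printf("group 42 -> %d\n", ctx_cache_group(&ctx, 42));  /* -EINVAL */
	return 0;
}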
*/ #if defined(CONFIG_PM_OPP) -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->nr_regulators > 0) { + kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names); + + if (kbdev->token < 0) { + err = kbdev->token; + goto regulators_probe_defer; + } + + } +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, @@ -4605,7 +4621,9 @@ int power_control_init(struct kbase_device *kbdev) return 0; } } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ + #ifdef CONFIG_ARCH_ROCKCHIP err = kbase_platform_rk_init_opp_table(kbdev); if (err) @@ -4645,13 +4663,17 @@ void power_control_term(struct kbase_device *kbdev) #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) - if (!IS_ERR_OR_NULL(kbdev->opp_table)) { +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->token > -EPERM) { dev_pm_opp_unregister_set_opp_helper(kbdev->opp_table); - dev_pm_opp_put_regulators(kbdev->opp_table); + dev_pm_opp_put_regulators(kbdev->token); } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + if (!IS_ERR_OR_NULL(kbdev->opp_table)) + dev_pm_opp_put_regulators(kbdev->opp_table); +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ #endif /* CONFIG_PM_OPP */ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { @@ -5514,6 +5536,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->dev = &pdev->dev; + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + kbdev->token = -EPERM; +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ + dev_set_drvdata(kbdev->dev, kbdev); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_lock(&kbase_probe_mutex); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c index 60afde2ceb7f..beb292862b21 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.c @@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -113,7 +119,7 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) if (atomic_inc_return(&kctx->refcount) == 1) { int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); - if (free_as != KBASEP_AS_NR_INVALID) { + if (free_as >= 0) { kbdev->as_free &= ~(1u << free_as); /* Only program the MMU if the context has not been * assigned the same address space before. 
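/*
 * Illustrative sketch, not part of the patch: on >= 6.0 kernels the regulator
 * hookup above stores an integer token (0 or greater) instead of an opp_table
 * pointer, with negative values carrying an errno. Because 0 is a valid
 * token, the field is pre-set to -EPERM so teardown can tell "never acquired"
 * from "token 0". fake_set_regulators()/fake_put_regulators() below are
 * hypothetical stubs; only the convention is being shown.
 */
#include <errno.h>
#include <stdio.h>

struct fake_dev { int token; };

static int fake_set_regulators(int make_it_fail)
{
	return make_it_fail ? -ENODEV : 0;  /* 0 is a legitimate token id */
}

static void fake_put_regulators(int token)
{
	printf("released regulators for token %d\n", token);
}

static int probe(struct fake_dev *d, int make_it_fail)
{
	d->token = -EPERM;  /* "not acquired" sentinel, as in the patch */

	d->token = fake_set_regulators(make_it_fail);
	return d->token < 0 ? d->token : 0;
}

static void term(struct fake_dev *d)
{
	/* Mirrors power_control_term(): only put if a token was obtained. */
	if (d->token > -EPERM)
		fake_put_regulators(d->token);
}

int main(void)
{
	struct fake_dev d;

	if (!probe(&d, 0))
		term(&d);
	return 0;
}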
@@ -167,8 +173,10 @@ void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) */ WARN_ON(!atomic_read(&kctx->refcount)); #endif - WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); - WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) + WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + else + WARN(true, "Invalid as_nr(%d)", kctx->as_nr); atomic_inc(&kctx->refcount); } @@ -182,16 +190,17 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) new_ref_count = atomic_dec_return(&kctx->refcount); if (new_ref_count == 0) { - kbdev->as_free |= (1u << kctx->as_nr); - if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { - KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( - kbdev, kctx->id); - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; - kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) { + kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); #if !MALI_USE_CSF - kbase_backend_slot_kctx_purge_locked(kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); #endif + } } } @@ -201,13 +210,14 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(atomic_read(&kctx->refcount) != 0); - if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + if ((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS)) { if (kbdev->pm.backend.gpu_powered) kbase_mmu_disable(kctx); @@ -215,6 +225,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h index f787cc34ba48..5a8d17547b7b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_ctx_sched.h @@ -59,6 +59,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); */ void kbase_ctx_sched_term(struct kbase_device *kbdev); +/** + * kbase_ctx_sched_ctx_init - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx + */ +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + /** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference @@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. 
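/*
 * Illustrative sketch, not part of the patch: the context-scheduler changes
 * above guard every use of kctx->as_nr with a 0 <= as_nr < BASE_MAX_NR_AS
 * check before touching the as_free bitmask or as_to_kctx[]. This toy
 * allocator shows the same bitmask-plus-bounds-check idea with invented
 * names (MAX_AS, as_alloc, as_release).
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_AS 8  /* stands in for BASE_MAX_NR_AS */

static uint32_t as_free = (1u << MAX_AS) - 1u;  /* bit set => slot free */

static int as_alloc(void)
{
	int as;

	for (as = 0; as < MAX_AS; as++) {
		if (as_free & (1u << as)) {
			as_free &= ~(1u << as);  /* claim it */
			return as;
		}
	}
	return -1;  /* no address space available (the KBASEP_AS_NR_INVALID role) */
}

static void as_release(int as)
{
	/* Ignore obviously invalid numbers instead of corrupting the mask. */
	if (as < 0 || as >= MAX_AS)
		return;
	as_free |= (1u << as);
}

int main(void)
{
	int a = as_alloc(), b = as_alloc();

	printf("got %d and %d\n", a, b);
	as_release(b);
	as_release(42);  /* silently rejected, as the bounds checks intend */
	as_release(a);
	return 0;
}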
- * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_defs.h b/drivers/gpu/arm/bifrost/mali_kbase_defs.h index 15fa0d71387a..80f76145e393 100755 --- a/drivers/gpu/arm/bifrost/mali_kbase_defs.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_defs.h @@ -650,7 +650,6 @@ struct kbase_process { * struct kbase_mem_migrate - Object representing an instance for managing * page migration. * - * @mapping: Pointer to address space struct used for page migration. * @free_pages_list: List of deferred pages to free. Mostly used when page migration * is enabled. Pages in memory pool that require migrating * will be freed instead. However page cannot be freed @@ -661,13 +660,17 @@ struct kbase_process { * @free_pages_workq: Work queue to process the work items queued to free * pages in @free_pages_list. * @free_pages_work: Work item to free pages in @free_pages_list. + * @inode: Pointer to inode whose address space operations are used + * for page migration purposes. */ struct kbase_mem_migrate { - struct address_space *mapping; struct list_head free_pages_list; spinlock_t free_pages_lock; struct workqueue_struct *free_pages_workq; struct work_struct free_pages_work; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + struct inode *inode; +#endif }; /** @@ -709,6 +712,10 @@ struct kbase_mem_migrate { * @opp_table: Pointer to the device OPP structure maintaining the * link to OPPs attached to a device. This is obtained * after setting regulator names for the device. + * @token: Integer replacement for opp_table in kernel versions + * 6 and greater. Value is a token id number when 0 or greater, + * and a linux errno when negative. Must be initialised + * to an non-zero value as 0 is valid token id. * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name. * @id: Unique identifier for the device, indicates the number of @@ -906,6 +913,10 @@ struct kbase_mem_migrate { * GPU2019-3878. PM state machine is invoked after * clearing this flag and @hwaccess_lock is used to * serialize the access. + * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction + * and cleared after the transaction completes. PM L2 state is + * prevented from entering powering up/down transitions when the + * flag is set, @hwaccess_lock is used to serialize the access. * @poweroff_pending: Set when power off operation for GPU is started, reset when * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the @@ -986,6 +997,10 @@ struct kbase_mem_migrate { * @oom_notifier_block: notifier_block containing kernel-registered out-of- * memory handler. * @mem_migrate: Per device object for managing page migration. + * @live_fence_metadata: Count of live fence metadata structures created by + * KCPU queue. These structures may outlive kbase module + * itself. Therefore, in such a case, a warning should be + * be produced. 
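/*
 * Illustrative sketch, not part of the patch: @live_fence_metadata above is a
 * plain counter of metadata objects that may outlive their creator, so a leak
 * can at least be reported at teardown. The standalone version below uses C11
 * atomics and invented names (live_meta, meta_alloc, meta_free,
 * module_exit_check).
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_int live_meta = 0;

static void *meta_alloc(void)
{
	atomic_fetch_add(&live_meta, 1);
	return malloc(32);
}

static void meta_free(void *m)
{
	free(m);
	atomic_fetch_sub(&live_meta, 1);
}

static void module_exit_check(void)
{
	int live = atomic_load(&live_meta);

	if (live)  /* the driver would emit a warning here */
		fprintf(stderr, "warning: %d metadata object(s) still live\n", live);
}

int main(void)
{
	void *a = meta_alloc();
	void *b = meta_alloc();

	meta_free(a);
	(void)b;              /* deliberately leaked for the demo */
	module_exit_check();  /* reports 1 live object */
	return 0;
}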
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1010,14 +1025,16 @@ struct kbase_device { #if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + int token; +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) struct opp_table *opp_table; -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; u32 id; -#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_REAL_HW) void *model; struct kmem_cache *irq_slab; struct workqueue_struct *irq_workq; @@ -1025,7 +1042,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_BIFROST_NO_MALI */ +#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ struct kbase_pm_device_data pm; struct kbase_mem_pool_group mem_pools; @@ -1186,6 +1203,7 @@ struct kbase_device { #if MALI_USE_CSF bool mmu_hw_operation_in_progress; #endif + bool mmu_page_migrate_in_progress; bool poweroff_pending; bool infinite_cache_active_default; @@ -1286,6 +1304,10 @@ struct kbase_device { #endif struct kbase_mem_migrate mem_migrate; + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_t live_fence_metadata; +#endif }; /** diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence.h b/drivers/gpu/arm/bifrost/mali_kbase_fence.h index dfe33e52b4ce..25986f604c6c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence.h @@ -33,6 +33,49 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#if MALI_USE_CSF +/* Maximum number of characters in DMA fence timeline name. */ +#define MAX_TIMELINE_NAME (32) + +/** + * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing + * information about KCPU queue. One instance per KCPU + * queue. + * + * @refcount: Atomic value to keep track of number of references to an instance. + * An instance can outlive the KCPU queue itself. + * @kbdev: Pointer to Kbase device. + * @kctx_id: Kbase context ID. + * @timeline_name: String of timeline name for associated fence object. + */ +struct kbase_kcpu_dma_fence_meta { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_t refcount; +#else + refcount_t refcount; +#endif + struct kbase_device *kbdev; + int kctx_id; + char timeline_name[MAX_TIMELINE_NAME]; +}; + +/** + * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a + * reference to metadata containing more informaiton about it. + * + * @base: Fence object itself. + * @metadata: Pointer to metadata structure. 
+ */ +struct kbase_kcpu_dma_fence { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence base; +#else + struct dma_fence base; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_dma_fence_meta *metadata; +}; +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else @@ -167,12 +210,56 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, */ #define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) +#if MALI_USE_CSF +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) +#else +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) +#endif +{ + if (fence->ops == &kbase_fence_ops) + return (struct kbase_kcpu_dma_fence *)fence; + + return NULL; +} + +static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + if (atomic_dec_and_test(&metadata->refcount)) { +#else + if (refcount_dec_and_test(&metadata->refcount)) { +#endif + atomic_dec(&metadata->kbdev->live_fence_metadata); + kfree(metadata); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_kcpu_dma_fence_put(struct fence *fence) +#else +static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + + if (kcpu_fence) + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} +#endif /* MALI_USE_CSF */ + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ -#define kbase_fence_put(fence) dma_fence_put(fence) - +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_fence_put(struct fence *fence) +#else +static inline void kbase_fence_put(struct dma_fence *fence) +#endif +{ + dma_fence_put(fence); +} #endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ diff --git a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c index be141553c674..25b4c9c03b53 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_fence_ops.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include static const char * @@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence) kbase_fence_get_timeline_name(struct dma_fence *fence) #endif { +#if MALI_USE_CSF + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + return kcpu_fence->metadata->timeline_name; +#else return kbase_timeline_name; +#endif /* MALI_USE_CSF */ } static bool @@ -62,24 +68,44 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) #endif { #if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - snprintf(str, size, "%u", fence->seqno); + const char *format = "%u"; #else - snprintf(str, size, "%llu", fence->seqno); + const char *format = "%llu"; #endif + if (unlikely(!scnprintf(str, size, format, fence->seqno))) + pr_err("Fail to encode fence seqno to string"); } +#if MALI_USE_CSF +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_release(struct fence *fence) +#else +kbase_fence_release(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); + kfree(kcpu_fence); +} +#endif 
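/*
 * Illustrative sketch, not part of the patch: each KCPU fence above carries a
 * pointer to a shared, refcounted metadata block, and the last put both frees
 * the block and drops the device-wide live counter. The same get/put shape is
 * shown in portable C11; struct meta, meta_get and meta_put are invented
 * names, and the driver itself uses refcount_t/atomic_t rather than
 * stdatomic.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_int live_meta = 0;

struct meta {
	atomic_int refcount;
	char timeline_name[32];
};

static struct meta *meta_create(void)
{
	struct meta *m = calloc(1, sizeof(*m));

	if (!m)
		return NULL;
	atomic_init(&m->refcount, 1);
	atomic_fetch_add(&live_meta, 1);
	return m;
}

static void meta_get(struct meta *m)
{
	atomic_fetch_add(&m->refcount, 1);
}

static void meta_put(struct meta *m)
{
	/* fetch_sub returns the old value; 1 means this was the last holder. */
	if (atomic_fetch_sub(&m->refcount, 1) == 1) {
		atomic_fetch_sub(&live_meta, 1);
		free(m);
	}
}

int main(void)
{
	struct meta *m = meta_create();

	meta_get(m);   /* second fence on the same queue */
	meta_put(m);   /* first fence released */
	meta_put(m);   /* last fence released: block freed, counter drops */
	printf("live metadata: %d\n", atomic_load(&live_meta));
	return 0;
}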
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, +const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else extern const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release +#else + .fence_value_str = kbase_fence_fence_value_str #endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str }; - +KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c index 0282aaf8eb3a..7a7d17ea5f26 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_gpuprops.c @@ -311,7 +311,6 @@ static void kbase_gpuprops_calculate_props( struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) { int i; - u32 gpu_id; /* Populate the base_gpu_props structure */ kbase_gpuprops_update_core_props_gpu_id(gpu_props); @@ -361,49 +360,23 @@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.tls_alloc = gpu_props->raw_props.thread_tls_alloc; - /* MIDHARC-2364 was intended for tULx. - * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
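/*
 * Illustrative sketch, not part of the patch: kbase_fence_fence_value_str()
 * above now picks the format once (the fence seqno is 32-bit before kernel
 * 5.1 and 64-bit from 5.1 on) and checks the encode result instead of
 * ignoring it. This standalone version uses snprintf and a fixed 64-bit
 * seqno; seqno_to_str is an invented name.
 */
#include <inttypes.h>
#include <stdio.h>

static int seqno_to_str(char *str, size_t size, uint64_t seqno)
{
	/* The driver swaps "%u"/"%llu" by kernel version; one format suffices here. */
	int n = snprintf(str, size, "%" PRIu64, seqno);

	if (n <= 0 || (size_t)n >= size) {
		fprintf(stderr, "failed to encode fence seqno\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	char buf[8];

	seqno_to_str(buf, sizeof(buf), 1234);                /* fits */
	seqno_to_str(buf, sizeof(buf), 123456789012345ULL);  /* truncated: reported */
	return 0;
}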
- */ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - #if MALI_USE_CSF - CSTD_UNUSED(gpu_id); gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22); gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2); gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8); gpu_props->thread_props.max_thread_group_split = 0; #else - if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 22); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 22U, 2); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 8); - gpu_props->thread_props.max_thread_group_split = 0; - } else { - gpu_props->thread_props.max_registers = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 0U, 16); - gpu_props->thread_props.max_task_queue = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 16U, 8); - gpu_props->thread_props.max_thread_group_split = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 24U, 6); - gpu_props->thread_props.impl_tech = - KBASE_UBFX32(gpu_props->raw_props.thread_features, - 30U, 2); - } + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); + gpu_props->thread_props.max_thread_group_split = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); #endif /* If values are not specified, then use defaults */ @@ -539,7 +512,7 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); static u32 l2_hash_values[ASN_HASH_COUNT] = { 0, }; -static int num_override_l2_hash_values; +static unsigned int num_override_l2_hash_values; module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); @@ -593,7 +566,7 @@ kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) kbdev->l2_hash_values_override = false; if (num_override_l2_hash_values) { - int i; + unsigned int i; kbdev->l2_hash_values_override = true; for (i = 0; i < num_override_l2_hash_values; i++) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hw.c b/drivers/gpu/arm/bifrost/mali_kbase_hw.c index b6a8a2e5608f..c658fb79429b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hw.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_hw.c @@ -68,9 +68,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: features = base_hw_features_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - features = base_hw_features_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: features = base_hw_features_tODx; @@ -211,10 +208,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TDUX, - { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, - { U32_MAX, NULL } } }, - { GPU_ID2_PRODUCT_TODX, { { 
GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, @@ -235,6 +228,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, @@ -393,9 +387,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBAX: issues = base_hw_issues_model_tBAx; break; - case GPU_ID2_PRODUCT_TDUX: - issues = base_hw_issues_model_tDUx; - break; case GPU_ID2_PRODUCT_TODX: case GPU_ID2_PRODUCT_LODX: issues = base_hw_issues_model_tODx; @@ -414,7 +405,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_LTIX: issues = base_hw_issues_model_tTIx; break; - default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h index 124a6d643e42..ca77c192deea 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_hwaccess_jm.h @@ -97,8 +97,8 @@ bool kbase_backend_use_ctx(struct kbase_device *kbdev, * Return: true if context is now active, false otherwise (ie if context does * not have an address space assigned) */ -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, int js); +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js); /** * kbase_backend_release_ctx_irq - Release a context from the GPU. This will @@ -183,8 +183,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); * * Return: Atom currently at the head of slot @js, or NULL */ -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js); +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a @@ -194,7 +193,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, * * Return: Number of atoms currently on slot */ -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot @@ -204,7 +203,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); * * Return: Number of atoms currently on slot @js that are currently on the GPU. */ -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, unsigned int js); /** * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs @@ -233,7 +232,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev); * * Return: Number of jobs that can be submitted. */ -int kbase_backend_slot_free(struct kbase_device *kbdev, int js); +int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js); /** * kbase_job_check_leave_disjoint - potentially leave disjoint state @@ -287,8 +286,8 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); * Context: * The job slot lock must be held when calling this function. 
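/*
 * Illustrative sketch, not part of the patch: the hardware-issue tables above
 * are selected per GPU version, with a { U32_MAX, NULL } sentinel closing
 * each product's list (the tTUx r0p1 row is the newly added entry). This
 * fragment shows the sentinel-terminated scan; VERSION_MAKE and the issue
 * arrays here are invented stand-ins, not the driver's encoding.
 */
#include <stdint.h>
#include <stdio.h>

#define VERSION_MAKE(major, minor, status) \
	(((uint32_t)(major) << 16) | ((uint32_t)(minor) << 8) | (uint32_t)(status))

static const int issues_r0p0[] = { 2716, 3901, 0 };
static const int issues_r0p1[] = { 2716, 3901, 148, 0 };

static const struct { uint32_t version; const int *issues; } version_map[] = {
	{ VERSION_MAKE(0, 0, 0), issues_r0p0 },
	{ VERSION_MAKE(0, 1, 0), issues_r0p1 },
	{ UINT32_MAX, NULL },  /* sentinel, as in the driver tables */
};

static const int *issues_for(uint32_t version)
{
	size_t i;

	for (i = 0; version_map[i].issues; i++)
		if (version_map[i].version == version)
			return version_map[i].issues;
	return NULL;  /* unknown version */
}

int main(void)
{
	printf("r0p1 known: %s\n", issues_for(VERSION_MAKE(0, 1, 0)) ? "yes" : "no");
	printf("r9p9 known: %s\n", issues_for(VERSION_MAKE(9, 9, 0)) ? "yes" : "no");
	return 0;
}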
*/ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); +void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js, + struct kbase_jd_atom *target_katom); /** * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jd.c b/drivers/gpu/arm/bifrost/mali_kbase_jd.c index f5faa92525c5..f44426a736ca 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jd.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jd.c @@ -28,6 +28,11 @@ #include #include #include +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE +#include +#else +#include +#endif #include #include @@ -1074,11 +1079,19 @@ int kbase_jd_submit(struct kbase_context *kctx, return -EINVAL; } + if (nr_atoms > BASE_JD_ATOM_COUNT) { + dev_dbg(kbdev->dev, "Invalid attempt to submit %u atoms at once for kctx %d_%d", + nr_atoms, kctx->tgid, kctx->id); + return -EINVAL; + } + /* All atoms submitted in this call have the same flush ID */ latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom user_atom; + struct base_jd_atom user_atom = { + .seq_nr = 0, + }; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; @@ -1202,6 +1215,12 @@ while (false) kbase_disjoint_event_potential(kbdev); mutex_unlock(&jctx->lock); + if (fatal_signal_pending(current)) { + dev_dbg(kbdev->dev, "Fatal signal pending for kctx %d_%d", + kctx->tgid, kctx->id); + /* We're being killed so the result code doesn't really matter */ + return 0; + } } if (need_to_try_schedule_context) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_jm.c index 6cbd6f1a423e..1ac5cd3eafff 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,15 +37,13 @@ * * Return: true if slot can still be submitted on, false if slot is now full. 
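/*
 * Illustrative sketch, not part of the patch: kbase_jd_submit() above now
 * caps the batch size up front, zero-initialises each atom descriptor before
 * copying it from user space, and stops early when the submitting task has a
 * fatal signal pending. The plain-C version below fakes the signal with a
 * flag; MAX_ATOMS, submit_batch and abort_requested are invented names.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_ATOMS 256  /* stands in for BASE_JD_ATOM_COUNT */

struct atom { unsigned int seq_nr; unsigned int flags; };

static bool abort_requested(unsigned int i)
{
	return i == 3;  /* pretend a fatal signal arrives mid-batch */
}

static int submit_batch(unsigned int nr_atoms)
{
	unsigned int i;

	if (nr_atoms > MAX_ATOMS)
		return -EINVAL;  /* reject oversized batches before any work */

	for (i = 0; i < nr_atoms; i++) {
		struct atom a = { .seq_nr = 0 };  /* no stale stack contents */

		/* ...copy from user and queue the atom here... */
		(void)a;
		if (abort_requested(i)) {
			printf("stopping after %u atoms\n", i + 1);
			return 0;  /* being killed: result code no longer matters */
		}
	}
	return 0;
}

int main(void)
{
	printf("oversized batch -> %d\n", submit_batch(100000));
	printf("small batch     -> %d\n", submit_batch(8));
	return 0;
}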
*/ -static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, - int nr_jobs_to_submit) +static bool kbase_jm_next_job(struct kbase_device *kbdev, unsigned int js, int nr_jobs_to_submit) { struct kbase_context *kctx; int i; kctx = kbdev->hwaccess.active_kctx[js]; - dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "Trying to run the next %d jobs in kctx %pK (s:%u)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -60,7 +58,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kbase_backend_run_atom(kbdev, katom); } - dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%u)\n", js); return false; } @@ -72,7 +70,7 @@ u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); while (js_mask) { - int js = ffs(js_mask) - 1; + unsigned int js = ffs(js_mask) - 1; int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) @@ -111,14 +109,14 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev) void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", (void *)kctx, + js); kbdev->hwaccess.active_kctx[js] = NULL; } } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_js.c b/drivers/gpu/arm/bifrost/mali_kbase_js.c index a64d7327a76b..78f2d7d47b3b 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_js.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_js.c @@ -77,8 +77,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom); static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_ctx_job_cb *callback); @@ -151,8 +150,7 @@ static void kbase_js_sync_timers(struct kbase_device *kbdev) * * Return: true if there are no atoms to pull, false otherwise. */ -static inline bool -jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) +static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, unsigned int js, int prio) { bool none_to_pull; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; @@ -161,9 +159,8 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); - dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %pK\n", - js, prio, none_to_pull ? "not " : "", kctx); + dev_dbg(kctx->kbdev->dev, "Slot %u (prio %d) is %spullable in kctx %pK\n", js, prio, + none_to_pull ? "not " : "", kctx); return none_to_pull; } @@ -179,8 +176,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) * Return: true if the ring buffers for all priorities have no pullable atoms, * false otherwise. 
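/*
 * Illustrative sketch, not part of the patch: the scheduler loops above
 * repeatedly take the lowest set bit of the slot mask with ffs() and clear
 * bits as slots are dealt with, so each requested job slot is visited exactly
 * once. Same loop below in standalone form; kick_slot is an invented stand-in
 * for the per-slot work.
 */
#include <stdio.h>
#include <strings.h>  /* ffs() */

static void kick_slot(unsigned int js)
{
	printf("kicking job slot %u\n", js);
}

static void kick_mask(unsigned int js_mask)
{
	while (js_mask) {
		unsigned int js = (unsigned int)ffs((int)js_mask) - 1u;

		kick_slot(js);
		js_mask &= ~(1u << js);  /* done with this slot */
	}
}

int main(void)
{
	kick_mask(0x0Du);  /* slots 0, 2, 3 */
	return 0;
}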
*/ -static inline bool -jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) +static inline bool jsctx_rb_none_to_pull(struct kbase_context *kctx, unsigned int js) { int prio; @@ -212,8 +208,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * * The HW access lock must always be held when calling this function. */ -static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, - int prio, kbasep_js_ctx_job_cb *callback) +static void jsctx_queue_foreach_prio(struct kbase_context *kctx, unsigned int js, int prio, + kbasep_js_ctx_job_cb *callback) { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -272,7 +268,7 @@ static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback * for each entry, and remove the entry from the queue. */ -static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, +static inline void jsctx_queue_foreach(struct kbase_context *kctx, unsigned int js, kbasep_js_ctx_job_cb *callback) { int prio; @@ -293,15 +289,14 @@ static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +static inline struct kbase_jd_atom *jsctx_rb_peek_prio(struct kbase_context *kctx, unsigned int js, + int prio) { struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; struct rb_node *node; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Peeking runnable tree of kctx %pK for prio %d (s:%u)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -326,8 +321,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * * Return: Pointer to next atom in buffer, or NULL if there is no atom. 
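/*
 * Illustrative sketch, not part of the patch: jsctx_rb_peek() above walks the
 * per-slot queues in priority order and returns the head of the first
 * non-empty one (the driver keeps each queue as an rbtree; plain arrays are
 * used here, with index 0 treated as the highest priority in this toy
 * version). queue_peek and NR_PRIO are invented names.
 */
#include <stddef.h>
#include <stdio.h>

#define NR_PRIO 3

struct queue { const char *head; };  /* NULL head => empty queue */

static const char *queue_peek(struct queue q[NR_PRIO])
{
	int prio;

	for (prio = 0; prio < NR_PRIO; prio++)
		if (q[prio].head)
			return q[prio].head;  /* first runnable atom found */
	return NULL;  /* nothing to pull on this slot */
}

int main(void)
{
	struct queue slot[NR_PRIO] = { { NULL }, { "atom-medium" }, { "atom-low" } };

	printf("next: %s\n", queue_peek(slot));  /* atom-medium */
	return 0;
}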
*/ -static inline struct kbase_jd_atom * -jsctx_rb_peek(struct kbase_context *kctx, int js) +static inline struct kbase_jd_atom *jsctx_rb_peek(struct kbase_context *kctx, unsigned int js) { int prio; @@ -358,7 +352,7 @@ static inline void jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -377,14 +371,14 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { struct kbase_device *kbdev = kctx->kbdev; int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); while (*new) { struct kbase_jd_atom *entry = container_of(*new, @@ -425,15 +419,11 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) jsctx_tree_add(kctx, katom); } -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, - int js, - bool is_scheduled); +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled); static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); + struct kbase_context *kctx, unsigned int js); typedef bool(katom_ordering_func)(const struct kbase_jd_atom *, const struct kbase_jd_atom *); @@ -645,6 +635,8 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) KBASE_DEBUG_ASSERT(kctx != NULL); + kbase_ctx_sched_init_ctx(kctx); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -683,7 +675,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) { struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; - int js; + unsigned int js; bool update_ctx_count = false; unsigned long flags; CSTD_UNUSED(js_kctx_info); @@ -722,6 +714,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -729,8 +723,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) */ /* Should not normally use directly - use kbase_jsctx_slot_atom_pulled_dec() instead */ -static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, - int js, int sched_prio) +static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, unsigned int js, + int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -742,7 +736,7 @@ static void kbase_jsctx_slot_prio_blocked_clear(struct kbase_context *kctx, NULL, 0, js, (unsigned int)sched_prio); } -static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) +static int kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, unsigned int js) { return atomic_read(&kctx->slot_tracking[js].atoms_pulled); } @@ -752,7 +746,7 @@ static int 
kbase_jsctx_slot_atoms_pulled(struct kbase_context *kctx, int js) * - that priority level is blocked * - or, any higher priority level is blocked */ -static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, +static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, unsigned int js, int sched_prio) { struct kbase_jsctx_slot_tracking *slot_tracking = @@ -792,7 +786,7 @@ static bool kbase_jsctx_slot_prio_is_blocked(struct kbase_context *kctx, int js, static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; struct kbase_jsctx_slot_tracking *slot_tracking = &kctx->slot_tracking[js]; @@ -801,7 +795,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, lockdep_assert_held(&kctx->kbdev->hwaccess_lock); WARN(kbase_jsctx_slot_prio_is_blocked(kctx, js, sched_prio), - "Should not have pulled atoms for slot %d from a context that is blocked at priority %d or higher", + "Should not have pulled atoms for slot %u from a context that is blocked at priority %d or higher", js, sched_prio); nr_atoms_pulled = atomic_inc_return(&kctx->atoms_pulled_all_slots); @@ -830,7 +824,7 @@ static int kbase_jsctx_slot_atom_pulled_inc(struct kbase_context *kctx, static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, const struct kbase_jd_atom *katom) { - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; int sched_prio = katom->sched_priority; int atoms_pulled_pri; struct kbase_jsctx_slot_tracking *slot_tracking = @@ -879,14 +873,12 @@ static bool kbase_jsctx_slot_atom_pulled_dec(struct kbase_context *kctx, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -921,14 +913,13 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head_nolock( - struct kbase_device *kbdev, struct kbase_context *kctx, int js) +static bool kbase_js_ctx_list_add_pullable_head_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%u)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -966,8 +957,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret; unsigned long flags; @@ -997,14 +987,12 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device 
*kbdev, * Return: true if caller should call kbase_backend_ctx_count_changed() */ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) + struct kbase_context *kctx, unsigned int js) { bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%u)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); @@ -1039,9 +1027,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { bool ret = false; @@ -1077,9 +1064,8 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, * Return: Context to use for specified slot. * NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( - struct kbase_device *kbdev, - int js) +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock(struct kbase_device *kbdev, + unsigned int js) { struct kbase_context *kctx; int i; @@ -1095,9 +1081,8 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( jctx.sched_info.ctx.ctx_list_entry[js]); list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - dev_dbg(kbdev->dev, - "Popped %pK from the pullable queue (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "Popped %pK from the pullable queue (s:%u)\n", (void *)kctx, + js); return kctx; } return NULL; @@ -1112,8 +1097,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( * Return: Context to use for specified slot. 
* NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head( - struct kbase_device *kbdev, int js) +static struct kbase_context *kbase_js_ctx_list_pop_head(struct kbase_device *kbdev, unsigned int js) { struct kbase_context *kctx; unsigned long flags; @@ -1137,8 +1121,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head( * Return: true if context can be pulled from on specified slot * false otherwise */ -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, - bool is_scheduled) +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, unsigned int js, bool is_scheduled) { struct kbasep_js_device_data *js_devdata; struct kbase_jd_atom *katom; @@ -1157,8 +1140,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { @@ -1166,7 +1148,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, kctx->kbdev, JS_SLOT_PRIO_IS_BLOCKED, kctx, katom, katom->jc, js, (unsigned int)katom->sched_priority); dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return false; } @@ -1187,14 +1169,14 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); return false; } } - dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", - (void *)katom, (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%u)\n", (void *)katom, + (void *)kctx, js); return true; } @@ -1205,7 +1187,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_device *kbdev = kctx->kbdev; bool ret = true; bool has_dep = false, has_x_dep = false; - int js = kbase_js_get_slot(kbdev, katom); + unsigned int js = kbase_js_get_slot(kbdev, katom); int prio = katom->sched_priority; int i; @@ -1213,7 +1195,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, struct kbase_jd_atom *dep_atom = katom->dep[i].atom; if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); + unsigned int dep_js = kbase_js_get_slot(kbdev, dep_atom); int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, @@ -1368,7 +1350,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) { struct kbase_device *kbdev = kctx->kbdev; - int js; + unsigned int js; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2074,9 +2056,8 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev) kbase_backend_timeouts_changed(kbdev); } -static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { struct kbasep_js_device_data *js_devdata; struct 
kbasep_js_kctx_info *js_kctx_info; @@ -2084,7 +2065,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%u)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -2111,8 +2092,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, WARN_ON(as_nr == KBASEP_AS_NR_INVALID); } } - if (as_nr == KBASEP_AS_NR_INVALID) - return false; /* No address spaces currently available */ + if ((as_nr < 0) || (as_nr >= BASE_MAX_NR_AS)) + return false; /* No address space currently available */ /* * Atomic transaction on the Context and Run Pool begins @@ -2219,9 +2200,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, return true; } -static bool kbase_js_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) +static bool kbase_js_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, + unsigned int js) { unsigned long flags; @@ -2229,9 +2209,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - - dev_dbg(kbdev->dev, - "kctx %pK already has ASID - mark as active (s:%d)\n", + dev_dbg(kbdev->dev, "kctx %pK already has ASID - mark as active (s:%u)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2498,8 +2476,7 @@ bool kbase_js_is_atom_valid(struct kbase_device *kbdev, return true; } -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) +static unsigned int kbase_js_get_slot(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->core_req & BASE_JD_REQ_JOB_SLOT) return katom->jobslot; @@ -2538,11 +2515,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, (katom->pre_dep && (katom->pre_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { int prio = katom->sched_priority; - int js = katom->slot_nr; + unsigned int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", - (void *)katom, js); + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%u)\n", (void *)katom, js); list_add_tail(&katom->queue, &queue->x_dep_head); katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; @@ -2633,8 +2609,8 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) * * Context: Caller must hold the HW access lock */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, int prio) +static void kbase_js_evict_deps(struct kbase_context *kctx, struct kbase_jd_atom *katom, + unsigned int js, int prio) { struct kbase_jd_atom *x_dep = katom->x_post_dep; struct kbase_jd_atom *next_katom = katom->post_dep; @@ -2666,7 +2642,7 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, } } -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, unsigned int js) { struct kbase_jd_atom *katom; struct kbasep_js_device_data *js_devdata; @@ -2676,8 +2652,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%u)\n", (void *)kctx, js); js_devdata = 
&kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -2696,13 +2671,12 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%u)\n", (void *)kctx, js); return NULL; } if (kbase_jsctx_slot_prio_is_blocked(kctx, js, katom->sched_priority)) { dev_dbg(kbdev->dev, - "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d and lower (s:%u)\n", (void *)kctx, katom->sched_priority, js); return NULL; } @@ -2736,7 +2710,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%u)\n", (void *)katom, js); return NULL; } @@ -2759,7 +2733,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%u)\n", (void *)katom, (void *)kctx, js); return katom; @@ -3362,7 +3336,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && !kbase_jsctx_atoms_pulled(kctx) && !kbase_ctx_flag(kctx, KCTX_DYING)) { - int js; + unsigned int js; kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -3374,7 +3348,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, } } else if (katom->x_post_dep && kbasep_js_is_submit_allowed(js_devdata, kctx)) { - int js; + unsigned int js; for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) @@ -3604,13 +3578,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) return false; } -void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +void kbase_js_sched(struct kbase_device *kbdev, unsigned int js_mask) { struct kbasep_js_device_data *js_devdata; struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; bool timer_sync = false; bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; - int js; + unsigned int js; KBASE_TLSTREAM_TL_JS_SCHED_START(kbdev, 0); @@ -3639,24 +3613,20 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) if (!kctx) { js_mask &= ~(1 << js); - dev_dbg(kbdev->dev, - "No kctx on pullable list (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No kctx on pullable list (s:%u)\n", js); break; } if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { context_idle = true; - dev_dbg(kbdev->dev, - "kctx %pK is not active (s:%d)\n", - (void *)kctx, js); + dev_dbg(kbdev->dev, "kctx %pK is not active (s:%u)\n", (void *)kctx, + js); if (kbase_pm_context_active_handle_suspend( kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - dev_dbg(kbdev->dev, - "Suspend pending (s:%d)\n", js); + dev_dbg(kbdev->dev, "Suspend pending (s:%u)\n", js); /* Suspend pending - return context to * queue and stop scheduling */ @@ -3714,16 +3684,13 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) kbase_ctx_flag_clear(kctx, KCTX_PULLED); if (!kbase_jm_kick(kbdev, 1 << js)) { - dev_dbg(kbdev->dev, - "No more jobs can be submitted (s:%d)\n", - js); + dev_dbg(kbdev->dev, "No more jobs can be submitted 
(s:%u)\n", js); js_mask &= ~(1 << js); } if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { bool pullable; - dev_dbg(kbdev->dev, - "No atoms pulled from kctx %pK (s:%d)\n", + dev_dbg(kbdev->dev, "No atoms pulled from kctx %pK (s:%u)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3807,8 +3774,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", - (void *)last_active[js], js); + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%u)\n", + (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } } @@ -3879,7 +3846,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) */ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { unsigned long flags; - int js; + unsigned int js; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { @@ -4003,7 +3970,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, { struct kbase_device *kbdev; unsigned long flags; - u32 js; + unsigned int js; kbdev = kctx->kbdev; @@ -4035,4 +4002,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c index 7b8961679a10..14a730dc5a12 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_jm.c @@ -48,6 +48,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Define static_assert(). * * The macro was introduced in kernel 5.1. But older vendor kernels may define diff --git a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c index 5fb11b7b94c5..823f9156e19e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_kinstr_prfcnt.c @@ -36,9 +36,15 @@ #include #include #include +#include #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The minimum allowed interval between dumps, in nanoseconds * (equivalent to 10KHz) */ @@ -115,16 +121,31 @@ struct kbase_kinstr_prfcnt_client_config { }; /** - * struct kbase_kinstr_prfcnt_async - Asynchronous sampling operation to - * carry out for a kinstr_prfcnt_client. - * @dump_work: Worker for performing asynchronous counter dumps. - * @user_data: User data for asynchronous dump in progress. - * @ts_end_ns: End timestamp of most recent async dump. + * enum kbase_kinstr_prfcnt_client_init_state - A list of + * initialisation states that the + * kinstr_prfcnt client can be at + * during initialisation. Useful + * for terminating a partially + * initialised client. 
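/*
 * Illustrative sketch, not part of the patch itself: the staged-init pattern
 * this enum enables, reduced to two hypothetical resources (all demo_* names
 * are made up). Creation walks the states in ascending order; on failure,
 * teardown replays only the states that completed, in reverse, starting from
 * whichever state was reached.
 */
#include <stdlib.h>

enum demo_init_state { DEMO_UNINIT, DEMO_BUF_A, DEMO_BUF_B, DEMO_INITIALISED };

struct demo_client { void *buf_a; void *buf_b; };

void demo_destroy_partial(struct demo_client *c, enum demo_init_state reached)
{
	/* Post-decrement: the body only ever sees states that completed. */
	while (reached-- > DEMO_UNINIT) {
		switch (reached) {
		case DEMO_BUF_B: free(c->buf_b); break;
		case DEMO_BUF_A: free(c->buf_a); break;
		default: break;
		}
	}
	free(c);
}

struct demo_client *demo_create(void)
{
	enum demo_init_state s;
	struct demo_client *c = calloc(1, sizeof(*c));

	if (!c)
		return NULL;

	for (s = DEMO_UNINIT; s < DEMO_INITIALISED; s++) {
		int err = 0;

		switch (s) {
		case DEMO_BUF_A: err = !(c->buf_a = malloc(64)); break;
		case DEMO_BUF_B: err = !(c->buf_b = malloc(64)); break;
		default: break;
		}
		if (err) {
			demo_destroy_partial(c, s);	/* 's' records how far we got */
			return NULL;
		}
	}
	return c;	/* full teardown later: demo_destroy_partial(c, DEMO_INITIALISED) */
}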
+ * + * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised */ -struct kbase_kinstr_prfcnt_async { - struct work_struct dump_work; - u64 user_data; - u64 ts_end_ns; +enum kbase_kinstr_prfcnt_client_init_state { + KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_ENABLE_MAP, + KINSTR_PRFCNT_DUMP_BUFFER, + KINSTR_PRFCNT_SAMPLE_ARRAY, + KINSTR_PRFCNT_VIRTUALIZER_CLIENT, + KINSTR_PRFCNT_WAITQ_MUTEX, + KINSTR_PRFCNT_INITIALISED }; /** @@ -134,9 +155,7 @@ struct kbase_kinstr_prfcnt_async { * @hvcli: Hardware counter virtualizer client. * @node: Node used to attach this client to list in * kinstr_prfcnt context. - * @cmd_sync_lock: Lock coordinating the reader interface for commands - * that need interacting with the async sample dump - * worker thread. + * @cmd_sync_lock: Lock coordinating the reader interface for commands. * @next_dump_time_ns: Time in ns when this client's next periodic dump must * occur. If 0, not a periodic client. * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic @@ -157,15 +176,10 @@ struct kbase_kinstr_prfcnt_async { * @waitq: Client's notification queue. * @sample_size: Size of the data required for one sample, in bytes. * @sample_count: Number of samples the client is able to capture. - * @sync_sample_count: Number of available spaces for synchronous samples. - * It can differ from sample_count if asynchronous - * sample requests are reserving space in the buffer. * @user_data: User data associated with the session. * This is set when the session is started and stopped. * This value is ignored for control commands that * provide another value. - * @async: Asynchronous sampling operations to carry out in this - * client's session. */ struct kbase_kinstr_prfcnt_client { struct kbase_kinstr_prfcnt_context *kinstr_ctx; @@ -186,9 +200,7 @@ struct kbase_kinstr_prfcnt_client { wait_queue_head_t waitq; size_t sample_size; size_t sample_count; - atomic_t sync_sample_count; u64 user_data; - struct kbase_kinstr_prfcnt_async async; }; static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = { @@ -423,6 +435,7 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena size_t grp, blk, blk_inst; struct prfcnt_metadata **ptr_md = block_meta_base; const struct kbase_hwcnt_metadata *metadata; + uint8_t block_idx = 0; if (!dst || !*block_meta_base) return -EINVAL; @@ -431,6 +444,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { u8 *dst_blk; + /* Block indices must be reported with no gaps. 
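/*
 * Illustrative sketch, not part of the patch itself: why blk_inst cannot be
 * reported directly as block_idx. Skipped (reserved, unavailable or disabled)
 * instances would leave holes, so the reported index is a separate counter
 * that only advances for blocks actually emitted. The driver additionally
 * resets the counter whenever a new block type starts (blk_inst == 0).
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const int emitted[6] = { 1, 0, 1, 1, 0, 1 };	/* made-up enable pattern */
	uint8_t block_idx = 0;
	int blk_inst;

	for (blk_inst = 0; blk_inst < 6; blk_inst++) {
		if (!emitted[blk_inst])
			continue;	/* using blk_inst here would leave gaps at 1 and 4 */
		/* ...fill one metadata item with .block_idx = block_idx... */
		block_idx++;
	}
	assert(block_idx == 4);		/* indices handed out: 0, 1, 2, 3 with no gaps */
	return 0;
}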
*/ + if (blk_inst == 0) + block_idx = 0; + /* Skip unavailable or non-enabled blocks */ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) || !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) || @@ -444,13 +461,14 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *ena kbase_hwcnt_metadata_block_type_to_prfcnt_block_type( kbase_hwcnt_metadata_block_type(metadata, grp, blk)); - (*ptr_md)->u.block_md.block_idx = (u8)blk_inst; + (*ptr_md)->u.block_md.block_idx = block_idx; (*ptr_md)->u.block_md.set = counter_set; (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN; (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr); /* update the buf meta data block pointer to next item */ (*ptr_md)++; + block_idx++; } return 0; @@ -502,33 +520,6 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata( ptr_md->hdr.item_version = 0; } -/** - * kbasep_kinstr_prfcnt_client_output_empty_sample() - Assemble an empty sample - * for output. - * @cli: Non-NULL pointer to a kinstr_prfcnt client. - * @buf_idx: The index to the sample array for saving the sample. - */ -static void kbasep_kinstr_prfcnt_client_output_empty_sample( - struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx) -{ - struct kbase_hwcnt_dump_buffer *dump_buf; - struct prfcnt_metadata *ptr_md; - - if (WARN_ON(buf_idx >= cli->sample_arr.sample_count)) - return; - - dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf; - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - - kbase_hwcnt_dump_buffer_zero(dump_buf, &cli->enable_map); - - /* Use end timestamp from most recent async dump */ - ptr_md->u.sample_md.timestamp_start = cli->async.ts_end_ns; - ptr_md->u.sample_md.timestamp_end = cli->async.ts_end_ns; - - kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md); -} - /** * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output. * @cli: Non-NULL pointer to a kinstr_prfcnt client. @@ -578,16 +569,11 @@ static void kbasep_kinstr_prfcnt_client_output_sample( * @cli: Non-NULL pointer to a kinstr_prfcnt client. * @event_id: Event type that triggered the dump. * @user_data: User data to return to the user. - * @async_dump: Whether this is an asynchronous dump or not. - * @empty_sample: Sample block data will be 0 if this is true. * * Return: 0 on success, else error code. */ -static int -kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, - enum base_hwcnt_reader_event event_id, - u64 user_data, bool async_dump, - bool empty_sample) +static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, + enum base_hwcnt_reader_event event_id, u64 user_data) { int ret; u64 ts_start_ns = 0; @@ -605,17 +591,11 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, /* Check if there is a place to copy HWC block into. Calculate the * number of available samples count, by taking into account the type * of dump. - * Asynchronous dumps have the ability to reserve space in the samples - * array for future dumps, unlike synchronous dumps. Because of that, - * the samples count for synchronous dumps is managed by a variable - * called sync_sample_count, that originally is defined as equal to the - * size of the whole array but later decreases every time an - * asynchronous dump request is pending and then re-increased every - * time an asynchronous dump request is completed. */ - available_samples_count = async_dump ? 
- cli->sample_arr.sample_count : - atomic_read(&cli->sync_sample_count); + available_samples_count = cli->sample_arr.sample_count; + WARN_ON(available_samples_count < 1); + /* Reserve one slot to store the implicit sample taken on CMD_STOP */ + available_samples_count -= 1; if (write_idx - read_idx == available_samples_count) { /* For periodic sampling, the current active dump * will be accumulated in the next sample, when @@ -631,38 +611,19 @@ kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli, */ write_idx %= cli->sample_arr.sample_count; - if (!empty_sample) { - ret = kbase_hwcnt_virtualizer_client_dump( - cli->hvcli, &ts_start_ns, &ts_end_ns, &cli->tmp_buf); - /* HWC dump error, set the sample with error flag */ - if (ret) - cli->sample_flags |= SAMPLE_FLAG_ERROR; + ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns, + &cli->tmp_buf); + /* HWC dump error, set the sample with error flag */ + if (ret) + cli->sample_flags |= SAMPLE_FLAG_ERROR; - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_sample( - cli, write_idx, user_data, ts_start_ns, ts_end_ns); - } else { - if (!async_dump) { - struct prfcnt_metadata *ptr_md; - /* User data will not be updated for empty samples. */ - ptr_md = cli->sample_arr.samples[write_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - } - - /* Make the sample ready and copy it to the userspace mapped buffer */ - kbasep_kinstr_prfcnt_client_output_empty_sample(cli, write_idx); - } + /* Make the sample ready and copy it to the userspace mapped buffer */ + kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns, + ts_end_ns); /* Notify client. Make sure all changes to memory are visible. */ wmb(); atomic_inc(&cli->write_idx); - if (async_dump) { - /* Remember the end timestamp of async dump for empty samples */ - if (!empty_sample) - cli->async.ts_end_ns = ts_end_ns; - - atomic_inc(&cli->sync_sample_count); - } wake_up_interruptible(&cli->waitq); /* Reset the flags for the next sample dump */ cli->sample_flags = 0; @@ -676,6 +637,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, { int ret; u64 tm_start, tm_end; + unsigned int write_idx; + unsigned int read_idx; + size_t available_samples_count; WARN_ON(!cli); lockdep_assert_held(&cli->cmd_sync_lock); @@ -684,6 +648,16 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, if (cli->active) return 0; + write_idx = atomic_read(&cli->write_idx); + read_idx = atomic_read(&cli->read_idx); + + /* Check whether there is space to store atleast an implicit sample + * corresponding to CMD_STOP. 
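/*
 * Illustrative sketch, not part of the patch itself: the read_idx/write_idx
 * arithmetic relied on above. The counters only ever increase and are reduced
 * modulo sample_count when indexing the array, so the unsigned difference
 * "write_idx - read_idx" is the number of queued samples even after 32-bit
 * wrap-around, and one slot stays reserved for the implicit sample produced
 * on CMD_STOP.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t sample_count = 4;
	const uint32_t capacity = sample_count - 1;	/* one slot kept for CMD_STOP */

	/* Values straddling the wrap point on purpose. */
	uint32_t read_idx = UINT32_MAX - 1;
	uint32_t write_idx = read_idx + 3;		/* wraps past zero */
	uint32_t queued = write_idx - read_idx;		/* modular arithmetic: 3 */

	assert(queued == 3);
	assert(queued == capacity);	/* ring treated as full: a new dump is refused */
	return 0;
}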
+ */ + available_samples_count = cli->sample_count - (write_idx - read_idx); + if (!available_samples_count) + return -EBUSY; + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); @@ -696,7 +670,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL); if (!ret) { - atomic_set(&cli->sync_sample_count, cli->sample_count); cli->active = true; cli->user_data = user_data; cli->sample_flags = 0; @@ -710,16 +683,6 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli, return ret; } -static int kbasep_kinstr_prfcnt_client_wait_async_done( - struct kbase_kinstr_prfcnt_client *cli) -{ - lockdep_assert_held(&cli->cmd_sync_lock); - - return wait_event_interruptible(cli->waitq, - atomic_read(&cli->sync_sample_count) == - cli->sample_count); -} - static int kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) @@ -728,7 +691,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, u64 tm_start = 0; u64 tm_end = 0; struct kbase_hwcnt_physical_enable_map phys_em; - struct kbase_hwcnt_dump_buffer *tmp_buf = NULL; + size_t available_samples_count; unsigned int write_idx; unsigned int read_idx; @@ -739,12 +702,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (!cli->active) return -EINVAL; - /* Wait until pending async sample operation done */ - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; + mutex_lock(&cli->kinstr_ctx->lock); + /* Disable counters under the lock, so we do not race with the + * sampling thread. + */ phys_em.fe_bm = 0; phys_em.tiler_bm = 0; phys_em.mmu_l2_bm = 0; @@ -752,15 +714,11 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em); - mutex_lock(&cli->kinstr_ctx->lock); - /* Check whether one has the buffer to hold the last sample */ write_idx = atomic_read(&cli->write_idx); read_idx = atomic_read(&cli->read_idx); - /* Check if there is a place to save the last stop produced sample */ - if (write_idx - read_idx < cli->sample_arr.sample_count) - tmp_buf = &cli->tmp_buf; + available_samples_count = cli->sample_count - (write_idx - read_idx); ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli, &cli->enable_map, @@ -770,7 +728,8 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli, if (ret) cli->sample_flags |= SAMPLE_FLAG_ERROR; - if (tmp_buf) { + /* There must be a place to save the last stop produced sample */ + if (!WARN_ON(!available_samples_count)) { write_idx %= cli->sample_arr.sample_count; /* Handle the last stop sample */ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, @@ -800,50 +759,6 @@ kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli, u64 user_data) { int ret; - bool empty_sample = false; - - lockdep_assert_held(&cli->cmd_sync_lock); - - /* If the client is not started, or not manual, the command invalid */ - if (!cli->active || cli->dump_interval_ns) - return -EINVAL; - - /* Wait until pending async sample operation done, this is required to - * satisfy the stated sample sequence following their issuing order, - * reflected by the sample start timestamp. - */ - if (atomic_read(&cli->sync_sample_count) != cli->sample_count) { - /* Return empty sample instead of performing real dump. 
- * As there is an async dump currently in-flight which will - * have the desired information. - */ - empty_sample = true; - ret = kbasep_kinstr_prfcnt_client_wait_async_done(cli); - - if (ret < 0) - return -ERESTARTSYS; - } - - mutex_lock(&cli->kinstr_ctx->lock); - - ret = kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - user_data, false, empty_sample); - - mutex_unlock(&cli->kinstr_ctx->lock); - - return ret; -} - -static int -kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, - u64 user_data) -{ - unsigned int write_idx; - unsigned int read_idx; - unsigned int active_async_dumps; - unsigned int new_async_buf_idx; - int ret; lockdep_assert_held(&cli->cmd_sync_lock); @@ -853,45 +768,7 @@ kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli, mutex_lock(&cli->kinstr_ctx->lock); - write_idx = atomic_read(&cli->write_idx); - read_idx = atomic_read(&cli->read_idx); - active_async_dumps = - cli->sample_count - atomic_read(&cli->sync_sample_count); - new_async_buf_idx = write_idx + active_async_dumps; - - /* Check if there is a place to copy HWC block into. - * If successful, reserve space in the buffer for the asynchronous - * operation to make sure that it can actually take place. - * Because we reserve space for asynchronous dumps we need to take that - * in consideration here. - */ - ret = (new_async_buf_idx - read_idx == cli->sample_arr.sample_count) ? - -EBUSY : - 0; - - if (ret == -EBUSY) { - mutex_unlock(&cli->kinstr_ctx->lock); - return ret; - } - - if (active_async_dumps > 0) { - struct prfcnt_metadata *ptr_md; - unsigned int buf_idx = - new_async_buf_idx % cli->sample_arr.sample_count; - /* Instead of storing user_data, write it directly to future - * empty sample. - */ - ptr_md = cli->sample_arr.samples[buf_idx].sample_meta; - ptr_md->u.sample_md.user_data = user_data; - - atomic_dec(&cli->sync_sample_count); - } else { - cli->async.user_data = user_data; - atomic_dec(&cli->sync_sample_count); - - kbase_hwcnt_virtualizer_queue_work(cli->kinstr_ctx->hvirt, - &cli->async.dump_work); - } + ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data); mutex_unlock(&cli->kinstr_ctx->lock); @@ -948,10 +825,6 @@ int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli, ret = kbasep_kinstr_prfcnt_client_sync_dump( cli, control_cmd->user_data); break; - case PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: - ret = kbasep_kinstr_prfcnt_client_async_dump( - cli, control_cmd->user_data); - break; case PRFCNT_CONTROL_CMD_DISCARD: ret = kbasep_kinstr_prfcnt_client_discard(cli); break; @@ -1006,17 +879,6 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli, sample_meta = cli->sample_arr.samples[read_idx].sample_meta; sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf; - /* Verify that a valid sample has been dumped in the read_idx. - * There are situations where this may not be the case, - * for instance if the client is trying to get an asynchronous - * sample which has not been dumped yet. 
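/*
 * Illustrative sketch, not part of the patch itself (generic demo_* names,
 * kernel-style): the publish/consume ordering the reader relies on once the
 * async path is gone. The producer side mirrors what
 * kbasep_kinstr_prfcnt_client_dump() does above: finish writing the sample,
 * issue a write barrier, and only then advance write_idx, so any index the
 * reader observes refers to a fully written sample.
 */
#include <linux/atomic.h>
#include <linux/types.h>
#include <asm/barrier.h>

struct demo_sample { u64 seq; char payload[64]; };

struct demo_ring {
	atomic_t write_idx;
	unsigned int count;
	struct demo_sample *samples;
};

static void demo_publish(struct demo_ring *r, const struct demo_sample *src)
{
	unsigned int slot = (unsigned int)atomic_read(&r->write_idx) % r->count;

	r->samples[slot] = *src;	/* write the whole sample first */
	wmb();				/* order the sample data before the index */
	atomic_inc(&r->write_idx);	/* only now is it visible to the reader */
}

static bool demo_fetch(struct demo_ring *r, unsigned int read_idx, struct demo_sample *dst)
{
	if ((unsigned int)atomic_read(&r->write_idx) == read_idx)
		return false;		/* nothing published yet */
	rmb();				/* pair with the producer's wmb() */
	*dst = r->samples[read_idx % r->count];
	return true;
}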
- */ - if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE || - sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) { - err = -EINVAL; - goto error_out; - } - sample_access->sequence = sample_meta->u.sample_md.seq; sample_access->sample_offset_bytes = sample_offset_bytes; @@ -1163,19 +1025,46 @@ static void kbasep_kinstr_prfcnt_sample_array_free( memset(sample_arr, 0, sizeof(*sample_arr)); } -void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +static void +kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, + enum kbase_kinstr_prfcnt_client_init_state init_state) { if (!cli) return; - kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); - kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); - kbase_hwcnt_enable_map_free(&cli->enable_map); - mutex_destroy(&cli->cmd_sync_lock); + while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { + switch (init_state) { + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + case KINSTR_PRFCNT_WAITQ_MUTEX: + mutex_destroy(&cli->cmd_sync_lock); + break; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); + break; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); + break; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); + break; + case KINSTR_PRFCNT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&cli->enable_map); + break; + case KINSTR_PRFCNT_PARSE_SETUP: + /* Nothing to do here */ + break; + } + } kfree(cli); } +void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +{ + kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + /** * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. * @inode: Non-NULL pointer to inode structure. @@ -1279,9 +1168,8 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) list_for_each_entry(pos, &kinstr_ctx->clients, node) { if (pos->active && (pos->next_dump_time_ns != 0) && (pos->next_dump_time_ns < cur_time_ns)) - kbasep_kinstr_prfcnt_client_dump( - pos, BASE_HWCNT_READER_EVENT_PERIODIC, - pos->user_data, false, false); + kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC, + pos->user_data); } kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx); @@ -1289,48 +1177,6 @@ static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work) mutex_unlock(&kinstr_ctx->lock); } -/** - * kbasep_kinstr_prfcnt_async_dump_worker()- Dump worker for a manual client - * to take a single asynchronous - * sample. - * @work: Work structure. - */ -static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work) -{ - struct kbase_kinstr_prfcnt_async *cli_async = - container_of(work, struct kbase_kinstr_prfcnt_async, dump_work); - struct kbase_kinstr_prfcnt_client *cli = container_of( - cli_async, struct kbase_kinstr_prfcnt_client, async); - - mutex_lock(&cli->kinstr_ctx->lock); - /* While the async operation is in flight, a sync stop might have been - * executed, for which the dump should be skipped. Further as we are - * doing an async dump, we expect that there is reserved buffer for - * this to happen. This is to avoid the rare corner case where the - * user side has issued a stop/start pair before the async work item - * get the chance to execute. 
- */ - if (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) - kbasep_kinstr_prfcnt_client_dump(cli, - BASE_HWCNT_READER_EVENT_MANUAL, - cli->async.user_data, true, - false); - - /* While the async operation is in flight, more async dump requests - * may have been submitted. In this case, no more async dumps work - * will be queued. Instead space will be reserved for that dump and - * an empty sample will be return after handling the current async - * dump. - */ - while (cli->active && - (atomic_read(&cli->sync_sample_count) < cli->sample_count)) { - kbasep_kinstr_prfcnt_client_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL, 0, true, true); - } - mutex_unlock(&cli->kinstr_ctx->lock); -} - /** * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for * execution as soon as possible. @@ -1790,83 +1636,100 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst { int err; struct kbase_kinstr_prfcnt_client *cli; + enum kbase_kinstr_prfcnt_client_init_state init_state; - WARN_ON(!kinstr_ctx); - WARN_ON(!setup); - WARN_ON(!req_arr); + if (WARN_ON(!kinstr_ctx)) + return -EINVAL; + + if (WARN_ON(!setup)) + return -EINVAL; + + if (WARN_ON(!req_arr)) + return -EINVAL; cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) return -ENOMEM; - cli->kinstr_ctx = kinstr_ctx; - err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr); + for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; + init_state++) { + err = 0; + switch (init_state) { + case KINSTR_PRFCNT_PARSE_SETUP: + cli->kinstr_ctx = kinstr_ctx; + err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, + req_arr); - if (err < 0) - goto error; + break; - cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_ns; - cli->next_dump_time_ns = 0; - cli->active = false; - atomic_set(&cli->write_idx, 0); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->fetch_idx, 0); + case KINSTR_PRFCNT_ENABLE_MAP: + cli->config.buffer_count = MAX_BUFFER_COUNT; + cli->dump_interval_ns = cli->config.period_ns; + cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->fetch_idx, 0); - err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, - &cli->enable_map); + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); + cli->sample_count = cli->config.buffer_count; + cli->sample_size = + kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); - cli->sample_count = cli->config.buffer_count; - atomic_set(&cli->sync_sample_count, cli->sample_count); - cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); + break; - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. 
- */ - err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, - &cli->tmp_buf); + case KINSTR_PRFCNT_SAMPLE_ARRAY: + /* Disable clock map in setup, and enable clock map when start */ + cli->enable_map.clk_enable_map = 0; - if (err < 0) - goto error; + /* Use metadata from virtualizer to allocate dump buffers if + * kinstr_prfcnt doesn't have the truncated metadata. + */ + err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); - /* Disable clock map in setup, and enable clock map when start */ - cli->enable_map.clk_enable_map = 0; + break; - /* Use metadata from virtualizer to allocate dump buffers if - * kinstr_prfcnt doesn't have the truncated metadata. - */ - err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + /* Set enable map to be 0 to prevent virtualizer to init and kick the + * backend to count. + */ + kbase_hwcnt_gpu_enable_map_from_physical( + &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); - if (err < 0) - goto error; + err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, + &cli->enable_map, &cli->hvcli); + break; - /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &(struct kbase_hwcnt_physical_enable_map){ 0 }); + case KINSTR_PRFCNT_WAITQ_MUTEX: + init_waitqueue_head(&cli->waitq); + mutex_init(&cli->cmd_sync_lock); + break; - err = kbase_hwcnt_virtualizer_client_create( - kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + } - if (err < 0) - goto error; - - init_waitqueue_head(&cli->waitq); - INIT_WORK(&cli->async.dump_work, - kbasep_kinstr_prfcnt_async_dump_worker); - mutex_init(&cli->cmd_sync_lock); + if (err < 0) { + kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); + return err; + } + } *out_vcli = cli; return 0; -error: - kbasep_kinstr_prfcnt_client_destroy(cli); - return err; } static size_t kbasep_kinstr_prfcnt_get_block_info_count( @@ -2085,17 +1948,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, union kbase_ioctl_kinstr_prfcnt_setup *setup) { int err; - unsigned int item_count; - unsigned long bytes; - struct prfcnt_request_item *req_arr; + size_t item_count; + size_t bytes; + struct prfcnt_request_item *req_arr = NULL; struct kbase_kinstr_prfcnt_client *cli = NULL; + const size_t max_bytes = 32 * sizeof(*req_arr); if (!kinstr_ctx || !setup) return -EINVAL; item_count = setup->in.request_item_count; - /* Limiting the request items to 2x of the expected: acommodating + /* Limiting the request items to 2x of the expected: accommodating * moderate duplications but rejecting excessive abuses. */ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) || @@ -2103,7 +1967,18 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx, return -EINVAL; } - bytes = item_count * sizeof(*req_arr); + if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes)) + return -EINVAL; + + /* Further limiting the max bytes to copy from userspace by setting it in the following + * fashion: a maximum of 1 mode item, 4 types of 3 sets for a total of 12 enable items, + * each currently at the size of prfcnt_request_item. + * + * Note: if more request types get added, this max limit needs to be updated. 
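/*
 * Illustrative sketch, not part of the patch itself: the overflow-checked,
 * upper-bounded copy pattern applied above to the prfcnt_request_item array.
 * The helper name and parameters here are hypothetical.
 */
#include <linux/overflow.h>
#include <linux/string.h>
#include <linux/err.h>
#include <linux/uaccess.h>

static void *demo_copy_bounded_array_from_user(const void __user *uptr, size_t count,
					       size_t elem_size, size_t max_bytes)
{
	size_t bytes;

	if (!uptr || !count)
		return ERR_PTR(-EINVAL);

	/* Reject multiplication overflow instead of silently truncating. */
	if (check_mul_overflow(count, elem_size, &bytes))
		return ERR_PTR(-EINVAL);

	/* Cap the worst-case allocation independently of what userspace claims. */
	if (bytes > max_bytes)
		return ERR_PTR(-EINVAL);

	return memdup_user(uptr, bytes);	/* itself returns ERR_PTR() on failure */
}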
+ */ + if (bytes > max_bytes) + return -EINVAL; + req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes); if (IS_ERR(req_arr)) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_linux.h index 1d8d1967eee9..e5c6f7a0b217 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_linux.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,7 @@ #include #include -#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) +#if IS_ENABLED(MALI_KERNEL_TEST_API) #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) #else #define KBASE_EXPORT_TEST_API(func) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.c b/drivers/gpu/arm/bifrost/mali_kbase_mem.c index 3743b4df999f..b18b1e25267e 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -803,6 +803,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); + break; +#if MALI_USE_CSF + case KBASE_REG_ZONE_EXEC_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); + break; + case KBASE_REG_ZONE_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); + break; + case KBASE_REG_ZONE_MCU_SHARED: + /* This is only expected to be called on driver unload. */ + break; +#endif + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; @@ -814,6 +848,8 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(reg->va_refcnt != 1); + if (kbase_page_migration_enabled) + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
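/*
 * Illustrative sketch, not part of the patch itself: the container_of trick
 * behind kbase_reg_flags_to_kctx(), which was moved above the rbtree erase
 * path. Each zone's rb_root is embedded in the owning context, so a pointer
 * to the embedded root plus a tag saying which root it is suffices to recover
 * the context. The demo_* types below are stand-ins.
 */
#include <assert.h>
#include <stddef.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_root { int dummy; };
enum demo_zone { DEMO_ZONE_SAME_VA, DEMO_ZONE_CUSTOM_VA };

struct demo_ctx {
	struct demo_root reg_rbtree_same;
	struct demo_root reg_rbtree_custom;
};

struct demo_ctx *demo_root_to_ctx(struct demo_root *root, enum demo_zone zone)
{
	switch (zone) {
	case DEMO_ZONE_SAME_VA:
		return demo_container_of(root, struct demo_ctx, reg_rbtree_same);
	case DEMO_ZONE_CUSTOM_VA:
		return demo_container_of(root, struct demo_ctx, reg_rbtree_custom);
	}
	return NULL;
}

int main(void)
{
	struct demo_ctx ctx;

	assert(demo_root_to_ctx(&ctx.reg_rbtree_same, DEMO_ZONE_SAME_VA) == &ctx);
	assert(demo_root_to_ctx(&ctx.reg_rbtree_custom, DEMO_ZONE_CUSTOM_VA) == &ctx);
	return 0;
}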
@@ -830,6 +866,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1554,6 +1594,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, return NULL; new_reg->va_refcnt = 1; + new_reg->no_user_free_refcnt = 0; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1572,41 +1613,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - /** * kbase_free_alloced_region - Free a region object. 
* @@ -1720,6 +1726,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; + bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1749,15 +1756,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { err = kbase_mmu_insert_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i] - .alloc->pages + - alloc->imported.alias.aliased[i] - .offset, + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, + NULL, ignore_page_migration); if (err) goto bad_insert; @@ -1777,12 +1781,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + ignore_page_migration = true; + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, ignore_page_migration); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1816,7 +1823,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, bad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); + reg->nr_pages, kctx->as_nr, ignore_page_migration); kbase_remove_va_region(kctx->kbdev, reg); @@ -1845,7 +1852,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped * should be passed to the kbase_mmu_teardown_pages() function, @@ -1853,27 +1859,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - int err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - alloc->pages + (i * alloc->imported.alias.stride), - alloc->imported.alias.aliased[i].length, - kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + struct tagged_addr *phys_alloc = NULL; + int err_loop; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + kctx->as_nr, false); + + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } } break; case KBASE_MEM_TYPE_IMPORTED_UMM: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr, true); break; default: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, false); break; } @@ -2046,6 +2062,7 @@ void kbase_sync_single(struct kbase_context *kctx, src = ((unsigned char *)kmap(gpu_page)) + offset; dst = ((unsigned char *)kmap(cpu_page)) + offset; } + memcpy(dst, src, size); kunmap(gpu_page); kunmap(cpu_page); @@ -2197,7 +2214,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_NO_USER_FREE) { + if (kbase_va_region_is_no_user_free(kctx, reg)) { dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2416,8 +2433,11 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - if (flags & BASEP_MEM_NO_USER_FREE) - reg->flags |= KBASE_REG_NO_USER_FREE; + if (flags & BASEP_MEM_NO_USER_FREE) { + kbase_gpu_vm_lock(kctx); + kbase_va_region_no_user_free_get(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; @@ -3206,9 +3226,32 @@ out_rollback: out_term: return -1; } - KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status) +{ + u32 i = 0; + + for (; i < alloc->nents; i++) { + struct tagged_addr phys = alloc->pages[i]; + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys) || is_partial(phys)) + continue; + + if (!page_md) + continue; + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } +} + bool kbase_check_alloc_flags(unsigned long flags) { /* Only known input flags should be set. 
*/ @@ -3766,7 +3809,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_NO_USER_FREE; + + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4419,7 +4470,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (ret < 0) { /* * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any + * chances are slim a new allocation would fare any * better so return the allocation to the pool and * return the function with failure. */ @@ -4441,6 +4492,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); reg = NULL; goto end; + } else { + /* A suitable JIT allocation existed on the evict list, so we need + * to make sure that the NOT_MOVABLE property is cleared. + */ + if (kbase_page_migration_enabled) { + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + } } } else { /* No suitable JIT allocation was found so create a new one */ @@ -4497,6 +4559,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } + /* Similarly to tiler heap init, there is a short window of time + * where the (either recycled or newly allocated, in our case) region has + * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the + * allocation is the least bad option that doesn't lead to a security issue down the + * line (it will eventually be cleaned up during context termination). + * + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region + * flags. + */ + kbase_gpu_vm_lock(kctx); + if (unlikely(reg->no_user_free_refcnt > 1)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + + reg = NULL; + goto end; + } + trace_mali_jit_alloc(reg, info->id); kctx->jit_current_allocations++; @@ -4514,6 +4599,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); end: for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) @@ -4584,6 +4670,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(®->jit_node, &kctx->jit_pool_head); + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. 
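/*
 * Illustrative sketch, not part of the patch itself: a condensed view of how
 * the new kbase_set_phy_alloc_page_status() helper is driven by this patch.
 * The wrapper name below is hypothetical; the real call sites are in
 * kbase_jit_allocate(), kbase_jit_free() and kbase_mem_evictable_make().
 */
static void demo_set_jit_pages_movable(struct kbase_va_region *reg, bool in_use)
{
	/* Locking elided: the real call sites hold kctx->reg_lock and
	 * kctx->jit_evict_lock around this.
	 */
	if (!kbase_page_migration_enabled)
		return;

	/* Recycled from the JIT pool: pages may migrate again.
	 * Parked for the shrinker: pages must stay put until freed.
	 */
	kbase_set_phy_alloc_page_status(reg->gpu_alloc,
					in_use ? ALLOCATED_MAPPED : NOT_MOVABLE);
}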
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } @@ -4630,7 +4722,14 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); } @@ -4652,7 +4751,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4664,7 +4770,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4873,10 +4986,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct page **pages; struct tagged_addr *pa; long i, dma_mapped_pages; - unsigned long address; struct device *dev; - unsigned long offset_within_page; - unsigned long remaining_size; unsigned long gwt_mask = ~0; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. @@ -4892,19 +5002,29 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc = reg->gpu_alloc; pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; pinned_pages = alloc->nents; pages = alloc->imported.user_buf.pages; dev = kctx->kbdev->dev; - offset_within_page = address & ~PAGE_MASK; - remaining_size = alloc->imported.user_buf.size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
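/*
 * Illustrative sketch, not part of the patch itself: the map-side half of the
 * manual cache maintenance described above, reduced to a single page (helper
 * name is hypothetical, unwind handling trimmed). Automatic sync is
 * suppressed with DMA_ATTR_SKIP_CPU_SYNC so that neighbouring, non-imported
 * bytes in the same page are never invalidated behind the user's back; the
 * whole page is then explicitly cleaned for the device so the GPU sees the
 * current memory content.
 */
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/errno.h>

static int demo_map_imported_page(struct device *dev, struct page *pg, dma_addr_t *out)
{
	dma_addr_t dma_addr = dma_map_page_attrs(dev, pg, 0, PAGE_SIZE,
						 DMA_BIDIRECTIONAL,
						 DMA_ATTR_SKIP_CPU_SYNC);

	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* Commit any dirty CPU cache lines covering the whole page. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	*out = dma_addr;
	return 0;
}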
+ */ for (i = 0; i < pinned_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, - DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); err = dma_mapping_error(dev, dma_addr); if (err) @@ -4913,8 +5033,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -4922,29 +5041,31 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err == 0) return 0; /* fall down */ unwind: alloc->nents = 0; - offset_within_page = address & ~PAGE_MASK; - remaining_size = alloc->imported.user_buf.size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This is precautionary measure in case a GPU job has taken + * advantage of a partially GPU-mapped range to write and corrupt the + * content of memory, either inside or outside the imported region. + * + * Notice that this error recovery path doesn't try to be optimal and just + * flushes the entire page range. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } /* The user buffer could already have been previously pinned before @@ -4985,12 +5106,85 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long unmap_size = - MIN(remaining_size, PAGE_SIZE - offset_within_page); + unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page); + /* Notice: this is a temporary variable that is used for DMA sync + * operations, and that could be incremented by an offset if the + * current page contains both imported and non-imported memory + * sub-regions. + * + * It is valid to add an offset to this value, because the offset + * is always kept within the physically contiguous dma-mapped range + * and there's no need to translate to physical address to offset it. + * + * This variable is not going to be used for the actual DMA unmap + * operation, that shall always use the original DMA address of the + * whole memory page. 
+ */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, dma_addr, unmap_size, - DMA_BIDIRECTIONAL); + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expect value of the variables + * used in this loop in the corner case of an imported region encloed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - offset_within_page + * | |/ + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } + + /* Notice: use the original DMA address to unmap the whole memory page. 
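/*
 * Illustrative sketch, not part of the patch itself: the head/imported/tail
 * split computed by the loop above, checked with plain integer arithmetic for
 * a single-page import like the one drawn in the diagram (4 KiB page, import
 * starting at offset 0x300 and spanning 0x900 bytes).
 */
#include <assert.h>
#include <stddef.h>

int main(void)
{
	const size_t page_size = 4096;
	const size_t offset_within_page = 0x300;	/* non-imported head */
	const size_t remaining_size = 0x900;		/* bytes of the import left */

	size_t imported_size = remaining_size < page_size - offset_within_page ?
				       remaining_size : page_size - offset_within_page;
	size_t tail = page_size - imported_size - offset_within_page;

	/* Head is cleaned for the device, the imported middle is invalidated
	 * for the CPU, and the tail is cleaned for the device again.
	 */
	assert(imported_size == 0x900);			/* fits entirely in this page */
	assert(tail == page_size - 0x900 - 0x300);
	assert(offset_within_page + imported_size + tail == page_size);
	return 0;
}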
*/ + dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + if (writeable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF @@ -4998,7 +5192,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem pages[i] = NULL; #endif - remaining_size -= unmap_size; + remaining_size -= imported_size; offset_within_page = 0; } #if !MALI_USE_CSF @@ -5079,8 +5273,9 @@ int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_regi break; } default: - WARN(1, "Invalid external resource GPU allocation type (%x) on mapping", - alloc->type); + dev_dbg(kctx->kbdev->dev, + "Invalid external resource GPU allocation type (%x) on mapping", + alloc->type); return -EINVAL; } @@ -5113,7 +5308,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, true); } if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem.h b/drivers/gpu/arm/bifrost/mali_kbase_mem.h index 5820f6d8a556..f0f5f92c793c 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -193,10 +193,11 @@ struct kbase_mem_phy_alloc { * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory * pool of a dying context are being moved to the device * memory pool. + * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is + * not movable, but may return to be movable when the object + * is freed. * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU * and has reference to kbase_mem_phy_alloc object. - * @MULTI_MAPPED: Stable state. This state is used to manage all use cases - * where a page may have "unusual" mappings. * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't * reference kbase_mem_phy_alloc object. Used as a page in MMU * page table. @@ -205,9 +206,11 @@ struct kbase_mem_phy_alloc { * unmapping it. This status means that a memory release is * happening and it's still not complete. * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. - * A page is isolated while it is in ALLOCATED_MAPPED or - * PT_MAPPED state, but then the driver tries to destroy the - * allocation. + * A page is isolated while it is in ALLOCATED_MAPPED state, + * but then the driver tries to destroy the allocation. + * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in PT_MAPPED state, but + * then the driver tries to destroy the allocation. * * Pages can only be migrated in stable states. 
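/*
 * Illustrative sketch, not part of the patch itself: a standalone round-trip
 * check of the PGD_VPFN_LEVEL_* helpers introduced just below in this header,
 * which pack a page-table level into the two low bits of a PGD's virtual PFN.
 * The macro bodies are copied from the header; the typedef is local to keep
 * the demo self-contained.
 */
#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;

#define PGD_VPFN_LEVEL_MASK ((u64)0x3)
#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK)
#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \
	((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK))

int main(void)
{
	const u64 vpfn = 0x12340ULL;	/* low two bits already clear */
	const u64 level = 2;
	u64 packed = PGD_VPFN_LEVEL_SET(vpfn, level);

	assert(PGD_VPFN_LEVEL_GET_VPFN(packed) == vpfn);
	assert(PGD_VPFN_LEVEL_GET_LEVEL(packed) == level);
	return 0;
}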
*/ @@ -215,23 +218,32 @@ enum kbase_page_status { MEM_POOL = 0, ALLOCATE_IN_PROGRESS, SPILL_IN_PROGRESS, + NOT_MOVABLE, ALLOCATED_MAPPED, - MULTI_MAPPED, PT_MAPPED, FREE_IN_PROGRESS, FREE_ISOLATED_IN_PROGRESS, + FREE_PT_ISOLATED_IN_PROGRESS, }; +#define PGD_VPFN_LEVEL_MASK ((u64)0x3) +#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ + ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + /** * struct kbase_page_metadata - Metadata for each page in kbase * * @kbdev: Pointer to kbase device. * @dma_addr: DMA address mapped to page. * @migrate_lock: A spinlock to protect the private metadata. + * @data: Member in union valid based on @status. * @status: Status to keep track if page can be migrated at any * given moment. MSB will indicate if page is isolated. * Protected by @migrate_lock. - * @data: Member in union valid based on @status. + * @vmap_count: Counter of kernel mappings. + * @group_id: Memory group ID obtained at the time of page allocation. * * Each 4KB page will have a reference to this struct in the private field. * This will be used to keep track of information required for Linux page @@ -240,7 +252,6 @@ enum kbase_page_status { struct kbase_page_metadata { dma_addr_t dma_addr; spinlock_t migrate_lock; - u8 status; union { struct { @@ -251,19 +262,25 @@ struct kbase_page_metadata { struct kbase_device *kbdev; } mem_pool; struct { - struct kbase_mem_phy_alloc *phy_alloc; struct kbase_va_region *reg; struct kbase_mmu_table *mmut; - struct page *pgd; u64 vpfn; - size_t page_array_index; } mapped; struct { struct kbase_mmu_table *mmut; - struct page *pgd; - u16 entry_info; + u64 pgd_vpfn_level; } pt_mapped; + struct { + struct kbase_device *kbdev; + } free_isolated; + struct { + struct kbase_device *kbdev; + } free_pt_isolated; } data; + + u8 status; + u8 vmap_count; + u8 group_id; }; /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is @@ -288,6 +305,20 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; +/** + * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying + * physical allocation. + * @alloc: the physical allocation containing the pages whose metadata is going + * to be modified + * @status: the status the pages should end up in + * + * Note that this function does not go through all of the checking to ensure that + * proper states are set. Instead, it is only used when we change the allocation + * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED + */ +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status); + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -388,6 +419,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. * @va_refcnt: Number of users of this region. Protected by reg_lock. + * @no_user_free_refcnt: Number of users that want to prevent the region from + * being freed by userspace. 
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of * an allocated region * The object can be one of: @@ -508,10 +541,7 @@ struct kbase_va_region { #define KBASE_REG_RESERVED_BIT_23 (1ul << 23) #endif /* !MALI_USE_CSF */ -/* Whilst this flag is set the GPU allocation is not supposed to be freed by - * user space. The flag will remain set for the lifetime of JIT allocations. - */ -#define KBASE_REG_NO_USER_FREE (1ul << 24) +/* Bit 24 is currently unused and is available for use for a new flag */ /* Memory has permanent kernel side mapping */ #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) @@ -652,6 +682,7 @@ struct kbase_va_region { #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; + int no_user_free_refcnt; }; /** @@ -694,6 +725,23 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); } +/** + * kbase_is_region_shrinkable - Check if a region is "shrinkable". + * A shrinkable regions is a region for which its backing pages (reg->gpu_alloc->pages) + * can be freed at any point, even though the kbase_va_region structure itself + * may have been refcounted. + * Regions that aren't on a shrinker, but could be shrunk at any point in future + * without warning are still considered "shrinkable" (e.g. Active JIT allocs) + * + * @reg: Pointer to region + * + * Return: true if the region is "shrinkable", false if not. + */ +static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg) +{ + return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC); +} + void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg); static inline void kbase_region_refcnt_free(struct kbase_device *kbdev, @@ -714,6 +762,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( lockdep_assert_held(&kctx->reg_lock); WARN_ON(!region->va_refcnt); + WARN_ON(region->va_refcnt == INT_MAX); /* non-atomic as kctx->reg_lock is held */ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", @@ -741,6 +790,69 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( return NULL; } +/** + * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region. + * A region that must not be freed by userspace indicates that it is owned by some other + * kbase subsystem, for example tiler heaps, JIT memory or CSF queues. + * Such regions must not be shrunk (i.e. have their backing pages freed), except by the + * current owner. + * Hence, callers cannot rely on this check alone to determine if a region might be shrunk + * by any part of kbase. Instead they should use kbase_is_region_shrinkable(). + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region. + * + * Return: true if userspace cannot free the region, false if userspace can free the region. + */ +static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + return region->no_user_free_refcnt > 0; +} + +/** + * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region. + * Calling this function will prevent the region to be shrunk by parts of kbase that + * don't own the region (as long as the refcount stays above zero). Refer to + * kbase_va_region_is_no_user_free() for more information. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). 
+ * + * Return: the pointer to the region passed as argument. + */ +static inline struct kbase_va_region * +kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kbase_is_region_shrinkable(region)); + WARN_ON(region->no_user_free_refcnt == INT_MAX); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt++; + + return region; +} + +/** + * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + */ +static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt--; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c index 9899cef317ac..998849fa4cc2 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -791,7 +792,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -855,6 +860,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -862,12 +870,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -919,6 +932,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. 
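Later in this file, kbase_mem_flags_change(), kbase_mem_alias() and kbase_mem_commit() replace their direct KBASE_REG_DONT_NEED / KBASE_REG_NO_USER_FREE flag tests with the two helpers added to mali_kbase_mem.h above. A condensed usage sketch of that API (illustrative only, not part of the patch; example_take_region_ownership() is hypothetical, and kbase_gpu_vm_lock()/kbase_gpu_vm_unlock() are assumed to take the kctx->reg_lock that the helpers assert with lockdep):

/* Sketch of a subsystem pinning a region against user free, then releasing it. */
static int example_take_region_ownership(struct kbase_context *kctx,
					 struct kbase_va_region *reg)
{
	kbase_gpu_vm_lock(kctx);

	/* Shrinkable regions (DONT_NEED or active JIT) may lose their backing
	 * pages at any time and cannot be pinned this way.
	 */
	if (kbase_is_region_shrinkable(reg)) {
		kbase_gpu_vm_unlock(kctx);
		return -EINVAL;
	}

	/* Pin: userspace can no longer free the region until the matching put. */
	kbase_va_region_no_user_free_get(kctx, reg);
	kbase_gpu_vm_unlock(kctx);

	/* ... use the region ... */

	kbase_gpu_vm_lock(kctx);
	kbase_va_region_no_user_free_put(kctx, reg);
	kbase_gpu_vm_unlock(kctx);
	return 0;
}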
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -977,7 +999,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1302,9 +1324,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err) goto bad_insert; @@ -1330,7 +1351,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr); + alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1359,7 +1380,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); WARN_ON(err); } @@ -1559,10 +1580,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); - unsigned long offset_within_page; - unsigned long remaining_size; struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + struct tagged_addr *pa; + struct device *dev; int write; /* Flag supported only for dma-buf imported memory */ @@ -1704,20 +1725,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->nents = 0; reg->extension = 0; - if (pages) { - struct device *dev = kctx->kbdev->dev; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + pa = kbase_get_gpu_phy_pages(reg); + dev = kctx->kbdev->dev; + if (pages) { /* Top bit signifies that this was pinned on import */ user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = user_buf->size; + /* Manual CPU cache synchronization. + * + * The driver disables automatic CPU cache synchronization because the + * memory pages that enclose the imported region may also contain + * sub-regions which are not imported and that are allocated and used + * by the user process. This may be the case of memory at the beginning + * of the first page and at the end of the last page. Automatic CPU cache + * synchronization would force some operations on those memory allocations, + * unbeknown to the user process: in particular, a CPU cache invalidate + * upon unmapping would destroy the content of dirty CPU caches and cause + * the user process to lose CPU writes to the non-imported sub-regions. + * + * When the GPU claims ownership of the imported memory buffer, it shall + * commit CPU writes for the whole of all pages that enclose the imported + * region, otherwise the initial content of memory would be wrong. 
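The ownership rule spelled out in this comment maps directly onto the streaming DMA API calls used in the loop that follows. A condensed sketch of the whole pattern for one page (illustrative only; example_import_page_dma() is hypothetical, and in the driver the map and unmap halves live in different functions, with the dma_sync_single_for_cpu() counterpart shown here only for completeness):

/* Kernel context: requires linux/dma-mapping.h. */
static int example_import_page_dma(struct device *dev, struct page *page)
{
	dma_addr_t dma_addr;

	/* Map without automatic CPU cache maintenance. */
	dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* GPU claims ownership: commit dirty CPU cache lines for the whole page. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	/* ... GPU uses the page ... */

	/* CPU claims ownership back before reading anything the GPU wrote. */
	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	/* Unmap, again with maintenance suppressed, so the CPU caches covering
	 * the non-imported parts of the page are not invalidated behind the
	 * user process's back.
	 */
	dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}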
+ */ for (i = 0; i < faulted_pages; i++) { - unsigned long map_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); - dma_addr_t dma_addr = dma_map_page(dev, pages[i], - offset_within_page, map_size, DMA_BIDIRECTIONAL); + dma_addr_t dma_addr = + dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1725,8 +1759,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - remaining_size -= map_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); } reg->gpu_alloc->nents = faulted_pages; @@ -1735,19 +1768,19 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( return reg; unwind_dma_map: - offset_within_page = user_buf->address & ~PAGE_MASK; - remaining_size = user_buf->size; dma_mapped_pages = i; - /* Run the unmap loop in the same order as map loop */ + /* Run the unmap loop in the same order as map loop, and perform again + * CPU cache synchronization to re-write the content of dirty CPU caches + * to memory. This precautionary measure is kept here to keep this code + * aligned with kbase_jd_user_buf_map() to allow for a potential refactor + * in the future. + */ for (i = 0; i < dma_mapped_pages; i++) { - unsigned long unmap_size = - MIN(PAGE_SIZE - offset_within_page, remaining_size); + dma_addr_t dma_addr = user_buf->dma_addrs[i]; - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - unmap_size, DMA_BIDIRECTIONAL); - remaining_size -= unmap_size; - offset_within_page = 0; + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); } fault_mismatch: if (pages) { @@ -1767,7 +1800,6 @@ no_alloc_obj: no_region: bad_size: return NULL; - } @@ -1888,9 +1920,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -2050,7 +2082,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - + if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) { + dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached"); + goto bad_flags; + } if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { dev_warn(kctx->kbdev->dev, "padding is only supported for UMM"); @@ -2164,11 +2199,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags, - kctx->as_nr, reg->gpu_alloc->group_id, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2197,7 +2230,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr); + alloc->pages + new_pages, delta, kctx->as_nr, false); return ret; } @@ -2262,10 +2295,10 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2662,6 +2695,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size_t size; int err = 0; + lockdep_assert_held(&kctx->reg_lock); + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; @@ -2734,7 +2769,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, size_t *nr_pages, size_t *aligned_offset) { - int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + unsigned int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); struct kbase_va_region *reg; int err = 0; @@ -2775,7 +2810,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); @@ -3016,6 +3050,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. 
+ */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. + * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. + */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). + * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags) @@ -3088,6 +3215,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. 
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -3111,6 +3245,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi atomic_add(page_count, &kctx->permanent_mapped_pages); kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; } @@ -3162,6 +3297,9 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (kbase_is_region_invalid_or_free(reg)) goto out_unlock; + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + addr = kbase_vmap_reg(kctx, reg, gpu_addr, size, prot_request, map, 0u); out_unlock: @@ -3189,6 +3327,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); + /* If page migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h index 5b12e181bf4c..6dda44b9f128 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_linux.h @@ -284,7 +284,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_reg * have been released in the mean time. * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region * lock is now held again. - * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the * region lock is now held again. * * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c index 8526688b7b12..737f7da5595d 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.c @@ -22,11 +22,11 @@ /** * DOC: Base kernel page migration implementation. */ - #include #include #include +#include /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. 
@@ -36,7 +36,12 @@ int kbase_page_migration_enabled; module_param(kbase_page_migration_enabled, int, 0444); KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops; +#endif + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id) { struct kbase_page_metadata *page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); @@ -48,19 +53,43 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a set_page_private(p, (unsigned long)page_md); page_md->dma_addr = dma_addr; page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + page_md->vmap_count = 0; + page_md->group_id = group_id; spin_lock_init(&page_md->migrate_lock); lock_page(p); - if (kbdev->mem_migrate.mapping) { - __SetPageMovable(p, kbdev->mem_migrate.mapping); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(p, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + /* In some corner cases, the driver may attempt to allocate memory pages + * even before the device file is open and the mapping for address space + * operations is created. In that case, it is impossible to assign address + * space operations to memory pages: simply pretend that they are movable, + * even if they are not. + * + * The page will go through all state transitions but it will never be + * actually considered movable by the kernel. This is due to the fact that + * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the + * memory pool will always refuse to add it to the pool and schedule + * a worker thread to free it later. + * + * Page metadata may seem redundant in this case, but they are not, + * because memory pools expect metadata to be present when page migration + * is enabled and because the pages may always return to memory pools and + * gain the movable property later on in their life cycle. 
+ */ + if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping); page_md->status = PAGE_MOVABLE_SET(page_md->status); } +#endif unlock_page(p); return true; } -static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) +static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id) { struct device *const dev = kbdev->dev; struct kbase_page_metadata *page_md; @@ -70,10 +99,13 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p) if (!page_md) return; + if (group_id) + *group_id = page_md->group_id; dma_addr = kbase_dma_addr(p); dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kfree(page_md); + set_page_private(p, 0); ClearPagePrivate(p); } @@ -91,6 +123,7 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_unlock(&mem_migrate->free_pages_lock); list_for_each_entry_safe(p, tmp, &free_list, lru) { + u8 group_id = 0; list_del_init(&p->lru); lock_page(p); @@ -101,8 +134,8 @@ static void kbase_free_pages_worker(struct work_struct *work) } unlock_page(p); - kbase_free_page_metadata(kbdev, p); - __free_pages(p, 0); + kbase_free_page_metadata(kbdev, p, &group_id); + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } @@ -115,6 +148,145 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) spin_unlock(&mem_migrate->free_pages_lock); } +/** + * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped + * in a PGD of kbase_mmu_table. + * + * @old_page: Existing PGD page to remove + * @new_page: Destination for migrating the existing PGD page to + * + * Replace an existing PGD page with a new page by migrating its content. More specifically: + * the new page shall replace the existing PGD page in the MMU page table. Before returning, + * the new page shall be set as movable and not isolated, while the old page shall lose + * the movable property. The meta data attached to the PGD page is transferred to the + * new (replacement) page. + * + * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure + * and the migration is aborted. + */ +static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx; + struct kbase_device *kbdev = kctx->kbdev; + dma_addr_t old_dma_addr = page_md->dma_addr; + dma_addr_t new_dma_addr; + int ret; + + /* Create a new dma map for the new page */ + new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to the page in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + * Only MCU's mmut is device wide, i.e. no corresponding kctx. 
+ */ + kbase_gpu_vm_lock(kctx); + + ret = kbase_mmu_migrate_page( + as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, + new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + + if (ret == 0) { + dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + ClearPagePrivate(old_page); + put_page(old_page); + + page_md = kbase_page_private(new_page); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + SetPagePrivate(new_page); + get_page(new_page); + } else + dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/* + * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both + * allocated and mapped. + * + * @old_page: Page to remove. + * @new_page: Page to add. + * + * Replace an old page with a new page by migrating its content and all its + * CPU and GPU mappings. More specifically: the new page shall replace the + * old page in the MMU page table, as well as in the page array of the physical + * allocation, which is used to create CPU mappings. Before returning, the new + * page shall be set as movable and not isolated, while the old page shall lose + * the movable property. + */ +static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; + dma_addr_t old_dma_addr, new_dma_addr; + int ret; + + old_dma_addr = page_md->dma_addr; + new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to array of pages in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + */ + kbase_gpu_vm_lock(kctx); + + /* Unmap the old physical range. */ + unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + PAGE_SIZE, 1); + + ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), + as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, + MIDGARD_MMU_BOTTOMLEVEL); + + if (ret == 0) { + dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + SetPagePrivate(new_page); + get_page(new_page); + + /* Clear PG_movable from the old page and release reference. */ + ClearPagePrivate(old_page); + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); + + page_md = kbase_page_private(new_page); + /* Set PG_movable to the new page. 
*/ +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); + page_md->status = PAGE_MOVABLE_SET(page_md->status); +#else + if (kctx->kbdev->mem_migrate.inode->i_mapping) { + __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); + page_md->status = PAGE_MOVABLE_SET(page_md->status); + } +#endif + } else + dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + /** * kbase_page_isolate - Isolate a page for migration. * @@ -133,6 +305,9 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return false; + if (!spin_trylock(&page_md->migrate_lock)) return false; @@ -152,17 +327,29 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) atomic_inc(&mem_pool->isolation_in_progress_cnt); break; case ALLOCATED_MAPPED: + /* Mark the page into isolated state, but only if it has no + * kernel CPU mappings + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + break; case PT_MAPPED: - /* Only pages in a memory pool can be isolated for now. */ + /* Mark the page into isolated state. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); break; case SPILL_IN_PROGRESS: case ALLOCATE_IN_PROGRESS: case FREE_IN_PROGRESS: - /* Transitory state: do nothing. */ + break; + case NOT_MOVABLE: + /* Opportunistically clear the movable property for these pages */ + __ClearPageMovable(p); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); break; default: /* State should always fall in one of the previous cases! - * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * Also notice that FREE_ISOLATED_IN_PROGRESS or + * FREE_PT_ISOLATED_IN_PROGRESS is impossible because * that state only applies to pages that are already isolated. */ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); @@ -210,17 +397,31 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Return: 0 on success, error code otherwise. */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, struct page *old_page, enum migrate_mode mode) +#else +static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif { int err = 0; bool status_mem_pool = false; + bool status_free_pt_isolated_in_progress = false; + bool status_free_isolated_in_progress = false; + bool status_pt_mapped = false; + bool status_mapped = false; + bool status_not_movable = false; struct kbase_page_metadata *page_md = kbase_page_private(old_page); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) CSTD_UNUSED(mapping); +#endif CSTD_UNUSED(mode); + if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + return -EINVAL; + if (!spin_trylock(&page_md->migrate_lock)) return -EAGAIN; @@ -235,10 +436,22 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + status_mapped = true; + break; case PT_MAPPED: + status_pt_mapped = true; + break; case FREE_ISOLATED_IN_PROGRESS: - case MULTI_MAPPED: - /* So far, only pages in a memory pool can be migrated. 
*/ + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; + break; + case NOT_MOVABLE: + status_not_movable = true; + break; default: /* State should always fall in one of the previous cases! */ err = -EAGAIN; @@ -247,18 +460,37 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa spin_unlock(&page_md->migrate_lock); - if (status_mem_pool) { + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - kbase_free_page_metadata(kbdev, old_page); + kbase_free_page_metadata(kbdev, old_page, NULL); __ClearPageMovable(old_page); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + put_page(old_page); /* Just free new page to avoid lock contention. */ INIT_LIST_HEAD(&new_page->lru); + get_page(new_page); set_page_private(new_page, 0); kbase_free_page_later(kbdev, new_page); queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } else if (status_not_movable) { + err = -EINVAL; + } else if (status_mapped) { + err = kbasep_migrate_page_allocated_mapped(old_page, new_page); + } else if (status_pt_mapped) { + err = kbasep_migrate_page_pt_mapped(old_page, new_page); + } + + /* While we want to preserve the movability of pages for which we return + * EAGAIN, according to the kernel docs, movable pages for which a critical + * error is returned are called putback on, which may not be what we + * expect. + */ + if (err < 0 && err != -EAGAIN) { + __ClearPageMovable(old_page); + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } return err; @@ -277,13 +509,23 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa static void kbase_page_putback(struct page *p) { bool status_mem_pool = false; + bool status_free_isolated_in_progress = false; + bool status_free_pt_isolated_in_progress = false; struct kbase_page_metadata *page_md = kbase_page_private(p); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; + + /* If we don't have page metadata, the page may not belong to the + * driver or may already have been freed, and there's nothing we can do + */ + if (!page_md) + return; spin_lock(&page_md->migrate_lock); - /* Page must have been isolated to reach here but metadata is incorrect. */ - WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return; + } switch (PAGE_STATUS_GET(page_md->status)) { case MEM_POOL: @@ -291,11 +533,22 @@ static void kbase_page_putback(struct page *p) kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; case PT_MAPPED: - case FREE_ISOLATED_IN_PROGRESS: - /* Only pages in a memory pool can be isolated for now. - * Therefore only pages in a memory pool can be 'putback'. + case NOT_MOVABLE: + /* Pages should no longer be isolated if they are in a stable state + * and used by the driver. 
*/ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + case FREE_ISOLATED_IN_PROGRESS: + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; break; default: /* State should always fall in one of the previous cases! */ @@ -304,35 +557,59 @@ static void kbase_page_putback(struct page *p) spin_unlock(&page_md->migrate_lock); - /* If page was in a memory pool then just free it to avoid lock contention. */ - if (!WARN_ON(!status_mem_pool)) { - struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - + /* If page was in a memory pool then just free it to avoid lock contention. The + * same is also true to status_free_pt_isolated_in_progress. + */ + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); - list_del_init(&p->lru); - kbase_free_page_later(kbdev, p); - queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + + if (!WARN_ON_ONCE(!kbdev)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } } } +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops = { + .isolate_page = kbase_page_isolate, + .migrate_page = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; +#else static const struct address_space_operations kbase_address_space_ops = { .isolate_page = kbase_page_isolate, .migratepage = kbase_page_migrate, .putback_page = kbase_page_putback, }; +#endif +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + mutex_lock(&kbdev->fw_load_lock); + if (filp) { filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; - if (!kbdev->mem_migrate.mapping) - kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; - else - WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + if (!kbdev->mem_migrate.inode) { + kbdev->mem_migrate.inode = filp->f_inode; + /* This reference count increment is balanced by iput() + * upon termination. 
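Since the open file pins filp->f_inode, the reference taken by the atomic_inc() that follows could equivalently be written with the VFS helper ihold(), which performs the same increment plus a sanity check. A one-line sketch, not part of the patch:

	ihold(filp->f_inode); /* requires an existing reference, which the open file provides */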
+ */ + atomic_inc(&filp->f_inode->i_count); + } else { + WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } } + + mutex_unlock(&kbdev->fw_load_lock); } +#endif void kbase_mem_migrate_init(struct kbase_device *kbdev) { @@ -344,6 +621,9 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + mem_migrate->inode = NULL; +#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); @@ -355,4 +635,7 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + iput(mem_migrate->inode); +#endif } diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h index d4796327b8d7..76bbc999e110 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_migrate.h @@ -50,6 +50,8 @@ extern int kbase_page_migration_enabled; * @kbdev: Pointer to kbase device. * @p: Page to assign metadata to. * @dma_addr: DMA address mapped to paged. + * @group_id: Memory group ID associated with the entity that is + * allocating the page metadata. * * This will allocate memory for the page's metadata, initialize it and * assign a reference to the page's private field. Importantly, once @@ -58,7 +60,8 @@ extern int kbase_page_migration_enabled; * * Return: true if successful or false otherwise. */ -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id); /** * kbase_free_page_later - Defer freeing of given page. @@ -70,6 +73,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a */ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) /* * kbase_mem_migrate_set_address_space_ops - Set address space operations * @@ -81,6 +85,7 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); * add a reference to @kbdev. */ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif /* * kbase_mem_migrate_init - Initialise kbase page migration diff --git a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c index dce066db7385..75569cc51c52 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_mem_pool.c @@ -57,37 +57,59 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } -static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, +static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, struct list_head *page_list, size_t *list_size) { struct kbase_page_metadata *page_md = kbase_page_private(p); + bool not_movable = false; lockdep_assert_held(&pool->pool_lock); + /* Free the page instead of adding it to the pool if it's not movable. + * Only update page status and add the page to the memory pool if + * it is not isolated. 
+ */ spin_lock(&page_md->migrate_lock); - /* Only update page status and add the page to the memory pool if it is not isolated */ - if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); page_md->data.mem_pool.pool = pool; page_md->data.mem_pool.kbdev = pool->kbdev; - list_move(&p->lru, page_list); + list_add(&p->lru, page_list); (*list_size)++; } spin_unlock(&page_md->migrate_lock); + + if (not_movable) { + kbase_free_page_later(pool->kbdev, p); + pool_dbg(pool, "skipping a not movable page\n"); + } + + return not_movable; } static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); - else { + if (!pool->order && kbase_page_migration_enabled) { + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } else { list_add(&p->lru, &pool->page_list); pool->cur_size++; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added page\n"); } @@ -101,18 +123,29 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); if (!pool->order && kbase_page_migration_enabled) { struct page *p, *tmp; - list_for_each_entry_safe(p, tmp, page_list, lru) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + list_for_each_entry_safe(p, tmp, page_list, lru) { + list_del_init(&p->lru); + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } } else { list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -226,7 +259,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* Setup page metadata for 4KB pages when page migration is enabled */ if (!pool->order && kbase_page_migration_enabled) { INIT_LIST_HEAD(&p->lru); - if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); @@ -251,7 +284,14 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) { - struct kbase_device *kbdev = pool->kbdev; + struct kbase_device *kbdev; + + if (WARN_ON(!pool)) + return; + if (WARN_ON(!p)) + return; + + kbdev = pool->kbdev; if (!pool->order && kbase_page_migration_enabled) { kbase_free_page_later(kbdev, p); @@ -460,7 +500,11 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool * struct shrinker does not define batch */ 
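The registration just below repeats the kernel-version split already applied to the per-context shrinker earlier in this patch: from Linux 6.0, register_shrinker() also takes a name, which is reported through the shrinker debugfs interface. A small compatibility wrapper (a sketch only; kbase_register_shrinker() is a hypothetical name, not part of the patch) would let both call sites read the same on all supported kernels:

/* Sketch: hide the register_shrinker() signature change introduced in Linux 6.0. */
#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
#define kbase_register_shrinker(shrinker, name) register_shrinker(shrinker)
#else
#define kbase_register_shrinker(shrinker, name) register_shrinker(shrinker, name)
#endif

/* Call sites then read identically on all supported kernels, e.g.:
 *   kbase_register_shrinker(&pool->reclaim, "mali-mem-pool");
 */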
pool->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&pool->reclaim); +#else + register_shrinker(&pool->reclaim, "mali-mem-pool"); +#endif pool_dbg(pool, "initialized\n"); @@ -499,14 +543,16 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Zero pages first without holding the next_pool lock */ for (i = 0; i < nr_to_spill; i++) { p = kbase_mem_pool_remove_locked(pool, SPILL_IN_PROGRESS); - list_add(&p->lru, &spill_list); + if (p) + list_add(&p->lru, &spill_list); } } while (!kbase_mem_pool_is_empty(pool)) { /* Free remaining pages to kernel */ p = kbase_mem_pool_remove_locked(pool, FREE_IN_PROGRESS); - list_add(&p->lru, &free_list); + if (p) + list_add(&p->lru, &free_list); } kbase_mem_pool_unlock(pool); @@ -558,17 +604,10 @@ struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) { - struct page *p; - lockdep_assert_held(&pool->pool_lock); pool_dbg(pool, "alloc_locked()\n"); - p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); - - if (p) - return p; - - return NULL; + return kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); } void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, @@ -636,10 +675,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, /* Get pages from this pool */ kbase_mem_pool_lock(pool); nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { int j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); + if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -867,7 +908,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pages[i] = as_tagged(0); continue; } - p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); diff --git a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c index b0c5126afcbe..212a61f68372 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_softjobs.c @@ -502,6 +502,7 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -673,8 +674,8 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; - unsigned long nr_pages = - alloc->imported.user_buf.nr_pages; + const unsigned long nr_pages = alloc->imported.user_buf.nr_pages; + const unsigned long start = alloc->imported.user_buf.address; if (alloc->imported.user_buf.mm != current->mm) { ret = -EINVAL; @@ -686,11 +687,9 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) ret = -ENOMEM; goto out_unlock; } - - ret = get_user_pages_fast( - alloc->imported.user_buf.address, - nr_pages, 0, - buffers[i].extres_pages); + kbase_gpu_vm_unlock(katom->kctx); + ret = get_user_pages_fast(start, nr_pages, 0, buffers[i].extres_pages); + kbase_gpu_vm_lock(katom->kctx); if (ret != nr_pages) { /* Adjust number of pages, so that we only * attempt to release pages in the array that we @@ -728,7 +727,6 @@ out_cleanup: return ret; } -#endif /* !MALI_USE_CSF */ #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc 
*gpu_alloc, @@ -760,8 +758,18 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, } #endif -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) { unsigned int i; unsigned int target_page_nr = 0; @@ -848,7 +856,6 @@ out_unlock: return ret; } -#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -866,6 +873,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ #endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -963,11 +971,6 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = -EINVAL; goto free_info; } - /* Clear any remaining bytes when user struct is smaller than - * kernel struct. For jit version 1, this also clears the - * padding bytes - */ - memset(((u8 *)info) + sizeof(*info), 0, sizeof(*info) - sizeof(*info)); ret = kbasep_jit_alloc_validate(kctx, info); if (ret) @@ -1541,6 +1544,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) case BASE_JD_REQ_SOFT_EVENT_RESET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: { int res = kbase_debug_copy(katom); @@ -1549,6 +1553,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_INVALID; break; } +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: ret = kbase_jit_allocate_process(katom); break; @@ -1654,8 +1659,10 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) if (katom->jc == 0) return -EINVAL; break; +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: return kbase_debug_copy_prepare(katom); +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_EXT_RES_MAP: return kbase_ext_res_prepare(katom); case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: @@ -1687,9 +1694,11 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) kbase_sync_fence_in_remove(katom); break; #endif /* CONFIG_SYNC_FILE */ +#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST case BASE_JD_REQ_SOFT_DEBUG_COPY: kbase_debug_copy_finish(katom); break; +#endif /* IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST */ case BASE_JD_REQ_SOFT_JIT_ALLOC: kbase_jit_allocate_finish(katom); break; diff --git a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c index 853c89796d44..d770913e9da5 100644 --- a/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/bifrost/mali_kbase_vinstr.c @@ -41,6 +41,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. 
*/ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* Hwcnt reader API version */ #define HWCNT_READER_API 1 diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c index db2086079c14..4a0926531af2 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_csf.c @@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * context's address space, when the page fault occurs for * MCU's address space. */ - if (!queue_work(as->pf_wq, &as->work_pagefault)) - kbase_ctx_sched_release_ctx(kctx); - else { + if (!queue_work(as->pf_wq, &as->work_pagefault)) { dev_dbg(kbdev->dev, - "Page fault is already pending for as %u\n", - as_nr); + "Page fault is already pending for as %u", as_nr); + kbase_ctx_sched_release_ctx(kctx); + } else { atomic_inc(&kbdev->faults_pending); } } @@ -552,14 +551,14 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, } KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c index 22786f0748ce..83605c3dc56f 100644 --- a/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c +++ b/drivers/gpu/arm/bifrost/mmu/backend/mali_kbase_mmu_jm.c @@ -328,7 +328,7 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) while (bf_bits | pf_bits) { struct kbase_as *as; - int as_no; + unsigned int as_no; struct kbase_context *kctx; struct kbase_fault *fault; @@ -423,13 +423,13 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, return kbase_job_slot_softstop_start_rp(kctx, reg); } -int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) { kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c index 8f261d439909..3131d57ef330 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -156,7 +157,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz } else if (op == KBASE_MMU_OP_FLUSH_MEM) { flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } @@ -167,7 +168,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz * perform a reset to recover */ dev_err(kbdev->dev, - "Flush for physical address range did not complete. 
Issuing GPU soft-reset to recover\n"); + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -230,9 +231,8 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -326,7 +326,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct * perform a reset to recover. */ dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -340,15 +340,7 @@ static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_cont phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op) { -#if MALI_USE_CSF - unsigned long irq_flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) - mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); -#endif + kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op); } static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size) @@ -398,9 +390,9 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context * a 4kB physical page. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int group_id, u64 *dirty_pgds); +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id, u64 *dirty_pgds); /** * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and @@ -420,6 +412,65 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, u64 vpfn, int level, enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); + +static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
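The change in mmu_flush_invalidate_as() above swaps in the _locked reset variants because that function requests the reset while already holding hwaccess_lock. A minimal sketch of the pairing, assuming the usual kbase reset API semantics:

    unsigned long flags;

    /* Outside any hwaccess_lock critical section: */
    if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
            kbase_reset_gpu(kbdev);

    /* From code that already holds hwaccess_lock: */
    spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
    if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
            kbase_reset_gpu_locked(kbdev);
    spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);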
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + if (!kbase_page_migration_enabled) + return false; + + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { + WARN_ON_ONCE(!mmut->kctx); + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET(page_md->status, + FREE_PT_ISOLATED_IN_PROGRESS); + page_md->data.free_pt_isolated.kbdev = kbdev; + page_is_isolated = true; + } else { + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + } + } else { + WARN_ON_ONCE(mmut->kctx); + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } + spin_unlock(&page_md->migrate_lock); + + if (unlikely(page_is_isolated)) { + /* Do the CPU cache flush and accounting here for the isolated + * PGD page, which is done inside kbase_mmu_free_pgd() for the + * PGD page that did not get isolated. + */ + dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + DMA_BIDIRECTIONAL); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } + + return page_is_isolated; +} + /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -441,17 +492,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_account_freed_pgd(kbdev, mmut); } /** @@ -482,6 +523,20 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm mutex_unlock(&mmut->mmu_lock); } +static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p, struct list_head *free_pgds_list) +{ + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + list_add(&p->lru, free_pgds_list); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -509,7 +564,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn( kbdev->dev, - "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } @@ -692,8 +747,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, } /* Now make this faulting page writable to GPU. 
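Stepping back to the PGD-freeing helpers introduced above: the rule they implement is that an isolated PGD page (one currently grabbed by the page-migration core) only has its accounting updated here, and the migration machinery frees it later, while every other PGD page is returned to the memory pool straight away. The teardown path later in this patch applies the rule along these lines (sketch mirroring that call site):

    struct page *p = phys_to_page(pgd);

    /* Isolated PGD pages are handed over to the migration code instead
     * of being freed immediately.
     */
    if (!kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p))
            kbase_mmu_free_pgd(kbdev, mmut, pgd);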
*/ - kbase_mmu_update_pages_no_flush(kctx, fault_pfn, fault_phys_addr, 1, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1, + region->flags, region->gpu_alloc->group_id, &dirty_pgds); kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id, dirty_pgds); @@ -917,7 +972,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, */ dev_warn( kctx->kbdev->dev, - "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, total_mempools_free_4k); *pages_to_grow = 0; @@ -985,9 +1040,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - dev_dbg(kbdev->dev, - "Entering %s %pK, fault_pfn %lld, as_no %d\n", - __func__, (void *)data, fault_pfn, as_no); + dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, + fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -1010,8 +1064,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } #endif @@ -1206,8 +1259,7 @@ page_fault_retry: /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - current_backed_size); - dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", - new_pages); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); if (new_pages == 0) { struct kbase_mmu_hw_op_param op_param; @@ -1284,11 +1336,10 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + err = kbase_mmu_insert_pages_no_flush( + kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1314,16 +1365,11 @@ page_fault_retry: if (region->threshold_pages && kbase_reg_current_backed_size(region) > region->threshold_pages) { - - dev_dbg(kctx->kbdev->dev, - "%zu pages exceeded IR threshold %zu\n", - new_pages + current_backed_size, - region->threshold_pages); + dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", + new_pages + current_backed_size, region->threshold_pages); if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, - "Get region %pK for IR\n", - (void *)region); + dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); kbase_va_region_alloc_get(kctx, region); } } @@ -1441,7 +1487,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { - dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; } } @@ -1468,7 +1514,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1532,11 +1578,10 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * u64 *dirty_pgds) { u64 *page; + u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); - lockdep_assert_held(&mmut->mmu_lock); /* @@ -1549,7 +1594,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } @@ -1559,8 +1604,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); kunmap(p); return -ENOMEM; } @@ -1585,9 +1629,32 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * * GPU cache is still needed. For explanation, please refer * the comment in kbase_mmu_insert_pages_no_flush(). 
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(pgd_vpfn, level); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1618,9 +1685,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, + l); return err; } } @@ -1640,7 +1706,8 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list) + struct list_head *free_pgds_list, + struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1693,8 +1760,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, pcount = count; break; default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); goto next; } @@ -1713,7 +1779,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, KBASE_MMU_OP_NONE, dirty_pgds, @@ -1734,12 +1800,34 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, next: vpfn += count; } + + /* If page migration is enabled: the only way to recover from failure + * is to mark all pages as not movable. It is not predictable what's + * going to happen to these pages at this stage. They might return + * movable once they are returned to a memory pool. 
+ */ + if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn + 1; + u64 i; + + for (i = 0; i < num_pages; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + } } static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 vpfn, size_t nr, u64 dirty_pgds, - enum kbase_caller_mmu_sync_info mmu_sync_info) + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool insert_pages_failed) { struct kbase_mmu_hw_op_param op_param; int as_nr = 0; @@ -1764,8 +1852,12 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * * Operations that affect the whole GPU cache shall only be done if it's * impossible to update physical ranges. + * + * On GPUs where flushing by physical address range is supported, + * full cache flush is done when an error occurs during + * insert_pages() to keep the error handling simpler. */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed) mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param); else mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); @@ -1806,6 +1898,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; + /* If page migration is enabled, pages involved in multiple GPU mappings + * are always treated as not movable. + */ + if (kbase_page_migration_enabled) { + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1842,15 +1948,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", - __func__); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } goto fail_unlock; } @@ -1858,14 +1964,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } err = -ENOMEM; goto fail_unlock; @@ -1917,7 +2024,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, false); return 0; @@ -1925,12 +2032,91 @@ fail_unlock: mutex_unlock(&kctx->mmu.mmu_lock); mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, 
start_vpfn, nr, dirty_pgds, - mmu_sync_info); + mmu_sync_info, true); kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); return err; } +static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, + struct kbase_va_region *reg, + struct kbase_mmu_table *mmut, const u64 vpfn) +{ + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + spin_lock(&page_md->migrate_lock); + + /* If no GPU va region is given: the metadata provided are + * invalid. + * + * If the page is already allocated and mapped: this is + * an additional GPU mapping, probably to create a memory + * alias, which means it is no longer possible to migrate + * the page easily because tracking all the GPU mappings + * would be too costly. + * + * In any case: the page becomes not movable. It is kept + * alive, but attempts to migrate it will fail. The page + * will be freed if it is still not movable when it returns + * to a memory pool. Notice that the movable flag is not + * cleared because that would require taking the page lock. + */ + if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); + page_md->data.mapped.reg = reg; + page_md->data.mapped.mmut = mmut; + page_md->data.mapped.vpfn = vpfn; + } + + spin_unlock(&page_md->migrate_lock); +} + +static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, + struct tagged_addr *phys, size_t requested_nr) +{ + size_t i; + + for (i = 0; i < requested_nr; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. 
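The NOT_MOVABLE updates in kbase_mmu_insert_single_page() and kbase_mmu_progress_migration_on_insert() above follow one locking pattern. Written as a stand-alone helper (hypothetical, not part of the patch) it would read:

    static void example_mark_page_not_movable(struct tagged_addr phys)
    {
            struct page *phys_page = as_page(phys);
            struct kbase_page_metadata *page_md = kbase_page_private(phys_page);

            /* Pages without migration metadata need no update. */
            if (!page_md)
                    return;

            spin_lock(&page_md->migrate_lock);
            page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
            spin_unlock(&page_md->migrate_lock);
    }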
+ */ + if (is_huge(phys[i]) || is_partial(phys[i])) + continue; + + if (page_md) { + u8 status; + + spin_lock(&page_md->migrate_lock); + status = PAGE_STATUS_GET(page_md->status); + + if (status == ALLOCATED_MAPPED) { + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET( + page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); + page_md->data.free_isolated.kbdev = kbdev; + /* At this point, we still have a reference + * to the page via its page migration metadata, + * and any page with the FREE_ISOLATED_IN_PROGRESS + * status will subsequently be freed in either + * kbase_page_migrate() or kbase_page_putback() + */ + phys[i] = as_tagged(0); + } else + page_md->status = PAGE_STATUS_SET(page_md->status, + (u8)FREE_IN_PROGRESS); + } + + spin_unlock(&page_md->migrate_lock); + } + } +} + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1944,7 +2130,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds) + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2006,14 +2193,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } goto fail_unlock; } @@ -2021,15 +2209,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } err = -ENOMEM; goto fail_unlock; @@ -2060,6 +2248,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu *target = kbase_mmu_create_ate(kbdev, phys[i], flags, cur_level, group_id); + + /* If page migration is enabled, this is the right time + * to update the status of the page. + */ + if (kbase_page_migration_enabled && !ignore_page_migration && + !is_huge(phys[i]) && !is_partial(phys[i])) + kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, + insert_vpfn + i); } num_of_valid_entries += count; } @@ -2104,8 +2300,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu fail_unlock: mutex_unlock(&mmut->mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, - CALLER_MMU_ASYNC); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, + dirty_pgds ? 
*dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2115,11 +2311,10 @@ fail_unlock: * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration) { int err; u64 dirty_pgds = 0; @@ -2130,11 +2325,11 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, return 0; err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds); + &dirty_pgds, reg, ignore_page_migration); if (err) return err; - mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); return 0; } @@ -2285,7 +2480,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); } else { current_valid_entries--; @@ -2361,11 +2556,12 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * @mmut: Pointer to GPU MMU page table. * @vpfn: Start page frame number of the GPU virtual pages to unmap. * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is only used for GPU cache - * maintenance. + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. * @nr: Number of pages to unmap. * @as_nr: Address space number, for GPU cache maintenance operations * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. @@ -2384,10 +2580,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * Return: 0 on success, otherwise an error code. 
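A hypothetical call site, to show how the new @ignore_page_migration argument reads in practice (the region and context names are assumed): passing false lets the function update migration metadata for the unmapped pages, whereas callers whose pages never take part in migration pass true.

    err = kbase_mmu_teardown_pages(kbdev, &kctx->mmu, reg->start_pfn,
                                   kbase_get_gpu_phy_pages(reg),
                                   kbase_reg_current_backed_size(reg),
                                   kctx->as_nr, false);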
*/ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr) + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration) { + const size_t requested_nr = nr; u64 start_vpfn = vpfn; - size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; @@ -2478,9 +2675,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, + level); kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): @@ -2488,9 +2684,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table if (count >= 512) { pcount = 1; } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); + dev_warn( + kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", + __func__, count); pcount = 0; } break; @@ -2499,9 +2696,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pcount = count; break; default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); + dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); vpfn += count; nr -= count; continue; @@ -2530,7 +2725,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - list_add(&p->lru, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, flush_op, &dirty_pgds, @@ -2553,7 +2748,6 @@ next: } err = 0; out: - mutex_unlock(&mmut->mmu_lock); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, @@ -2566,6 +2760,16 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + /* If page migration is enabled: the status of all physical pages involved + * shall be updated, unless they are not movable. Their status shall be + * updated before releasing the lock to protect against concurrent + * requests to migrate the pages, if they have been isolated. + */ + if (kbase_page_migration_enabled && phys && !ignore_page_migration) + kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + + mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2574,9 +2778,11 @@ out: KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** - * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries + * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU + * page table entries * - * @kctx: Kbase context + * @kbdev: Pointer to kbase device. 
+ * @mmut: The involved MMU table * @vpfn: Virtual PFN (Page Frame Number) of the first page to update * @phys: Pointer to the array of tagged physical addresses of the physical * pages that are pointed to by the page table entries (that need to @@ -2589,26 +2795,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); * @dirty_pgds: Flags to track every level where a PGD has been updated. * * This will update page table entries that already exist on the GPU based on - * the new flags that are passed (the physical pages pointed to by the page - * table entries remain unchanged). It is used as a response to the changes of - * the memory attributes. + * new flags and replace any existing phy pages that are passed (the PGD pages + * remain unchanged). It is used as a response to the changes of phys as well + * as the the memory attributes. * * The caller is responsible for validating the memory attributes. * * Return: 0 if the attributes data in page table entries were updated * successfully, otherwise an error code. */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, unsigned long flags, - int const group_id, u64 *dirty_pgds) +static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds) { phys_addr_t pgd; u64 *pgd_page; int err; - struct kbase_device *kbdev; - - if (WARN_ON(kctx == NULL)) - return -EINVAL; KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); @@ -2616,9 +2818,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - mutex_lock(&kctx->mmu.mmu_lock); - - kbdev = kctx->kbdev; + mutex_lock(&mmut->mmu_lock); while (nr) { unsigned int i; @@ -2634,8 +2834,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd, NULL, - dirty_pgds); + err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds); if (WARN_ON(err)) goto fail_unlock; @@ -2662,7 +2861,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, pgd_page[level_index] = kbase_mmu_create_ate(kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id); - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (level_index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)), kbase_dma_addr(p) + (level_index * sizeof(u64)), sizeof(u64), KBASE_MMU_OP_NONE); } else { @@ -2680,7 +2879,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, /* MMU cache flush strategy is NONE because GPU cache maintenance * will be done by the caller. 
*/ - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), KBASE_MMU_OP_NONE); } @@ -2698,60 +2897,446 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, kunmap(p); } - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return 0; fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); + mutex_unlock(&mmut->mmu_lock); return err; } -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id) +static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) { int err; struct kbase_mmu_hw_op_param op_param; u64 dirty_pgds = 0; - + struct kbase_mmu_table *mmut; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + int as_nr; - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, group_id, &dirty_pgds); +#if !MALI_USE_CSF + if (unlikely(kctx == NULL)) + return -EINVAL; + + as_nr = kctx->as_nr; + mmut = &kctx->mmu; +#else + if (kctx) { + mmut = &kctx->mmu; + as_nr = kctx->as_nr; + } else { + mmut = &kbdev->csf.mcu_mmu; + as_nr = MCU_AS_NR; + } +#endif + + err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds); op_param = (const struct kbase_mmu_hw_op_param){ .vpfn = vpfn, .nr = nr, .op = KBASE_MMU_OP_FLUSH_MEM, - .kctx_id = kctx->id, + .kctx_id = kctx ? kctx->id : 0xFFFFFFFF, .mmu_sync_info = mmu_sync_info, .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds), }; - if (mmu_flush_cache_on_gpu_ctrl(kctx->kbdev)) - mmu_flush_invalidate_on_gpu_ctrl(kctx->kbdev, kctx, kctx->as_nr, &op_param); + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param); else - mmu_flush_invalidate(kctx->kbdev, kctx, kctx->as_nr, &op_param); + mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param); + return err; } -static void mmu_teardown_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level) +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + if (unlikely(kctx == NULL)) + return -EINVAL; + + return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id); +} + +#if MALI_USE_CSF +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id) +{ + return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id); +} +#endif /* MALI_USE_CSF */ + +static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = true; +} + +static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = false; + /* Invoke the PM state machine, as the MMU page migration session + * may have deferred a transition in L2 state machine. 
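The two wrappers above pick the MMU table for the shared helper: a user context's own table for kbase_mmu_update_pages(), or the MCU table and MCU_AS_NR for kbase_mmu_update_csf_mcu_pages() on CSF GPUs. Hypothetical call sites:

    /* User-context mapping, updated through the context's MMU table: */
    err = kbase_mmu_update_pages(kctx, vpfn, phys, nr, flags, group_id);

    /* Firmware (MCU) mapping on CSF GPUs; no context is involved: */
    err = kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phys, nr, flags, group_id);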
+ */ + kbase_pm_update_state(kbdev); +} + +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ + struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); + struct kbase_mmu_hw_op_param op_param; + struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; + struct kbase_device *kbdev; + phys_addr_t pgd; + u64 *old_page, *new_page, *pgd_page, *target, vpfn; + int index, check_state, ret = 0; + unsigned long hwaccess_flags = 0; + unsigned int num_of_valid_entries; + u8 vmap_count = 0; + + /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, + * here we skip the no kctx case, which is only used with MCU's mmut. + */ + if (!mmut->kctx) + return -EINVAL; + + if (level > MIDGARD_MMU_BOTTOMLEVEL) + return -EINVAL; + else if (level == MIDGARD_MMU_BOTTOMLEVEL) + vpfn = page_md->data.mapped.vpfn; + else + vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + + kbdev = mmut->kctx->kbdev; + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + + /* Create all mappings before copying content. + * This is done as early as possible because is the only operation that may + * fail. It is possible to do this before taking any locks because the + * pages to migrate are not going to change and even the parent PGD is not + * going to be affected by any other concurrent operation, since the page + * has been isolated before migration and therefore it cannot disappear in + * the middle of this function. + */ + old_page = kmap(as_page(old_phys)); + if (!old_page) { + dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); + ret = -EINVAL; + goto old_page_map_error; + } + + new_page = kmap(as_page(new_phys)); + if (!new_page) { + dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); + ret = -EINVAL; + goto new_page_map_error; + } + + /* GPU cache maintenance affects both memory content and page table, + * but at two different stages. A single virtual memory page is affected + * by the migration. + * + * Notice that the MMU maintenance is done in the following steps: + * + * 1) The MMU region is locked without performing any other operation. + * This lock must cover the entire migration process, in order to + * prevent any GPU access to the virtual page whose physical page + * is being migrated. + * 2) Immediately after locking: the MMU region content is flushed via + * GPU control while the lock is taken and without unlocking. + * The region must stay locked for the duration of the whole page + * migration procedure. + * This is necessary to make sure that pending writes to the old page + * are finalized before copying content to the new page. + * 3) Before unlocking: changes to the page table are flushed. + * Finer-grained GPU control operations are used if possible, otherwise + * the whole GPU cache shall be flushed again. + * This is necessary to make sure that the GPU accesses the new page + * after migration. + * 4) The MMU region is unlocked. 
+ */ +#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) + op_param.mmu_sync_info = CALLER_MMU_ASYNC; + op_param.kctx_id = mmut->kctx->id; + op_param.vpfn = vpfn & PGD_VPFN_MASK(level); + op_param.nr = 1 << ((3 - level) * 9); + op_param.op = KBASE_MMU_OP_FLUSH_PT; + /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ + op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); + + mutex_lock(&mmut->mmu_lock); + + /* The state was evaluated before entering this function, but it could + * have changed before the mmu_lock was taken. However, the state + * transitions which are possible at this point are only two, and in both + * cases it is a stable state progressing to a "free in progress" state. + * + * After taking the mmu_lock the state can no longer change: read it again + * and make sure that it hasn't changed before continuing. + */ + spin_lock(&page_md->migrate_lock); + check_state = PAGE_STATUS_GET(page_md->status); + if (level == MIDGARD_MMU_BOTTOMLEVEL) + vmap_count = page_md->vmap_count; + spin_unlock(&page_md->migrate_lock); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + if (check_state != ALLOCATED_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort page migration", __func__, + check_state, ALLOCATED_MAPPED); + ret = -EAGAIN; + goto page_state_change_out; + } else if (vmap_count > 0) { + dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", + __func__); + ret = -EAGAIN; + goto page_state_change_out; + } + } else { + if (check_state != PT_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort PGD page migration", + __func__, check_state, PT_MAPPED); + WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); + ret = -EAGAIN; + goto page_state_change_out; + } + } + + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + if (ret) { + dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + goto get_pgd_at_level_error; + } + + pgd_page = kmap(phys_to_page(pgd)); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); + ret = -EINVAL; + goto pgd_page_map_error; + } + + mutex_lock(&kbdev->pm.lock); + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Lock MMU region and flush GPU cache by using GPU control, + * in order to keep MMU region locked. 
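The shift-by-nine arithmetic above (the per-level index computed in kbase_mmu_migrate_page(), PGD_VPFN_MASK(), and op_param.nr) follows from the 512-entry, four-level page table layout, where level 3 is the bottom level holding the ATEs for 4kB pages. A self-contained illustration, not driver code:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t vpfn = 0x123456789ULL; /* arbitrary example GPU page frame number */
            int level;

            for (level = 0; level <= 3; level++) {
                    unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
                    uint64_t nr_pages = 1ULL << ((3 - level) * 9); /* pages covered by one entry */
                    uint64_t base_vpfn = vpfn & ~(nr_pages - 1);   /* PGD_VPFN_MASK(level) applied */

                    printf("level %d: index %3u, entry covers %llu pages, region base vpfn 0x%llx\n",
                           level, index, (unsigned long long)nr_pages,
                           (unsigned long long)base_vpfn);
            }
            return 0;
    }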
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { + /* Defer the migration as L2 is in a transitional phase */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__); + ret = -EAGAIN; + goto l2_state_defer_out; + } + /* Prevent transitional phases in L2 by starting the transaction */ + mmu_page_migration_transaction_begin(kbdev); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); + if (!ret) { + ret = kbase_gpu_cache_flush_and_busy_wait( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } + if (ret) + mmu_page_migration_transaction_end(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + if (ret < 0) { + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_err(kbdev->dev, + "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", + __func__); + goto gpu_reset; + } + + /* Copy memory content. + * + * It is necessary to claim the ownership of the DMA buffer for the old + * page before performing the copy, to make sure of reading a consistent + * version of its content, before copying. After the copy, ownership of + * the DMA buffer for the new page is given to the GPU in order to make + * the content visible to potential GPU access that may happen as soon as + * this function releases the lock on the MMU region. + */ + dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + memcpy(new_page, old_page, PAGE_SIZE); + dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Remap GPU virtual page. + * + * This code rests on the assumption that page migration is only enabled + * for 4 kB pages, that necessarily live in the bottom level of the MMU + * page table. For this reason, the PGD level tells us inequivocably + * whether the page being migrated is a "content page" or another PGD + * of the page table: + * + * - Bottom level implies ATE (Address Translation Entry) + * - Any other level implies PTE (Page Table Entry) + * + * The current implementation doesn't handle the case of a level 0 PGD, + * that is: the root PGD of the page table. + */ + target = &pgd_page[index]; + + /* Certain entries of a page table page encode the count of valid entries + * present in that page. So need to save & restore the count information + * when updating the PTE/ATE to point to the new page. 
+ */ + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + WARN_ON_ONCE((*target & 1UL) == 0); + *target = + kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, page_md->data.mapped.reg->gpu_alloc->group_id); + } else { + u64 managed_pte; + +#ifdef CONFIG_MALI_BIFROST_DEBUG + /* The PTE should be pointing to the page being migrated */ + WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); +#endif + kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + /* This function always updates a single entry inside an existing PGD, + * therefore cache maintenance is necessary and affects a single entry. + */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + + /* Unlock MMU region. + * + * Notice that GPUs which don't issue flush commands via GPU control + * still need an additional GPU cache flush here, this time only + * for the page table, because the function call above to sync PGDs + * won't have any effect on them. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); + } else { + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + /* Checking the final migration transaction error state */ + if (ret < 0) { + dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); + goto gpu_reset; + } + + /* Undertaking metadata transfer, while we are holding the mmu_lock */ + spin_lock(&page_md->migrate_lock); + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + size_t page_array_index = + page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; + + WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + + /* Replace page in array of pages of the physical allocation. 
*/ + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } + /* Update the new page dma_addr with the transferred metadata from the old_page */ + page_md->dma_addr = new_dma_addr; + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + spin_unlock(&page_md->migrate_lock); + set_page_private(as_page(new_phys), (unsigned long)page_md); + /* Old page metatdata pointer cleared as it now owned by the new page */ + set_page_private(as_page(old_phys), 0); + +l2_state_defer_out: + kunmap(phys_to_page(pgd)); +pgd_page_map_error: +get_pgd_at_level_error: +page_state_change_out: + mutex_unlock(&mmut->mmu_lock); + + kunmap(as_page(new_phys)); +new_page_map_error: + kunmap(as_page(old_phys)); +old_page_map_error: + return ret; + +gpu_reset: + /* Unlock the MMU table before resetting the GPU and undo + * mappings. + */ + mutex_unlock(&mmut->mmu_lock); + kunmap(phys_to_page(pgd)); + kunmap(as_page(new_phys)); + kunmap(as_page(old_phys)); + + /* Reset the GPU because of an unrecoverable error in locking or flushing. */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + + return ret; +} + +static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t pgd, unsigned int level) { u64 *pgd_page; int i; struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; + bool page_is_isolated = false; + struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kmap_atomic(p); /* kmap_atomic should NEVER fail. */ - if (WARN_ON(pgd_page == NULL)) + if (WARN_ON_ONCE(pgd_page == NULL)) return; if (level < MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize @@ -2761,6 +3346,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } + /* When page migration is enabled, kbase_region_tracker_term() would ensure + * there are no pages left mapped on the GPU for a context. Hence the count + * of valid entries is expected to be zero here. + */ + if (kbase_page_migration_enabled && mmut->kctx) + WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); @@ -2779,7 +3370,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } } - kbase_mmu_free_pgd(kbdev, mmut, pgd); + /* Top level PGD page is excluded from migration process. 
*/ + if (level != MIDGARD_MMU_TOPLEVEL) + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2836,6 +3432,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); @@ -2855,11 +3455,26 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_destroy(&mmut->mmu_lock); } -void kbase_mmu_as_term(struct kbase_device *kbdev, int i) +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i) { destroy_workqueue(kbdev->as[i].pf_wq); } +void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op) +{ +#if MALI_USE_CSF + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && + kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#endif +} + #ifdef CONFIG_MALI_VECTOR_DUMP static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) @@ -2881,7 +3496,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; } @@ -3035,8 +3650,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h index 848570f2a6dd..247a67c50da8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.h @@ -29,6 +29,7 @@ struct kbase_context; struct kbase_mmu_table; +struct kbase_va_region; /** * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. 
@@ -49,6 +50,26 @@ enum kbase_caller_mmu_sync_info { CALLER_MMU_ASYNC }; +/** + * enum kbase_mmu_op_type - enum for MMU operations + * @KBASE_MMU_OP_NONE: To help catch uninitialized struct + * @KBASE_MMU_OP_FIRST: The lower boundary of enum + * @KBASE_MMU_OP_LOCK: Lock memory region + * @KBASE_MMU_OP_UNLOCK: Unlock memory region + * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) + * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) + * @KBASE_MMU_OP_COUNT: The upper boundary of enum + */ +enum kbase_mmu_op_type { + KBASE_MMU_OP_NONE = 0, /* Must be zero */ + KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ + KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, + KBASE_MMU_OP_UNLOCK, + KBASE_MMU_OP_FLUSH_PT, + KBASE_MMU_OP_FLUSH_MEM, + KBASE_MMU_OP_COUNT /* Must be the last in enum */ +}; + /** * kbase_mmu_as_init() - Initialising GPU address space object. * @@ -60,7 +81,7 @@ enum kbase_caller_mmu_sync_info { * * Return: 0 on success and non-zero value on failure. */ -int kbase_mmu_as_init(struct kbase_device *kbdev, int i); +int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_as_term() - Terminate address space object. @@ -71,7 +92,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i); * This is called upon device termination to destroy * the address space object of the device. */ -void kbase_mmu_as_term(struct kbase_device *kbdev, int i); +void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i); /** * kbase_mmu_init - Initialise an object representing GPU page tables @@ -132,22 +153,92 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id, u64 *dirty_pgds); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); + unsigned long flags, int group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr); + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); +#if MALI_USE_CSF +/** + * kbase_mmu_update_csf_mcu_pages - Update MCU mappings with changes of phys and flags + * + * @kbdev: Pointer to kbase device. + * @vpfn: Virtual PFN (Page Frame Number) of the first page to update + * @phys: Pointer to the array of tagged physical addresses of the physical + * pages that are pointed to by the page table entries (that need to + * be updated). + * @nr: Number of pages to update + * @flags: Flags + * @group_id: The physical memory group in which the page was allocated. 
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys, + size_t nr, unsigned long flags, int const group_id); +#endif + +/** + * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages + * + * @old_phys: Old physical page to be replaced. + * @new_phys: New physical page used to replace old physical page. + * @old_dma_addr: DMA address of the old page. + * @new_dma_addr: DMA address of the new page. + * @level: MMU page table level of the provided PGD. + * + * The page migration process is made of 2 big steps: + * + * 1) Copy the content of the old page to the new page. + * 2) Remap the virtual page, that is: replace either the ATE (if the old page + * was a regular page) or the PTE (if the old page was used as a PGD) in the + * MMU page table with the new page. + * + * During the process, the MMU region is locked to prevent GPU access to the + * virtual memory page that is being remapped. + * + * Before copying the content of the old page to the new page and while the + * MMU region is locked, a GPU cache flush is performed to make sure that + * pending GPU writes are finalized to the old page before copying. + * That is necessary because otherwise there's a risk that GPU writes might + * be finalized to the old page, and not new page, after migration. + * The MMU region is unlocked only at the end of the migration operation. + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); + +/** + * kbase_mmu_flush_pa_range() - Flush physical address range from the GPU caches + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @kctx: Pointer to kbase context, it can be NULL if the physical address + * range is not associated with User created context. + * @phys: Starting address of the physical range to start the operation on. + * @size: Number of bytes to work on. + * @flush_op: Type of cache flush operation to perform. + * + * Issue a cache flush physical range command. This function won't perform any + * flush if the GPU doesn't support FLUSH_PA_RANGE command. The flush would be + * performed only if the context has a JASID assigned to it. + * This function is basically a wrapper for kbase_gpu_cache_flush_pa_range_and_busy_wait(). + */ +void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx, + phys_addr_t phys, size_t size, + enum kbase_mmu_op_type flush_op); /** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. 
diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h index 09b3fa809bea..50d2ea5d07c8 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw.h @@ -54,26 +54,6 @@ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED }; -/** - * enum kbase_mmu_op_type - enum for MMU operations - * @KBASE_MMU_OP_NONE: To help catch uninitialized struct - * @KBASE_MMU_OP_FIRST: The lower boundary of enum - * @KBASE_MMU_OP_LOCK: Lock memory region - * @KBASE_MMU_OP_UNLOCK: Unlock memory region - * @KBASE_MMU_OP_FLUSH_PT: Flush page table (CLN+INV L2 only) - * @KBASE_MMU_OP_FLUSH_MEM: Flush memory (CLN+INV L2+LSC) - * @KBASE_MMU_OP_COUNT: The upper boundary of enum - */ -enum kbase_mmu_op_type { - KBASE_MMU_OP_NONE = 0, /* Must be zero */ - KBASE_MMU_OP_FIRST, /* Must be the first non-zero op */ - KBASE_MMU_OP_LOCK = KBASE_MMU_OP_FIRST, - KBASE_MMU_OP_UNLOCK, - KBASE_MMU_OP_FLUSH_PT, - KBASE_MMU_OP_FLUSH_MEM, - KBASE_MMU_OP_COUNT /* Must be the last in enum */ -}; - /** * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions * @vpfn: MMU Virtual Page Frame Number to start the operation on. @@ -104,6 +84,22 @@ struct kbase_mmu_hw_op_param { void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); +/** + * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program + * the LOCKADDR register. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * hwaccess_lock needs to be held when calling this function. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + /** * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without * programming the LOCKADDR register and wait @@ -114,6 +110,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * + * This function should be called for GPU where GPU command is used to flush + * the cache(s) instead of MMU command. + * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, @@ -145,7 +144,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -164,7 +163,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. 
*/ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -181,7 +180,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as * specified inside @op_param. GPU command is used to flush the cache(s) * instead of the MMU command. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); diff --git a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c index cc764b483f05..858d4bf6edcd 100644 --- a/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu_hw_direct.c @@ -424,6 +424,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { diff --git a/drivers/gpu/arm/bifrost/tests/Mconfig b/drivers/gpu/arm/bifrost/tests/Mconfig deleted file mode 100644 index 67b38a28cf96..000000000000 --- a/drivers/gpu/arm/bifrost/tests/Mconfig +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU license. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# - -menuconfig MALI_KUTF - bool "Build Mali Kernel Unit Test Framework modules" - depends on MALI_BIFROST && MALI_BIFROST_DEBUG - default y if BACKEND_KERNEL && MALI_BIFROST_DEBUG - help - This option will build the Mali testing framework modules. - - Modules: - - kutf.ko - - kutf_test.ko - -config MALI_KUTF_IRQ_TEST - bool "Build Mali KUTF IRQ test module" - depends on MALI_KUTF - default y - help - This option will build the IRQ latency measurement test module. - It can determine the latency of the Mali GPU IRQ on your system. - - Modules: - - mali_kutf_irq_test.ko - -config MALI_KUTF_CLK_RATE_TRACE - bool "Build Mali KUTF Clock rate trace test module" - depends on MALI_KUTF - default y - help - This option will build the clock rate trace portal test module. - It can test the clocks integration into the platform and exercise some - basic trace test in the system. - - Modules: - - mali_kutf_clk_rate_trace_test_portal.ko - -config MALI_KUTF_MGM_INTEGRATION_TEST - bool "Build Mali KUTF MGM integration test module" - depends on MALI_KUTF - default y - help - This option will build the MGM integration test module. 
- It can test the implementation of PTE translation for specific - group ids. - - Modules: - - mali_kutf_mgm_integration_test.ko - - -# Enable MALI_BIFROST_DEBUG for KUTF modules support - -config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BACKEND_KERNEL - default n diff --git a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h index c4c713c838cf..3f68efa4257d 100644 --- a/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h +++ b/drivers/gpu/arm/bifrost/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ */ #include +#include /** * kutf_helper_pending_input() - Check any pending lines sent by user space @@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context, */ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); +/** + * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser + * to ignore errors, until the counterpart + * is written to dmesg to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Start ignoring dmesg warnings" to dmesg, which + * the parser will read and not log any errors. Only to be used in cases where + * we expect an error to be produced in dmesg but that we do not want to be + * flagged as an error. + */ +void kutf_helper_ignore_dmesg(struct device *dev); + +/** + * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser + * to stop ignoring errors. + * @dev: Device pointer to write to dmesg using. + * + * This function writes "Stop ignoring dmesg warnings" to dmesg, which + * the parser will read and continue to log any errors. Counterpart to + * kutf_helper_ignore_dmesg(). + */ +void kutf_helper_stop_ignoring_dmesg(struct device *dev); + #endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c index d207d1c6e04f..42736195e071 100644 --- a/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c +++ b/drivers/gpu/arm/bifrost/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) { kutf_helper_input_enqueue(context, NULL, 0); } + +void kutf_helper_ignore_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + +void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c index 644d69bc209d..359d06371aff 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_timeline_io.c @@ -30,6 +30,11 @@ #include #include +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include +#endif + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos); diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h index 6660cf5bc276..c1428495b11c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tlstream.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,17 +27,13 @@ #include /* The maximum size of a single packet used by timeline. */ -#define PACKET_SIZE 4096 /* bytes */ +#define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ -#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) - #define PACKET_COUNT 64 -#else - #define PACKET_COUNT 32 -#endif +#define PACKET_COUNT 128 /* The maximum expected length of string in tracepoint descriptor. 
*/ -#define STRLEN_MAX 64 /* bytes */ +#define STRLEN_MAX 64 /* bytes */ /** * struct kbase_tlstream - timeline stream structure diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c index fd0d0c01adde..e8a74e9dafa6 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.c @@ -84,6 +84,7 @@ enum tl_msg_id_obj { KBASE_TL_ATTRIB_ATOM_PRIORITIZED, KBASE_TL_ATTRIB_ATOM_JIT, KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_GPUCMDQUEUE_KICK, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, KBASE_TL_KBASE_DEVICE_HALT_CSG, @@ -352,6 +353,10 @@ enum tl_msg_id_obj { "New KBase Device", \ "@IIIIIII", \ "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count,kbase_device_sb_entry_count,kbase_device_has_cross_stream_sync,kbase_device_supports_gpu_sleep") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_GPUCMDQUEUE_KICK, \ + "Kernel receives a request to process new GPU queue instructions", \ + "@IL", \ + "kernel_ctx_id,buffer_gpu_addr") \ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ "CSG is programmed to a slot", \ "@IIIII", \ @@ -2092,6 +2097,33 @@ void __kbase_tlstream_tl_kbase_new_device( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +) +{ + const u32 msg_id = KBASE_TL_KBASE_GPUCMDQUEUE_KICK; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(buffer_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &buffer_gpu_addr, sizeof(buffer_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, diff --git a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h index be0c62edecd7..c690a75fe22c 100644 --- a/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h +++ b/drivers/gpu/arm/bifrost/tl/mali_kbase_tracepoints.h @@ -396,6 +396,12 @@ void __kbase_tlstream_tl_kbase_new_device( u32 kbase_device_supports_gpu_sleep ); +void __kbase_tlstream_tl_kbase_gpucmdqueue_kick( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u64 buffer_gpu_addr +); + void __kbase_tlstream_tl_kbase_device_program_csg( struct kbase_tlstream *stream, u32 kbase_device_id, @@ -1981,6 +1987,37 @@ struct kbase_tlstream; do { } while (0) #endif /* MALI_USE_CSF */ +/** + * KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK - Kernel receives a request to process new GPU queue instructions + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @buffer_gpu_addr: Address of the GPU queue's command buffer + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_gpucmdqueue_kick( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id, \ + 
buffer_gpu_addr \ + ); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK( \ + kbdev, \ + kernel_ctx_id, \ + buffer_gpu_addr \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + /** * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot * diff --git a/drivers/hwtracing/coresight/mali/Kbuild b/drivers/hwtracing/coresight/mali/Kbuild new file mode 100644 index 000000000000..4d7d665f5652 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kbuild @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +# make $(src) as absolute path if it is not already, by prefixing $(srctree) +# This is to prevent any build issue due to wrong path. +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + +# +# ccflags +# +ccflags-y += \ + -I$(srctree)/include/linux \ + -I$(src) \ + -I$(srctree)/drivers/hwtracing/coresight/ \ + -I$(src)/../../../../include + +subdir-ccflags-y += $(ccflags-y) + +ifneq ($(CONFIG_CORESIGHT), n) +ifneq ($(CONFIG_CORESIGHT), ) + + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ETM),y) + obj-m += coresight_mali_source_etm.o + coresight_mali_source_etm-y := \ + sources/etm/coresight_mali_source_etm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ITM),y) + obj-m += coresight_mali_source_itm.o + coresight_mali_source_itm-y := \ + sources/itm/coresight_mali_source_itm_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +ifeq ($(CONFIG_CORESIGHT_MALI_SOURCES_ELA),y) + obj-m += coresight_mali_source_ela.o + coresight_mali_source_ela-y := \ + sources/ela/coresight_mali_source_ela_core.o \ + sources/coresight_mali_sources.o \ + coresight_mali_common.o +endif + +endif +endif diff --git a/drivers/hwtracing/coresight/mali/Kconfig b/drivers/hwtracing/coresight/mali/Kconfig new file mode 100644 index 000000000000..283e2b56b641 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Kconfig @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + + +config CORESIGHT_MALI_SOURCES_ETM + depends on MALI_CORESIGHT && ARM64 + bool "Enable CoreSight Mali Sources ETM module" + default y + help + This option will build coresight ETM source driver, + that is used for configuring, enabling and disabling + the ETM component. + +config CORESIGHT_MALI_SOURCES_ITM + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ITM module" + default y + help + This option will build coresight ITM source driver, + that is used for configuring, enabling and disabling + the ITM component. + +config CORESIGHT_MALI_SOURCES_ELA + depends on MALI_CORESIGHT + bool "Enable CoreSight Mali Sources ELA module" + default y + help + This option will build coresight ELA source driver, + that is used for configuring, enabling and disabling + the ELA component. diff --git a/drivers/hwtracing/coresight/mali/Makefile b/drivers/hwtracing/coresight/mali/Makefile new file mode 100644 index 000000000000..a6b5622c92ae --- /dev/null +++ b/drivers/hwtracing/coresight/mali/Makefile @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target)) +endif + +CONFIG_MALI_CORESIGHT ?= n +ifeq ($(CONFIG_MALI_CORESIGHT),y) + + ifeq ($(CONFIG_ARM64), y) + CONFIG_CORESIGHT_MALI_SOURCES_ETM ?= y + endif + + CONFIG_CORESIGHT_MALI_SOURCES_ITM ?= y + CONFIG_CORESIGHT_MALI_SOURCES_ELA ?= y +endif + +CONFIGS := \ + CONFIG_MALI_CORESIGHT \ + CONFIG_CORESIGHT_MALI_SOURCES_ETM \ + CONFIG_CORESIGHT_MALI_SOURCES_ITM \ + CONFIG_CORESIGHT_MALI_SOURCES_ELA + + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1. 
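# For illustration: with CONFIG_MALI_CORESIGHT=y on an arm64 kernel, the
# defaults above select all three source drivers, so the two generated
# variables expand roughly to:
#
#   MAKE_ARGS    = CONFIG_MALI_CORESIGHT=y CONFIG_CORESIGHT_MALI_SOURCES_ETM=y
#                  CONFIG_CORESIGHT_MALI_SOURCES_ITM=y CONFIG_CORESIGHT_MALI_SOURCES_ELA=y
#   EXTRA_CFLAGS = -DCONFIG_MALI_CORESIGHT=1 -DCONFIG_CORESIGHT_MALI_SOURCES_ETM=1
#                  -DCONFIG_CORESIGHT_MALI_SOURCES_ITM=1 -DCONFIG_CORESIGHT_MALI_SOURCES_ELA=1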
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +# +# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +# +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/gpu/arm/midgard/Module.symvers +EXTRA_SYMBOLS += $(CURDIR)/../../../../../drivers/hwtracing/coresight/mali/Module.symvers + +# The following were added to align with W=1 in scripts/Makefile.extrawarn +# from the Linux source tree +KBUILD_CFLAGS += -Wall -Werror +KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-prototypes +KBUILD_CFLAGS += -Wold-style-definition +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-type-limits + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean diff --git a/drivers/hwtracing/coresight/mali/build.bp b/drivers/hwtracing/coresight/mali/build.bp new file mode 100644 index 000000000000..824ae54c9e43 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/build.bp @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +bob_defaults { + name: "coresight_mali_defaults", + srcs: [ + "Makefile", + "Kbuild", + "coresight_mali_common.c", + "coresight_mali_common.h", + ], +} + +bob_defaults { + name: "coresight_mali_source_defaults", + srcs: [ + "sources/coresight_mali_sources.c", + "sources/coresight_mali_sources.h", + ], +} + + +bob_kernel_module { + name: "coresight_mali_source_etm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/etm/coresight_mali_source_etm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ETM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_itm", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/itm/coresight_mali_source_itm_core.c", + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ITM=y"], + enabled: true, + }, +} + +bob_kernel_module { + name: "coresight_mali_source_ela", + defaults: [ + "kernel_defaults", + "coresight_mali_defaults", + "coresight_mali_source_defaults", + ], + srcs: [ + "sources/ela/coresight_mali_source_ela_core.c", + "sources/ela/coresight-ela600.h" + ], + extra_symbols: [ + "mali_kbase", + ], + enabled: false, + mali_coresight: { + kbuild_options: ["CONFIG_CORESIGHT_MALI_SOURCES_ELA=y"], + enabled: true, + }, +} diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.c b/drivers/hwtracing/coresight/mali/coresight_mali_common.c new file mode 100644 index 000000000000..8e3af76c5267 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include + +#include +#include "coresight_mali_common.h" + +int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode) +{ + struct coresight_mali_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int res = 0; + + if (mode != CS_MODE_SYSFS) { + dev_err(drvdata->dev, "Unsupported Mali CS_MODE: %d, expected: %d\n", mode, + CS_MODE_SYSFS); + return -EINVAL; + } + + drvdata->mode = mode; + + res = kbase_debug_coresight_csf_config_enable(drvdata->config); + if (res) { + dev_err(drvdata->dev, "Config failed to enable with error code %d\n", res); + drvdata->mode = CS_MODE_DISABLED; + } + + return res; +} + +int coresight_mali_disable_component(struct coresight_device *csdev) +{ + struct coresight_mali_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int res = 0; + + res = kbase_debug_coresight_csf_config_disable(drvdata->config); + if (res) + dev_err(drvdata->dev, "config failed to disable with error code %d\n", res); + + drvdata->mode = CS_MODE_DISABLED; + + return res; +} diff --git a/drivers/hwtracing/coresight/mali/coresight_mali_common.h b/drivers/hwtracing/coresight/mali/coresight_mali_common.h new file mode 100644 index 000000000000..43154c1f639d --- /dev/null +++ b/drivers/hwtracing/coresight/mali/coresight_mali_common.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _CORESIGHT_MALI_COMMON_H +#define _CORESIGHT_MALI_COMMON_H + +#include +#include + +/* Macros for CoreSight OP types. 
*/ +#define WRITE_IMM_OP(_reg_addr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM, \ + .op.write_imm.reg_addr = _reg_addr, .op.write_imm.val = _val \ + } + +#define WRITE_RANGE_OP(_reg_start, _reg_end, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE, \ + .op.write_imm_range.reg_start = _reg_start, \ + .op.write_imm_range.reg_end = _reg_end, .op.write_imm_range.val = _val \ + } + +#define WRITE_PTR_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE, .op.write.reg_addr = _reg_addr, \ + .op.write.ptr = _ptr \ + } + +#define READ_OP(_reg_addr, _ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ, .op.read.reg_addr = _reg_addr, \ + .op.read.ptr = _ptr \ + } + +#define POLL_OP(_reg_addr, _mask, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL, .op.poll.reg_addr = _reg_addr, \ + .op.poll.mask = _mask, .op.poll.val = _val \ + } + +#define BIT_OR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_XOR_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_AND_OP(_ptr, _val) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND, .op.bitw.ptr = _ptr, \ + .op.bitw.val = _val \ + } + +#define BIT_NOT_OP(_ptr) \ + { \ + .type = KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT, .op.bitw.ptr = _ptr, \ + } + +#ifndef CS_MALI_UNLOCK_COMPONENT +/** + * CS_MALI_UNLOCK_COMPONENT - A write of 0xC5ACCE55 enables write access to the block + */ +#define CS_MALI_UNLOCK_COMPONENT 0xC5ACCE55 +#endif + +/** + * struct coresight_mali_drvdata - Coresight mali driver data + * + * @csdev: Coresight device pointer + * @dev: Device pointer + * @kbase_client: Pointer to coresight mali client + * @config: Pointer to coresight mali config, used for enabling and + * disabling the coresight component + * @enable_seq: Enable sequence needed to enable coresight block + * @disable_seq: Disable sequence needed to enable coresight block + * @gpu_dev: Pointer to gpu device structure + * @mode: Mode in which the driver operates + */ +struct coresight_mali_drvdata { + struct coresight_device *csdev; + struct device *dev; + void *kbase_client; + void *config; + struct kbase_debug_coresight_csf_sequence enable_seq; + struct kbase_debug_coresight_csf_sequence disable_seq; + void *gpu_dev; + u32 mode; +}; + +/** + * coresight_mali_enable_component - Generic enable for a coresight block + * + * @csdev: Coresight device to be enabled + * @mode: Mode in which the block should start operating in + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_enable_component(struct coresight_device *csdev, u32 mode); + +/** + * coresight_mali_disable_component - Generic disable for a coresight block + * + * @csdev: Coresight device to be disabled + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_disable_component(struct coresight_device *csdev); + +#endif /* _CORESIGHT_MALI_COMMON_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c new file mode 100644 index 000000000000..e6d2dc71096b --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "sources/coresight_mali_sources.h" + +static int coresight_mali_source_trace_id(struct coresight_device *csdev) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return drvdata->trcid; +} + +static int coresight_mali_enable_source(struct coresight_device *csdev, struct perf_event *event, + u32 mode) +{ + return coresight_mali_enable_component(csdev, mode); +} + +static void coresight_mali_disable_source(struct coresight_device *csdev, struct perf_event *event) +{ + coresight_mali_disable_component(csdev); +} + +static const struct coresight_ops_source coresight_mali_source_ops = { + .trace_id = coresight_mali_source_trace_id, + .enable = coresight_mali_enable_source, + .disable = coresight_mali_disable_source +}; + +static const struct coresight_ops mali_cs_ops = { + .source_ops = &coresight_mali_source_ops, +}; + +int coresight_mali_sources_probe(struct platform_device *pdev) +{ + int ret = 0; + struct coresight_platform_data *pdata = NULL; + struct coresight_mali_source_drvdata *drvdata = NULL; + struct coresight_desc desc = { 0 }; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct platform_device *gpu_pdev = NULL; + struct device_node *gpu_node = NULL; + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + dev_set_drvdata(dev, drvdata); + drvdata->base.dev = dev; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + pdata = coresight_get_platform_data(dev); +#else + if (np) + pdata = of_get_coresight_platform_data(dev, np); +#endif + if (IS_ERR(pdata)) { + dev_err(drvdata->base.dev, "Failed to get platform data\n"); + ret = PTR_ERR(pdata); + goto devm_kfree_drvdata; + } + + dev->platform_data = pdata; + + gpu_node = of_parse_phandle(np, "gpu", 0); + if (!gpu_node) { + dev_err(drvdata->base.dev, "GPU node not available\n"); + goto devm_kfree_drvdata; + } + gpu_pdev = of_find_device_by_node(gpu_node); + if (gpu_pdev == NULL) { + dev_err(drvdata->base.dev, "Couldn't find GPU device from node\n"); + goto devm_kfree_drvdata; + } + + drvdata->base.gpu_dev = platform_get_drvdata(gpu_pdev); + if (!drvdata->base.gpu_dev) { + dev_err(drvdata->base.dev, "GPU dev not available\n"); + goto devm_kfree_drvdata; + } + + ret = coresight_mali_sources_init_drvdata(drvdata); + if (ret) { + dev_err(drvdata->base.dev, "Failed to init source driver data\n"); + goto kbase_client_unregister; + } + + desc.type = CORESIGHT_DEV_TYPE_SOURCE; + desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE; + desc.ops = &mali_cs_ops; + desc.pdata = pdata; + desc.dev = dev; + desc.groups = coresight_mali_source_groups_get(); + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + desc.name = 
devm_kasprintf(dev, GFP_KERNEL, "%s", drvdata->type_name); + if (!desc.name) { + ret = -ENOMEM; + goto devm_kfree_drvdata; + } +#endif + drvdata->base.csdev = coresight_register(&desc); + if (IS_ERR(drvdata->base.csdev)) { + dev_err(drvdata->base.dev, "Failed to register coresight device\n"); + ret = PTR_ERR(drvdata->base.csdev); + goto devm_kfree_drvdata; + } + + return ret; + +kbase_client_unregister: + if (drvdata->base.csdev != NULL) + coresight_unregister(drvdata->base.csdev); + + coresight_mali_sources_deinit_drvdata(drvdata); + +devm_kfree_drvdata: + devm_kfree(dev, drvdata); + + return ret; +} + +int coresight_mali_sources_remove(struct platform_device *pdev) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(&pdev->dev); + + if (drvdata->base.csdev != NULL) + coresight_unregister(drvdata->base.csdev); + + coresight_mali_sources_deinit_drvdata(drvdata); + + devm_kfree(&pdev->dev, drvdata); + + return 0; +} + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h new file mode 100644 index 000000000000..0915dd107488 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/coresight_mali_sources.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _CORESIGHT_MALI_SOURCES_H +#define _CORESIGHT_MALI_SOURCES_H + +#include +#include +#include + +#include "coresight_mali_common.h" + +/** + * struct coresight_mali_source_drvdata - Coresight mali source driver data + * + * @base: Common driver data structure between coresight mali sources and sinks + * @trcid: Trace id + * @type_name: Type name of the driver, for example "itm" or "etm" + */ +struct coresight_mali_source_drvdata { + struct coresight_mali_drvdata base; + u32 trcid; +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + char *type_name; +#endif +}; + +/** + * coresight_mali_sources_probe - Generic probe for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_probe(struct platform_device *pdev); + +/** + * coresight_mali_sources_remove - Generic remove for a coresight mali source + * + * @pdev: Pointer to a platform device + * + * Return: 0 if success. Error code on failure. + */ +int coresight_mali_sources_remove(struct platform_device *pdev); + +/** + * coresight_mali_sources_init_drvdata - Driver data initialization hook. + * + * @drvdata: Driver data structure to initialize + * + * Used for initializing source specific enable and disable sequences and other relevant data. + * + * Return: 0 if success. 
Error code on failure. + */ +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_sources_deinit_drvdata - Driver data deinitialization hook. + * + * @drvdata: Driver data structure to deinitialize + * + * Used for releasing source specific enable and disable sequences and other relevant data. + */ +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata); + +/** + * coresight_mali_source_groups_get - Getter for source groups. + * + * Return: a pointer to an array of attribute groups of the driver. Can also be NULL. + * + * Groups are drivers sysfs subnodes that can be used to read state of the coresight component + * or write component configuration. + */ +const struct attribute_group **coresight_mali_source_groups_get(void); + +#endif /* _CORESIGHT_MALI_SOURCES_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h new file mode 100644 index 000000000000..0ee96bc2f047 --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight-ela600.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _CORESIGHT_ELA600_H +#define _CORESIGHT_ELA600_H + +#include + +#define ELA_CTRL 0x000 +#define ELA_TIMECTRL 0x004 +#define ELA_TSSR 0x008 +#define ELA_ATBCTRL 0x00C +#define ELA_PTACTION 0x010 +#define ELA_AUXCTRL 0x014 +#define ELA_CNTSEL 0x018 + +#define ELA_CTSR 0x020 +#define ELA_CCVR 0x024 +#define ELA_CAVR 0x028 +#define ELA_RDCAPTID 0x02C +#define ELA_RDCAPTIDEXT 0x030 + +#define ELA_RRAR 0x040 +#define ELA_RRDR 0x044 +#define ELA_RWAR 0x048 +#define ELA_RWDR 0x04C + +#define ELA_SIGSEL(x) (0x100 + 0x100 * (x)) +#define ELA_TRIGCTRL(x) (ELA_SIGSEL(x) + 0x004) +#define ELA_NEXTSTATE(x) (ELA_SIGSEL(x) + 0x008) +#define ELA_ACTION(x) (ELA_SIGSEL(x) + 0x00C) +#define ELA_ALTNEXTSTATE(x) (ELA_SIGSEL(x) + 0x010) +#define ELA_ALTACTION(x) (ELA_SIGSEL(x) + 0x014) +#define ELA_COMPCTRL(x) (ELA_SIGSEL(x) + 0x018) +#define ELA_ALTCOMPCTRL(x) (ELA_SIGSEL(x) + 0x01C) +#define ELA_COUNTCOMP(x) (ELA_SIGSEL(x) + 0x020) +#define ELA_TWBSEL(x) (ELA_SIGSEL(x) + 0x028) +#define ELA_EXTMASK(x) (ELA_SIGSEL(x) + 0x030) +#define ELA_EXTCOMP(x) (ELA_SIGSEL(x) + 0x034) +#define ELA_QUALMASK(x) (ELA_SIGSEL(x) + 0x038) +#define ELA_QUALCOMP(x) (ELA_SIGSEL(x) + 0x03C) +#define ELA_SIGMASK(x, y) (ELA_SIGSEL(x) + 0x040 + 4 * (y)) +#define ELA_SIGCOMP(x, y) (ELA_SIGSEL(x) + 0x080 + 4 * (y)) + +#define ELA_ITTRIGOUT 0xEE8 +#define ELA_ITATBDATA 0xEEC +#define ELA_ITATBCTR1 0xEF0 +#define ELA_ITATBCTR0 0xEF4 +#define ELA_ITTRIGIN 0xEF8 +#define ELA_ITCTRL 0xF00 + +#define ELA_AUTHSTATUS 0xFB8 + +#define ELA_DEVARCH 0xFBC +#define ELA_DEVID2 0xFC0 +#define ELA_DEVID1 0xFC4 +#define ELA_DEVID 0xFC8 +#define ELA_DEVTYPE 0xFCC + +#define ELA_PIDR4 0xFD0 +#define ELA_PIDR5 0xFD4 +#define ELA_PIDR6 0xFD8 +#define ELA_PIDR7 0xFDC +#define ELA_PIDR0 0xFE0 +#define ELA_PIDR1 0xFE4 +#define ELA_PIDR2 0xFE8 +#define ELA_PIDR3 0xFEC +#define ELA_CIDR0 0xFF0 +#define ELA_CIDR1 0xFF4 +#define ELA_CIDR2 0xFF8 +#define ELA_CIDR3 0xFFC + +/* REGISTER MASKS */ +#define ELA_CTRL_RUN BIT(0) +#define ELA_CTRL_TRACE_BUSY BIT(1) + +#define ELA_TIMECTRL_TSEN BIT(16) +#define ELA_TIMECTRL_TSINT GEN_MASK(15, 12) +#define ELA_TIMECTRL_TCSEL1 GEN_MASK(7, 4) +#define ELA_TIMECTRL_TCSEL0 GEN_MASK(3, 0) + +#define ELA_ATBCTRL_PREDICT BIT(31) +#define ELA_ATBCTRL_ATID_TRIG_EN BIT(15) +#define ELA_ATBCTRL_ATID_VALUE GEN_MASK(14, 8) +#define ELA_ATBCTRL_ASYNC_INTERVAL GEN_MASK(7, 0) + +#define ELA_ACTION_ELAOUTPUT GEN_MASK(7, 4) +#define ELA_ACTION_TRACE BIT(3) +#define ELA_ACTION_STOPCLOCK BIT(2) +#define ELA_ACTION_CTTRIGOUT GEN_MASK(1, 0) + +#define ELA_AUXCTRL_FLUSH_DIS BIT(0) + +#define ELA_SIGSEL_JCN_REQUEST BIT(0) +#define ELA_SIGSEL_JCN_RESPONSE BIT(1) +#define ELA_SIGSEL_CEU_EXECUTION BIT(2) +#define ELA_SIGSEL_MCU_AHBP BIT(3) +#define ELA_SIGSEL_HOST_AXI BIT(4) + +#define ELA_TRIGCTRL_ALTCOMPSEL BIT(15) +#define ELA_TRIGCTRL_ALTCOMP GEN_MASK(14, 12) +#define ELA_TRIGCTRL_CAPTID GEN_MASK(11, 10) +#define ELA_TRIGCTRL_COUNTBRK BIT(9) +#define ELA_TRIGCTRL_COUNTCLR BIT(8) +#define ELA_TRIGCTRL_TRACE GEN_MASK(7, 6) +#define ELA_TRIGCTRL_COUNTSRC BIT(5) +#define ELA_TRIGCTRL_WATCHRST BIT(4) +#define ELA_TRIGCTRL_COMPSEL BIT(3) +#define ELA_TRIGCTRL_COMP GEN_MASK(2, 0) + +#endif /* _CORESIGHT_ELA600_H */ diff --git a/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c new file mode 100644 index 000000000000..0da37a75ecfb --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/ela/coresight_mali_source_ela_core.c @@ 
-0,0 +1,666 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "sources/coresight_mali_sources.h" +#include "coresight-ela600.h" + +/* Linux Coresight framework does not support multiple sources enabled + * at the same time. + * + * To avoid Kernel instability, all Mali Coresight sources use the + * same trace ID value as the mandatory ETM one. + */ +#define CS_MALI_TRACE_ID 0x00000010 + +#define CS_ELA_BASE_ADDR 0xE0043000 +#define CS_GPU_COMMAND_ADDR 0x40003030 +#define CS_GPU_COMMAND_TRACE_CONTROL_EN 0x000001DC +#define CS_ELA_MAX_SIGNAL_GROUPS 12U +#define CS_SG_NAME_MAX_LEN 10U +#define CS_ELA_NR_SIG_REGS 8U + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +#define CS_ELA_SIGREGS_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0); \ + } \ + static DEVICE_ATTR_RW(_a) + +enum cs_ela_dynamic_regs { + CS_ELA_TIMECTRL, + CS_ELA_TSSR, + + CS_ELA_SIGSEL0, + CS_ELA_COMPCTRL0, + CS_ELA_ALTCOMPCTRL0, + CS_ELA_TWBSEL0, + CS_ELA_QUALMASK0, + CS_ELA_QUALCOMP0, + CS_ELA_SIGMASK0_0, + CS_ELA_SIGMASK0_1, + CS_ELA_SIGMASK0_2, + CS_ELA_SIGMASK0_3, + CS_ELA_SIGMASK0_4, + CS_ELA_SIGMASK0_5, + CS_ELA_SIGMASK0_6, + CS_ELA_SIGMASK0_7, + CS_ELA_SIGCOMP0_0, + CS_ELA_SIGCOMP0_1, + CS_ELA_SIGCOMP0_2, + CS_ELA_SIGCOMP0_3, + CS_ELA_SIGCOMP0_4, + CS_ELA_SIGCOMP0_5, + CS_ELA_SIGCOMP0_6, + CS_ELA_SIGCOMP0_7, + + CS_ELA_SIGSEL4, + CS_ELA_NEXTSTATE4, + CS_ELA_ACTION4, + CS_ELA_ALTNEXTSTATE4, + CS_ELA_COMPCTRL4, + CS_ELA_TWBSEL4, + CS_ELA_SIGMASK4_0, + CS_ELA_SIGMASK4_1, + CS_ELA_SIGMASK4_2, + CS_ELA_SIGMASK4_3, + CS_ELA_SIGMASK4_4, + CS_ELA_SIGMASK4_5, + CS_ELA_SIGMASK4_6, + CS_ELA_SIGMASK4_7, + CS_ELA_SIGCOMP4_0, + CS_ELA_SIGCOMP4_1, + CS_ELA_SIGCOMP4_2, + CS_ELA_SIGCOMP4_3, + CS_ELA_SIGCOMP4_4, + CS_ELA_SIGCOMP4_5, + CS_ELA_SIGCOMP4_6, + CS_ELA_SIGCOMP4_7, + + CS_ELA_NR_DYN_REGS +}; + +enum cs_ela_tracemodes { + CS_ELA_TRACEMODE_NONE, + CS_ELA_TRACEMODE_JCN, + CS_ELA_TRACEMODE_CEU_EXEC, + CS_ELA_TRACEMODE_CEU_CMDS, + CS_ELA_TRACEMODE_MCU_AHBP, + CS_ELA_TRACEMODE_HOST_AXI, + CS_ELA_NR_TRACEMODE +}; + +enum cs_ela_signal_types { + CS_ELA_SIGTYPE_JCN_REQ, + CS_ELA_SIGTYPE_JCN_RES, + CS_ELA_SIGTYPE_CEU_EXEC, + CS_ELA_SIGTYPE_CEU_CMDS, + CS_ELA_SIGTYPE_MCU_AHBP, + CS_ELA_SIGTYPE_HOST_AXI, + CS_ELA_NR_SIGTYPE, +}; + +struct cs_ela_state { + enum cs_ela_tracemodes tracemode; + u32 supported_tracemodes; + int enabled; + u32 signal_types[CS_ELA_NR_SIGTYPE]; + u32 regs[CS_ELA_NR_DYN_REGS]; +}; + +#if 
KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-ela"; +#endif + +static struct cs_ela_state ela_state = { 0 }; + +/* Setup ELA sysfs attributes */ +static char *tracemode_names[] = { + [CS_ELA_TRACEMODE_NONE] = "NONE", [CS_ELA_TRACEMODE_JCN] = "JCN", + [CS_ELA_TRACEMODE_CEU_EXEC] = "CEU_EXEC", [CS_ELA_TRACEMODE_CEU_CMDS] = "CEU_CMDS", + [CS_ELA_TRACEMODE_MCU_AHBP] = "MCU_AHBP", [CS_ELA_TRACEMODE_HOST_AXI] = "HOST_AXI", +}; + +static char *signal_type_names[] = { + [CS_ELA_SIGTYPE_JCN_REQ] = "jcn-request", [CS_ELA_SIGTYPE_JCN_RES] = "jcn-response", + [CS_ELA_SIGTYPE_CEU_EXEC] = "ceu-execution", [CS_ELA_SIGTYPE_CEU_CMDS] = "ceu-commands", + [CS_ELA_SIGTYPE_MCU_AHBP] = "mcu-ahbp", [CS_ELA_SIGTYPE_HOST_AXI] = "host-axi", +}; + +static int signal_type_tracemode_map[] = { + [CS_ELA_SIGTYPE_JCN_REQ] = CS_ELA_TRACEMODE_JCN, + [CS_ELA_SIGTYPE_JCN_RES] = CS_ELA_TRACEMODE_JCN, + [CS_ELA_SIGTYPE_CEU_EXEC] = CS_ELA_TRACEMODE_CEU_EXEC, + [CS_ELA_SIGTYPE_CEU_CMDS] = CS_ELA_TRACEMODE_CEU_CMDS, + [CS_ELA_SIGTYPE_MCU_AHBP] = CS_ELA_TRACEMODE_MCU_AHBP, + [CS_ELA_SIGTYPE_HOST_AXI] = CS_ELA_TRACEMODE_HOST_AXI, +}; + +static void setup_tracemode_registers(int tracemode) +{ + switch (tracemode) { + case CS_ELA_TRACEMODE_NONE: + /* Perform full reset of all dynamic registers */ + memset(ela_state.regs, 0x00000000, sizeof(u32) * CS_ELA_NR_DYN_REGS); + + ela_state.tracemode = CS_ELA_TRACEMODE_NONE; + break; + case CS_ELA_TRACEMODE_JCN: + + if (ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ] == + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]) { + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000010; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; + ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK0_1] = 0x80000000; + ela_state.regs[CS_ELA_SIGMASK0_3] = 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_1] = 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_3] = 0x80000000; + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + } else { + ela_state.regs[CS_ELA_TSSR] = 0x00000010; + + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_REQ]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000100; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + ela_state.regs[CS_ELA_TWBSEL0] = 0x00000FFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000000; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000000; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK0_2] |= 0x80000000; + ela_state.regs[CS_ELA_SIGCOMP0_2] |= 0x80000000; + + ela_state.regs[CS_ELA_SIGSEL4] = + ela_state.signal_types[CS_ELA_SIGTYPE_JCN_RES]; + ela_state.regs[CS_ELA_NEXTSTATE4] = 0x00000010; + ela_state.regs[CS_ELA_ACTION4] = 0x00000008; + ela_state.regs[CS_ELA_ALTNEXTSTATE4] = 0x00000001; + ela_state.regs[CS_ELA_COMPCTRL4] = 0x00000100; + ela_state.regs[CS_ELA_TWBSEL4] = 0x00000FFF; + + memset(&ela_state.regs[CS_ELA_SIGMASK4_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGMASK4_0 + 1)); + ela_state.regs[CS_ELA_SIGMASK4_2] |= 
0x80000000; + ela_state.regs[CS_ELA_SIGCOMP4_2] |= 0x80000000; + } + + break; + case CS_ELA_TRACEMODE_CEU_EXEC: + case CS_ELA_TRACEMODE_CEU_CMDS: + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + if (tracemode == CS_ELA_TRACEMODE_CEU_EXEC) { + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_CEU_EXEC]; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x00001000; + } else if (tracemode == CS_ELA_TRACEMODE_CEU_CMDS) { + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_CEU_CMDS]; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + } + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; + ela_state.regs[CS_ELA_TWBSEL0] = 0x0000FFFF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x0000000F; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x0000000F; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + break; + case CS_ELA_TRACEMODE_MCU_AHBP: + case CS_ELA_TRACEMODE_HOST_AXI: + ela_state.regs[CS_ELA_TSSR] = 0x00000000; + + if (tracemode == CS_ELA_TRACEMODE_MCU_AHBP) + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_MCU_AHBP]; + else if (tracemode == CS_ELA_TRACEMODE_HOST_AXI) + ela_state.regs[CS_ELA_SIGSEL0] = + ela_state.signal_types[CS_ELA_SIGTYPE_HOST_AXI]; + + ela_state.regs[CS_ELA_COMPCTRL0] = 0x00000001; + ela_state.regs[CS_ELA_ALTCOMPCTRL0] = 0x11111111; + ela_state.regs[CS_ELA_TWBSEL0] = 0x000000FF; + ela_state.regs[CS_ELA_QUALMASK0] = 0x00000003; + ela_state.regs[CS_ELA_QUALCOMP0] = 0x00000003; + + memset(&ela_state.regs[CS_ELA_SIGMASK0_0], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP0_7 - CS_ELA_SIGMASK0_0 + 1)); + + memset(&ela_state.regs[CS_ELA_SIGSEL4], 0x00000000, + sizeof(u32) * (CS_ELA_SIGCOMP4_7 - CS_ELA_SIGSEL4 + 1)); + + ela_state.regs[CS_ELA_COMPCTRL4] = 0x11111111; + + break; + } + ela_state.tracemode = tracemode; +} + +static ssize_t select_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + ssize_t ret = 0; + unsigned int mode; + + for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { + if (ela_state.supported_tracemodes & (1U << mode)) { + if (ela_state.tracemode == mode) + ret += sprintf(buf + ret, "[%s]\n", tracemode_names[mode]); + else + ret += sprintf(buf + ret, "%s\n", tracemode_names[mode]); + } + } + return ret; +} + +static ssize_t select_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned int mode = 0; + + /* Check if enabled and return error */ + if (ela_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers"); + return -EINVAL; + } + + for (mode = CS_ELA_TRACEMODE_NONE; mode < CS_ELA_NR_TRACEMODE; mode++) { + if (sysfs_streq(tracemode_names[mode], buf) && + (ela_state.supported_tracemodes & (1U << mode))) { + setup_tracemode_registers(mode); + return count; + } + } + + dev_err(drvdata->base.dev, "Invalid tracemode: %s", buf); + return -EINVAL; +} + +static DEVICE_ATTR_RW(select); + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", ela_state.enabled); +} + +static DEVICE_ATTR_RO(is_enabled); + +static ssize_t sprintf_regs(char *const buf, int from_reg, int to_reg) +{ + ssize_t ret 
= 0; + unsigned int i = 0; + + for (i = from_reg; i <= to_reg; i++) + ret += sprintf(buf + ret, "0x%08X ", ela_state.regs[i]); + + ret += sprintf(buf + ret, "\n"); + return ret; +} + +static ssize_t verify_store_8_regs(struct device *dev, const char *buf, size_t count, int from_reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 regs[CS_ELA_NR_SIG_REGS] = { 0 }; + int items; + unsigned int i; + + if (ela_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers"); + return -EINVAL; + } + + items = sscanf(buf, "%x %x %x %x %x %x %x %x", ®s[0], ®s[1], ®s[2], ®s[3], + ®s[4], ®s[5], ®s[6], ®s[7]); + if (items <= 0) { + dev_err(drvdata->base.dev, "Invalid register value"); + return -EINVAL; + } + if (items != CS_ELA_NR_SIG_REGS) { + dev_err(drvdata->base.dev, "Incorrect number of registers set (%d != %d)", items, + CS_ELA_NR_SIG_REGS); + return -EINVAL; + } + for (i = 0; i < CS_ELA_NR_SIG_REGS; i++) + ela_state.regs[from_reg + i] = regs[i]; + + return count; +} + +CS_ELA_SIGREGS_ATTR_RW(sigmask0, SIGMASK0); +CS_ELA_SIGREGS_ATTR_RW(sigcomp0, SIGCOMP0); +CS_ELA_SIGREGS_ATTR_RW(sigmask4, SIGMASK4); +CS_ELA_SIGREGS_ATTR_RW(sigcomp4, SIGCOMP4); + +static struct attribute *coresight_ela_attrs[] = { + &dev_attr_select.attr, + &dev_attr_is_enabled.attr, + &dev_attr_sigmask0.attr, + &dev_attr_sigcomp0.attr, + &dev_attr_sigmask4.attr, + &dev_attr_sigcomp4.attr, + NULL, +}; + +static struct attribute_group coresight_ela_group = { + .attrs = coresight_ela_attrs, +}; + +static const struct attribute_group *coresight_ela_groups[] = { + &coresight_ela_group, + NULL, +}; + +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_ela_groups; +} + +/* Initialize ELA coresight driver */ + +static struct kbase_debug_coresight_csf_address_range ela_range[] = { + { CS_ELA_BASE_ADDR, CS_ELA_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_ADDR } +}; + +static struct kbase_debug_coresight_csf_op ela_enable_ops[] = { + /* Clearing CTRL.RUN and the read only CTRL.TRACE_BUSY. */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + /* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. */ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + /* 0 for now. TSEN = 1 or TSINT = 8 in future */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TIMECTRL, &ela_state.regs[CS_ELA_TIMECTRL]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TSSR, &ela_state.regs[CS_ELA_TSSR]), + /* ATID[6:0] = 4; valid range 0x1-0x6F, value must be unique and needs to be + * known for trace extraction + */ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ATBCTRL, 0x00000400), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_PTACTION, ELA_ACTION_TRACE), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_AUXCTRL, 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CNTSEL, 0x00000000), + + /* Trigger State 0 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(0), &ela_state.regs[CS_ELA_SIGSEL0]), + /* May need to be configurable in future. 
*/ + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(0), 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ACTION(0), ELA_ACTION_TRACE), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(0), 0x00000001), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(0), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(0), &ela_state.regs[CS_ELA_COMPCTRL0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(0), &ela_state.regs[CS_ELA_ALTCOMPCTRL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(0), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(0), &ela_state.regs[CS_ELA_TWBSEL0]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(0), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(0), 0x00000000), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(0), &ela_state.regs[CS_ELA_QUALMASK0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(0), &ela_state.regs[CS_ELA_QUALCOMP0]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 0), &ela_state.regs[CS_ELA_SIGMASK0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 1), &ela_state.regs[CS_ELA_SIGMASK0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 2), &ela_state.regs[CS_ELA_SIGMASK0_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 3), &ela_state.regs[CS_ELA_SIGMASK0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 4), &ela_state.regs[CS_ELA_SIGMASK0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 5), &ela_state.regs[CS_ELA_SIGMASK0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 6), &ela_state.regs[CS_ELA_SIGMASK0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(0, 7), &ela_state.regs[CS_ELA_SIGMASK0_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 0), &ela_state.regs[CS_ELA_SIGCOMP0_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 1), &ela_state.regs[CS_ELA_SIGCOMP0_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 2), &ela_state.regs[CS_ELA_SIGCOMP0_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 3), &ela_state.regs[CS_ELA_SIGCOMP0_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 4), &ela_state.regs[CS_ELA_SIGCOMP0_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 5), &ela_state.regs[CS_ELA_SIGCOMP0_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 6), &ela_state.regs[CS_ELA_SIGCOMP0_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(0, 7), &ela_state.regs[CS_ELA_SIGCOMP0_7]), + + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(1), CS_ELA_BASE_ADDR + ELA_SIGCOMP(1, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(2), CS_ELA_BASE_ADDR + ELA_SIGCOMP(2, 7), + 0x00000000), + WRITE_RANGE_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(3), CS_ELA_BASE_ADDR + ELA_SIGCOMP(3, 7), + 0x00000000), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(3), 0x11111111), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(1), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(2), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(3), 0x11111111), + + /* Trigger State 4 */ + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGSEL(4), &ela_state.regs[CS_ELA_SIGSEL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_TRIGCTRL(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_NEXTSTATE(4), &ela_state.regs[CS_ELA_NEXTSTATE4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ACTION(4), &ela_state.regs[CS_ELA_ACTION4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_ALTNEXTSTATE(4), 
&ela_state.regs[CS_ELA_ALTNEXTSTATE4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTACTION(4), ELA_ACTION_TRACE), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_COMPCTRL(4), &ela_state.regs[CS_ELA_COMPCTRL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_ALTCOMPCTRL(4), 0x11111111), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_COUNTCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_TWBSEL(4), &ela_state.regs[CS_ELA_TWBSEL4]), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_EXTCOMP(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALMASK(4), 0x00000000), + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_QUALCOMP(4), 0x00000000), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 0), &ela_state.regs[CS_ELA_SIGMASK4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 1), &ela_state.regs[CS_ELA_SIGMASK4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 2), &ela_state.regs[CS_ELA_SIGMASK4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 3), &ela_state.regs[CS_ELA_SIGMASK4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 4), &ela_state.regs[CS_ELA_SIGMASK4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 5), &ela_state.regs[CS_ELA_SIGMASK4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 6), &ela_state.regs[CS_ELA_SIGMASK4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGMASK(4, 7), &ela_state.regs[CS_ELA_SIGMASK4_7]), + + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 0), &ela_state.regs[CS_ELA_SIGCOMP4_0]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 1), &ela_state.regs[CS_ELA_SIGCOMP4_1]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 2), &ela_state.regs[CS_ELA_SIGCOMP4_2]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 3), &ela_state.regs[CS_ELA_SIGCOMP4_3]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 4), &ela_state.regs[CS_ELA_SIGCOMP4_4]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 5), &ela_state.regs[CS_ELA_SIGCOMP4_5]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 6), &ela_state.regs[CS_ELA_SIGCOMP4_6]), + WRITE_PTR_OP(CS_ELA_BASE_ADDR + ELA_SIGCOMP(4, 7), &ela_state.regs[CS_ELA_SIGCOMP4_7]), + + WRITE_IMM_OP(CS_GPU_COMMAND_ADDR, CS_GPU_COMMAND_TRACE_CONTROL_EN), + + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_RUN), + + BIT_OR_OP(&ela_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op ela_disable_ops[] = { + WRITE_IMM_OP(CS_ELA_BASE_ADDR + ELA_CTRL, 0x00000000), + /* Poll CTRL.TRACE_BUSY until it becomes low to ensure that trace has stopped. 
*/ + POLL_OP(CS_ELA_BASE_ADDR + ELA_CTRL, ELA_CTRL_TRACE_BUSY, 0x0), + + BIT_AND_OP(&ela_state.enabled, 0x0), +}; + +static int parse_signal_groups(struct coresight_mali_source_drvdata *drvdata) +{ + struct device_node *signal_groups = NULL; + unsigned int siggrp_idx; + + if (drvdata->base.dev->of_node) + signal_groups = of_get_child_by_name(drvdata->base.dev->of_node, "signal-groups"); + + if (!signal_groups) { + dev_err(drvdata->base.dev, "Failed to find signal groups OF node"); + return -EINVAL; + } + + for (siggrp_idx = 0; siggrp_idx < CS_ELA_MAX_SIGNAL_GROUPS; siggrp_idx++) { + char buf[CS_SG_NAME_MAX_LEN]; + ssize_t res; + const char *name; + struct property *prop; + + res = snprintf(buf, CS_SG_NAME_MAX_LEN, "sg%d", siggrp_idx); + if (res <= 0) { + dev_err(drvdata->base.dev, + "Signal group name %d snprintf failed unexpectedly", siggrp_idx); + return -EINVAL; + } + + of_property_for_each_string(signal_groups, buf, prop, name) { + int sig_type; + + for (sig_type = 0; sig_type < CS_ELA_NR_SIGTYPE; sig_type++) { + if (!strncmp(signal_type_names[sig_type], name, + strlen(signal_type_names[sig_type]))) { + ela_state.signal_types[sig_type] = (1U << siggrp_idx); + ela_state.supported_tracemodes |= + (1U << signal_type_tracemode_map[sig_type]); + } + } + } + } + + /* Add TRACEMODE_NONE as supported to allow printing */ + ela_state.supported_tracemodes |= (1U << CS_ELA_TRACEMODE_NONE); + + return 0; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int res = 0; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, ela_range, NELEMS(ela_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly"); + return -EINVAL; + } + + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = ela_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(ela_enable_ops); + + drvdata->base.disable_seq.ops = ela_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(ela_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly"); + return -EINVAL; + } + + res = parse_signal_groups(drvdata); + if (res) { + dev_err(drvdata->base.dev, "Failed to parse signal groups"); + return res; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-ela" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-ela", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + 
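Note on the CS_ELA_SIGREGS_ATTR_RW() invocations above: the macro itself is defined earlier in this file, before the lines quoted here. For illustration only, and assuming it mirrors the CS_ETM_REG_ATTR_RW pattern used by the ETM source later in this patch, a plausible expansion that simply wires sprintf_regs() and verify_store_8_regs() to the eight-register window named by its second argument would be:

#define CS_ELA_SIGREGS_ATTR_RW(_a, _b)                                                 \
	static ssize_t _a##_show(struct device *dev, struct device_attribute *attr,   \
				 char *const buf)                                      \
	{                                                                              \
		/* Dump the eight _b##_0.._b##_7 registers as space-separated hex */   \
		return sprintf_regs(buf, CS_ELA_##_b##_0, CS_ELA_##_b##_7);            \
	}                                                                              \
	static ssize_t _a##_store(struct device *dev, struct device_attribute *attr,  \
				  const char *buf, size_t count)                       \
	{                                                                              \
		/* Parse eight hex values and store them starting at _b##_0 */         \
		return verify_store_8_regs(dev, buf, count, CS_ELA_##_b##_0);          \
	}                                                                              \
	static DEVICE_ATTR_RW(_a)

Under that assumption, each invocation creates one read-write sysfs attribute (sigmask0, sigcomp0, sigmask4, sigcomp4) backed by a contiguous slice of ela_state.regs[].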
+module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("Arm Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ELA"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c new file mode 100644 index 000000000000..ae9c2f7f2a8c --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/etm/coresight_mali_source_etm_core.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "sources/coresight_mali_sources.h" + +#define CS_ETM_BASE_ADDR 0xE0041000 +#define CS_MALI_TRACE_ID 0x00000010 + +#ifndef TRCVICTLR_SSSTATUS +#define TRCVICTLR_SSSTATUS BIT(9) +#endif + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-etm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_etm_dynamic_regs { + CS_ETM_TRCCONFIGR, + CS_ETM_TRCTRACEIDR, + CS_ETM_TRCVDARCCTLR, + CS_ETM_TRCSTALLCTLR, + CS_ETM_TRCVIIECTLR, + CS_ETM_NR_DYN_REGS +}; + +struct cs_etm_state { + int enabled; + u32 regs[CS_ETM_NR_DYN_REGS]; +}; + +static struct cs_etm_state etm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range etm_range[] = { + { CS_ETM_BASE_ADDR, CS_ETM_BASE_ADDR + CORESIGHT_DEVTYPE }, +}; + +struct kbase_debug_coresight_csf_op etm_enable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Power up request + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPDCR, TRCPDCR_PU), + // Disable Tracing + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Check the tracing unit is inactive before programming + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Set trace configuration to enable global timestamping, and data value tracing + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCCONFIGR, &etm_state.regs[CS_ETM_TRCCONFIGR]), + // Set event control 0 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL0R, 0x00000000), + // Set event control 1 register + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCEVENTCTL1R, 0x00000000), + // Set trace ID + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCTRACEIDR, &etm_state.regs[CS_ETM_TRCTRACEIDR]), + // Configure stall control register + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCSTALLCTLR, &etm_state.regs[CS_ETM_TRCSTALLCTLR]), + // Synchronization period register - sync every 2^11 bytes + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCSYNCPR, 0x0000000C), + // Set global timestamp control register to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCTSCTLR, 0x00000000), + // Set viewData include/exclude address range comparators to 0 + WRITE_PTR_OP(CS_ETM_BASE_ADDR 
+ TRCVDARCCTLR, &etm_state.regs[CS_ETM_TRCVDARCCTLR]), + // Set viewData main control to select resource 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDCTLR, 0x00000001), + // Set viewData comparators to 0 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVDSACCTLR, 0x00000000), + // Set stop/start logic to started state, select resource 1 + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVICTLR, TRCVICTLR_SSSTATUS | BIT(0)), + // Set viewInst start and stop control + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCVISSCTLR, 0x00000000), + // Set viewInst include and exclude control to match all addresses in range + WRITE_PTR_OP(CS_ETM_BASE_ADDR + TRCVIIECTLR, &etm_state.regs[CS_ETM_TRCVIIECTLR]), + // Enable trace + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x1), + // Wait until the trace unit is no longer idle (tracing has started) + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), 0), + // Lock the ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + BIT_OR_OP(&etm_state.enabled, 0x1), +}; + +struct kbase_debug_coresight_csf_op etm_disable_ops[] = { + // Unlock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Disable trace unit + WRITE_IMM_OP(CS_ETM_BASE_ADDR + TRCPRGCTLR, 0x00000000), + // Poll until idle + POLL_OP(CS_ETM_BASE_ADDR + TRCSTATR, BIT(TRCSTATR_IDLE_BIT), BIT(TRCSTATR_IDLE_BIT)), + // Lock ETM configuration + WRITE_IMM_OP(CS_ETM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&etm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // Turn on instruction tracing + etm_state.regs[CS_ETM_TRCCONFIGR] = 0x00000800; + // Set ID + etm_state.regs[CS_ETM_TRCTRACEIDR] = CS_MALI_TRACE_ID; + // Set data comparators to none + etm_state.regs[CS_ETM_TRCVDARCCTLR] = 0x00000000; + // Set instructions address filter to none + etm_state.regs[CS_ETM_TRCVIIECTLR] = 0x00000000; + // Set stall configuration to a basic setting + etm_state.regs[CS_ETM_TRCSTALLCTLR] = 0x00000000; +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-etm" }, + {} }; + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + int ret = 0; + + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + etm_state.enabled = 0x0; + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, etm_range, NELEMS(etm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + set_default_regs(); + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = etm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(etm_enable_ops); + + drvdata->base.disable_seq.ops = etm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(etm_disable_ops); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "Config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return ret; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if
(drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (etm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + etm_state.regs[reg] = val; + return count; +} + +#define CS_ETM_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", etm_state.regs[CS_ETM_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_ETM_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ETM_REG_ATTR_RW(trcconfigr, TRCCONFIGR); +CS_ETM_REG_ATTR_RW(trctraceidr, TRCTRACEIDR); +CS_ETM_REG_ATTR_RW(trcvdarcctlr, TRCVDARCCTLR); +CS_ETM_REG_ATTR_RW(trcviiectlr, TRCVIIECTLR); +CS_ETM_REG_ATTR_RW(trcstallctlr, TRCSTALLCTLR); + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", etm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +static struct attribute *coresight_etm_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_trcconfigr.attr, + &dev_attr_trctraceidr.attr, + &dev_attr_trcvdarcctlr.attr, + &dev_attr_trcviiectlr.attr, + &dev_attr_trcstallctlr.attr, + NULL, +}; +static struct attribute_group coresight_etm_group = { .attrs = coresight_etm_attrs, + .name = "mgmt" }; +static const struct attribute_group *coresight_etm_groups[] = { + &coresight_etm_group, + NULL, +}; +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_etm_groups; +} + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-etm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ETM"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c new file mode 100644 index 000000000000..9f60192a682b --- /dev/null +++ b/drivers/hwtracing/coresight/mali/sources/itm/coresight_mali_source_itm_core.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include "sources/coresight_mali_sources.h" + +/* Linux Coresight framework does not support multiple sources enabled + * at the same time. + * + * To avoid Kernel instability, all Mali Coresight sources use the + * same trace ID value as the mandatory ETM one. + */ +#define CS_MALI_TRACE_ID 0x00000010 + +#define CS_SCS_BASE_ADDR 0xE000E000 +#define SCS_DEMCR 0xDFC +#define CS_ITM_BASE_ADDR 0xE0000000 +#define ITM_TCR 0xE80 +#define ITM_TCR_BUSY_BIT (0x1 << 22) +#define CS_DWT_BASE_ADDR 0xE0001000 +#define DWT_CTRL 0x000 +#define DWT_CYCCNT 0x004 + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE +static char *type_name = "mali-source-itm"; +#endif + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) + +enum cs_itm_dwt_dynamic_regs { CS_DWT_CTRL, CS_ITM_TCR, CS_ITM_DWT_NR_DYN_REGS }; + +struct cs_itm_state { + int enabled; + u32 regs[CS_ITM_DWT_NR_DYN_REGS]; +}; + +static struct cs_itm_state itm_state = { 0 }; + +static struct kbase_debug_coresight_csf_address_range dwt_itm_range[] = { + { CS_SCS_BASE_ADDR, CS_SCS_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_ITM_BASE_ADDR, CS_ITM_BASE_ADDR + CORESIGHT_DEVTYPE }, + { CS_DWT_BASE_ADDR, CS_DWT_BASE_ADDR + CORESIGHT_DEVTYPE } +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_enable_ops[] = { + // Enable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x01000000), + // Unlock DWT configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Prep DWT counter to immediately send sync packet ((1 << 24) - 1) + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CYCCNT, 0x00ffffff), + // Write initial value of post count counter + WRITE_IMM_OP(CS_DWT_BASE_ADDR + DWT_CTRL, 0x00000020), + // Set DWT configuration: + WRITE_PTR_OP(CS_DWT_BASE_ADDR + DWT_CTRL, &itm_state.regs[CS_DWT_CTRL]), + // Lock DWT configuration + WRITE_IMM_OP(CS_DWT_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Set ITM configuration: + WRITE_PTR_OP(CS_ITM_BASE_ADDR + ITM_TCR, &itm_state.regs[CS_ITM_TCR]), + // Lock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit on at the end of sequence + BIT_OR_OP(&itm_state.enabled, 0x1), +}; + +static struct kbase_debug_coresight_csf_op dwt_itm_disable_ops[] = { + // Disable ITM/DWT functionality via DEMCR register + WRITE_IMM_OP(CS_SCS_BASE_ADDR + SCS_DEMCR, 0x00000000), + // Unlock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, CS_MALI_UNLOCK_COMPONENT), + // Check ITM is disabled + POLL_OP(CS_ITM_BASE_ADDR + ITM_TCR, ITM_TCR_BUSY_BIT, 0x0), + // Lock ITM configuration + WRITE_IMM_OP(CS_ITM_BASE_ADDR + CORESIGHT_LAR, 0x00000000), + // Set enabled bit off at the end of sequence + BIT_AND_OP(&itm_state.enabled, 0x0), +}; + +static void set_default_regs(void) +{ + // DWT configuration: + // [0] = 1, enable cycle counter + // [4:1] = 4, set PC sample rate of 256 cycles + // [8:5] = 1, set initial post count value + // [9] = 1, select position of post count tap on the
cycle counter + // [10:11] = 1, enable sync packets + // [12] = 1, enable periodic PC sample packets + itm_state.regs[CS_DWT_CTRL] = 0x00001629; + // ITM configuration: + // [0] = 1, Enable ITM + // [1] = 1, Enable Time stamp generation + // [2] = 1, Enable sync packet transmission + // [3] = 1, Enable HW event forwarding + // [11:10] = 1, Generate TS request approx every 128 cycles + // [22:16] = 1, Trace bus ID + itm_state.regs[CS_ITM_TCR] = 0x0001040F; +} + +static int verify_store_reg(struct device *dev, const char *buf, size_t count, int reg) +{ + struct coresight_mali_source_drvdata *drvdata = dev_get_drvdata(dev->parent); + u32 val; + int err; + + if (buf == NULL) + return -EINVAL; + + if (itm_state.enabled == 1) { + dev_err(drvdata->base.dev, + "Config needs to be disabled before modifying registers\n"); + return -EINVAL; + } + + err = kstrtou32(buf, 0, &val); + if (err) { + dev_err(drvdata->base.dev, "Invalid input value\n"); + return -EINVAL; + } + + itm_state.regs[reg] = val; + return count; +} + +static ssize_t is_enabled_show(struct device *dev, struct device_attribute *attr, char *const buf) +{ + return sprintf(buf, "%d\n", itm_state.enabled); +} +static DEVICE_ATTR_RO(is_enabled); + +#define CS_ITM_DWT_REG_ATTR_RW(_a, _b) \ + static ssize_t _a##_show(struct device *dev, struct device_attribute *attr, \ + char *const buf) \ + { \ + return sprintf(buf, "%#x\n", itm_state.regs[CS_##_b]); \ + } \ + static ssize_t _a##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + return verify_store_reg(dev, buf, count, CS_##_b); \ + } \ + static DEVICE_ATTR_RW(_a) + +CS_ITM_DWT_REG_ATTR_RW(dwt_ctrl, DWT_CTRL); +CS_ITM_DWT_REG_ATTR_RW(itm_tcr, ITM_TCR); + +static struct attribute *coresight_mali_source_attrs[] = { + &dev_attr_is_enabled.attr, + &dev_attr_dwt_ctrl.attr, + &dev_attr_itm_tcr.attr, + NULL, +}; + +static const struct attribute_group coresight_mali_source_group = { + .attrs = coresight_mali_source_attrs, + .name = "mgmt" +}; + +static const struct attribute_group *coresight_mali_source_groups[] = { + &coresight_mali_source_group, + NULL, +}; + +const struct attribute_group **coresight_mali_source_groups_get(void) +{ + return coresight_mali_source_groups; +} + +int coresight_mali_sources_init_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if (drvdata == NULL) + return -EINVAL; + +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + drvdata->type_name = type_name; +#endif + + drvdata->base.kbase_client = kbase_debug_coresight_csf_register( + drvdata->base.gpu_dev, dwt_itm_range, NELEMS(dwt_itm_range)); + if (drvdata->base.kbase_client == NULL) { + dev_err(drvdata->base.dev, "Registration with full range failed unexpectedly\n"); + return -EINVAL; + } + + drvdata->trcid = CS_MALI_TRACE_ID; + + drvdata->base.enable_seq.ops = dwt_itm_enable_ops; + drvdata->base.enable_seq.nr_ops = NELEMS(dwt_itm_enable_ops); + + drvdata->base.disable_seq.ops = dwt_itm_disable_ops; + drvdata->base.disable_seq.nr_ops = NELEMS(dwt_itm_disable_ops); + + set_default_regs(); + + drvdata->base.config = kbase_debug_coresight_csf_config_create( + drvdata->base.kbase_client, &drvdata->base.enable_seq, &drvdata->base.disable_seq); + if (!drvdata->base.config) { + dev_err(drvdata->base.dev, "config create failed unexpectedly\n"); + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); + return -EINVAL; + } + + return 0; +} + +void coresight_mali_sources_deinit_drvdata(struct coresight_mali_source_drvdata *drvdata) +{ + if 
(drvdata->base.config != NULL) + kbase_debug_coresight_csf_config_free(drvdata->base.config); + + if (drvdata->base.kbase_client != NULL) + kbase_debug_coresight_csf_unregister(drvdata->base.kbase_client); +} + +static const struct of_device_id mali_source_ids[] = { { .compatible = + "arm,coresight-mali-source-itm" }, + {} }; + +static struct platform_driver mali_sources_platform_driver = { + .probe = coresight_mali_sources_probe, + .remove = coresight_mali_sources_remove, + .driver = { + .name = "coresight-mali-source-itm", + .owner = THIS_MODULE, + .of_match_table = mali_source_ids, + .suppress_bind_attrs = true, + }, +}; + +static int __init mali_sources_init(void) +{ + return platform_driver_register(&mali_sources_platform_driver); +} + +static void __exit mali_sources_exit(void) +{ + platform_driver_unregister(&mali_sources_platform_driver); +} + +module_init(mali_sources_init); +module_exit(mali_sources_exit); + +MODULE_AUTHOR("ARM Ltd."); +MODULE_DESCRIPTION("Arm Coresight Mali source ITM"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mali_kbase_debug_coresight_csf.h b/include/linux/mali_kbase_debug_coresight_csf.h new file mode 100644 index 000000000000..8356fd497e74 --- /dev/null +++ b/include/linux/mali_kbase_debug_coresight_csf.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_CORESIGHT_CSF_ +#define _KBASE_DEBUG_CORESIGHT_CSF_ + +#include +#include + +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_NOP 0U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM 1U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE_IMM_RANGE 2U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_WRITE 3U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_READ 4U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_POLL 5U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_OR 6U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_XOR 7U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_AND 8U +#define KBASE_DEBUG_CORESIGHT_CSF_OP_TYPE_BIT_NOT 9U + +/** + * struct kbase_debug_coresight_csf_write_imm_op - Coresight immediate write operation structure + * + * @reg_addr: Register address to write to. + * @val: Value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_imm_op { + __u32 reg_addr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_imm_range_op - Coresight immediate write range + * operation structure + * + * @reg_start: Register address to start writing from. + * @reg_end: Register address to stop writing from. End address included in the write range. + * @val: Value to write at @reg_addr. 
+ */ +struct kbase_debug_coresight_csf_write_imm_range_op { + __u32 reg_start; + __u32 reg_end; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_write_op - Coresight write operation structure + * + * @reg_addr: Register address to write to. + * @ptr: Pointer to the value to write at @reg_addr. + */ +struct kbase_debug_coresight_csf_write_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_read_op - Coresight read operation structure + * + * @reg_addr: Register address to read. + * @ptr: Pointer where to store the read value. + */ +struct kbase_debug_coresight_csf_read_op { + __u32 reg_addr; + __u32 *ptr; +}; + +/** + * struct kbase_debug_coresight_csf_poll_op - Coresight poll operation structure + * + * @reg_addr: Register address to poll. + * @val: Expected value after poll. + * @mask: Mask to apply on the read value from @reg_addr when comparing against @val. + */ +struct kbase_debug_coresight_csf_poll_op { + __u32 reg_addr; + __u32 val; + __u32 mask; +}; + +/** + * struct kbase_debug_coresight_csf_bitw_op - Coresight bitwise operation structure + * + * @ptr: Pointer to the variable on which to execute the bit operation. + * @val: Value with which the operation should be executed against @ptr value. + */ +struct kbase_debug_coresight_csf_bitw_op { + __u32 *ptr; + __u32 val; +}; + +/** + * struct kbase_debug_coresight_csf_op - Coresight supported operations + * + * @type: Operation type. + * @padding: Padding for 64bit alignment. + * @op: Operation union. + * @op.write_imm: Parameters for immediate write operation. + * @op.write_imm_range: Parameters for immediate range write operation. + * @op.write: Parameters for write operation. + * @op.read: Parameters for read operation. + * @op.poll: Parameters for poll operation. + * @op.bitw: Parameters for bitwise operation. + * @op.padding: Padding for 64bit alignment. + * + * All operation structures should include padding to ensure they are the same size. + */ +struct kbase_debug_coresight_csf_op { + __u8 type; + __u8 padding[7]; + union { + struct kbase_debug_coresight_csf_write_imm_op write_imm; + struct kbase_debug_coresight_csf_write_imm_range_op write_imm_range; + struct kbase_debug_coresight_csf_write_op write; + struct kbase_debug_coresight_csf_read_op read; + struct kbase_debug_coresight_csf_poll_op poll; + struct kbase_debug_coresight_csf_bitw_op bitw; + u32 padding[3]; + } op; +}; + +/** + * struct kbase_debug_coresight_csf_sequence - Coresight sequence of operations + * + * @ops: Arrays containing Coresight operations. + * @nr_ops: Size of @ops. + */ +struct kbase_debug_coresight_csf_sequence { + struct kbase_debug_coresight_csf_op *ops; + int nr_ops; +}; + +/** + * struct kbase_debug_coresight_csf_address_range - Coresight client address range + * + * @start: Start offset of the address range. + * @end: End offset of the address range. + */ +struct kbase_debug_coresight_csf_address_range { + __u32 start; + __u32 end; +}; + +/** + * kbase_debug_coresight_csf_register - Register as a client for set ranges of MCU memory. + * + * @drv_data: Pointer to driver device data. + * @ranges: Pointer to an array of struct kbase_debug_coresight_csf_address_range + * that contains start and end addresses that the client will manage. + * @nr_ranges: Size of @ranges array. + * + * This function checks @ranges against current client claimed ranges. If there + * are no overlaps, a new client is created and added to the list. + * + * Return: A pointer of the registered client instance on success. 
NULL on failure. + */ +void *kbase_debug_coresight_csf_register(void *drv_data, + struct kbase_debug_coresight_csf_address_range *ranges, + int nr_ranges); + +/** + * kbase_debug_coresight_csf_unregister - Removes a coresight client. + * + * @client_data: A pointer to a coresight client. + * + * This function removes a client from the client list and frees the client struct. + */ +void kbase_debug_coresight_csf_unregister(void *client_data); + +/** + * kbase_debug_coresight_csf_config_create - Creates a configuration containing + * enable and disable sequences. + * + * @client_data: Pointer to a coresight client. + * @enable_seq: Pointer to a struct containing the ops needed to enable coresight blocks. + * It is optional, so it could be NULL. + * @disable_seq: Pointer to a struct containing ops to run to disable coresight blocks. + * It is optional, so it could be NULL. + * + * Return: Valid pointer on success. NULL on failure. + */ +void * +kbase_debug_coresight_csf_config_create(void *client_data, + struct kbase_debug_coresight_csf_sequence *enable_seq, + struct kbase_debug_coresight_csf_sequence *disable_seq); +/** + * kbase_debug_coresight_csf_config_free - Frees a configuration containing + * enable and disable sequences. + * + * @config_data: Pointer to a coresight configuration. + */ +void kbase_debug_coresight_csf_config_free(void *config_data); + +/** + * kbase_debug_coresight_csf_config_enable - Enables a coresight configuration + * + * @config_data: Pointer to a coresight configuration. + * + * If the GPU is turned on, the configuration is immediately applied to the CoreSight blocks. + * If the GPU is turned off, the configuration is scheduled to be applied the next + * time the GPU is turned on. + * + * A configuration is enabled by executing read/write/poll ops defined in config->enable_seq. + * + * Return: 0 on success. Error code on failure. + */ +int kbase_debug_coresight_csf_config_enable(void *config_data); +/** + * kbase_debug_coresight_csf_config_disable - Disables a coresight configuration + * + * @config_data: Pointer to a coresight configuration. + * + * If the GPU is turned off, this is effectively a NOP, as kbase should have disabled + * the configuration when the GPU was turned off. + * If the GPU is on, the configuration will be disabled. + * + * A configuration is disabled by executing read/write/poll ops defined in config->disable_seq. + * + * Return: 0 on success. Error code on failure.
+ */ +int kbase_debug_coresight_csf_config_disable(void *config_data); + +#endif /* _KBASE_DEBUG_CORESIGHT_CSF_ */ diff --git a/include/linux/version_compat_defs.h b/include/linux/version_compat_defs.h index d0a09985c5ca..335147cada2c 100644 --- a/include/linux/version_compat_defs.h +++ b/include/linux/version_compat_defs.h @@ -24,10 +24,12 @@ #include -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE + #ifndef EPOLLHUP #define EPOLLHUP POLLHUP #endif @@ -44,4 +46,6 @@ typedef unsigned int __poll_t; #define EPOLLRDNORM POLLRDNORM #endif +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h index 613eb1fdd081..a44da7beb041 100644 --- a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_dummy.h @@ -29,7 +29,11 @@ #include #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#if MALI_USE_CSF +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#else /* MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* !MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) @@ -56,7 +60,16 @@ #define KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE \ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) +/* + * Bit mask - no. bits set is no. cores + * Values obtained from talking to HW team + * Example: tODx has 10 cores, 0b11 1111 1111 -> 0x3FF + */ #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) diff --git a/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h new file mode 100644 index 000000000000..69bc44c26361 --- /dev/null +++ b/include/uapi/gpu/arm/bifrost/backend/gpu/mali_kbase_model_linux.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Dummy Model interface + */ + +#ifndef _UAPI_KBASE_MODEL_LINUX_H_ +#define _UAPI_KBASE_MODEL_LINUX_H_ + +/* Generic model IRQs */ +enum model_linux_irqs { + MODEL_LINUX_JOB_IRQ, + MODEL_LINUX_GPU_IRQ, + MODEL_LINUX_MMU_IRQ, + MODEL_LINUX_NONE_IRQ, + MODEL_LINUX_NUM_TYPE_IRQ +}; + +#endif /* _UAPI_KBASE_MODEL_LINUX_H_ */ diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h index d9813c055809..ec8c02f18e16 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_base_csf_kernel.h @@ -118,9 +118,21 @@ #define BASE_QUEUE_MAX_PRIORITY (15U) -/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ -#define BASEP_EVENT_VAL_INDEX (0U) -#define BASEP_EVENT_ERR_INDEX (1U) +/* Sync32 object fields definition */ +#define BASEP_EVENT32_VAL_OFFSET (0U) +#define BASEP_EVENT32_ERR_OFFSET (4U) +#define BASEP_EVENT32_SIZE_BYTES (8U) + +/* Sync64 object fields definition */ +#define BASEP_EVENT64_VAL_OFFSET (0U) +#define BASEP_EVENT64_ERR_OFFSET (8U) +#define BASEP_EVENT64_SIZE_BYTES (16U) + +/* Sync32 object alignment, equal to its size */ +#define BASEP_EVENT32_ALIGN_BYTES (8U) + +/* Sync64 object alignment, equal to its size */ +#define BASEP_EVENT64_ALIGN_BYTES (16U) /* The upper limit for number of objects that could be waited/set per command. * This limit is now enforced as internally the error inherit inputs are diff --git a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h index d9a1867e13c3..642ca3465ead 100644 --- a/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/csf/mali_kbase_csf_ioctl.h @@ -72,10 +72,18 @@ * - base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 1.17: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 1.18: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. 
*/ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 16 +#define BASE_UK_VERSION_MINOR 17 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h index 1f34d99830fe..784e09a7edc2 100644 --- a/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h +++ b/include/uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h @@ -119,7 +119,6 @@ #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) #define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) #define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) -#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) diff --git a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h index 9c7553ff2bd2..902d0ce9145a 100644 --- a/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/jm/mali_kbase_jm_ioctl.h @@ -133,9 +133,17 @@ * - base_jit_alloc_info_11_5 * - kbase_ioctl_mem_jit_init_10_2 * - kbase_ioctl_mem_jit_init_11_5 + * 11.37: + * - Fix kinstr_prfcnt issues: + * - Missing implicit sample for CMD_STOP when HWCNT buffer is full. + * - Race condition when stopping periodic sampling. + * - prfcnt_block_metadata::block_idx gaps. + * - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC is removed. + * 11.38: + * - CPU mappings of USER_BUFFER imported memory handles must be cached. */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 36 +#define BASE_UK_VERSION_MINOR 37 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h index 962decc10efc..5089bf249528 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_hwcnt_reader.h @@ -445,7 +445,7 @@ struct prfcnt_metadata { * @PRFCNT_CONTROL_CMD_STOP: Stop the counter data dump run for the * calling client session. * @PRFCNT_CONTROL_CMD_SAMPLE_SYNC: Trigger a synchronous manual sample. - * @PRFCNT_CONTROL_CMD_SAMPLE_ASYNC: Trigger an asynchronous manual sample. + * @PRFCNT_CONTROL_CMD_RESERVED: Previously SAMPLE_ASYNC not supported any more. * @PRFCNT_CONTROL_CMD_DISCARD: Discard all samples which have not yet * been consumed by userspace. 
Note that * this can race with new samples if @@ -455,7 +455,7 @@ enum prfcnt_control_cmd_code { PRFCNT_CONTROL_CMD_START = 1, PRFCNT_CONTROL_CMD_STOP, PRFCNT_CONTROL_CMD_SAMPLE_SYNC, - PRFCNT_CONTROL_CMD_SAMPLE_ASYNC, + PRFCNT_CONTROL_CMD_RESERVED, PRFCNT_CONTROL_CMD_DISCARD, }; diff --git a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h index 63bf48b603ef..c8a54f91165e 100644 --- a/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h +++ b/include/uapi/gpu/arm/bifrost/mali_kbase_ioctl.h @@ -46,8 +46,7 @@ struct kbase_ioctl_set_flags { __u32 create_flags; }; -#define KBASE_IOCTL_SET_FLAGS \ - _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) +#define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) /** * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel @@ -81,8 +80,7 @@ struct kbase_ioctl_get_gpuprops { __u32 flags; }; -#define KBASE_IOCTL_GET_GPUPROPS \ - _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) +#define KBASE_IOCTL_GET_GPUPROPS _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) /** * union kbase_ioctl_mem_alloc - Allocate memory on the GPU @@ -108,8 +106,7 @@ union kbase_ioctl_mem_alloc { } out; }; -#define KBASE_IOCTL_MEM_ALLOC \ - _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) +#define KBASE_IOCTL_MEM_ALLOC _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) /** * struct kbase_ioctl_mem_query - Query properties of a GPU memory region @@ -131,12 +128,11 @@ union kbase_ioctl_mem_query { } out; }; -#define KBASE_IOCTL_MEM_QUERY \ - _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) +#define KBASE_IOCTL_MEM_QUERY _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) -#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) -#define KBASE_MEM_QUERY_FLAGS ((__u64)3) +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) +#define KBASE_MEM_QUERY_FLAGS ((__u64)3) /** * struct kbase_ioctl_mem_free - Free a memory region @@ -146,8 +142,7 @@ struct kbase_ioctl_mem_free { __u64 gpu_addr; }; -#define KBASE_IOCTL_MEM_FREE \ - _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) +#define KBASE_IOCTL_MEM_FREE _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) /** * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader @@ -167,7 +162,7 @@ struct kbase_ioctl_hwcnt_reader_setup { __u32 mmu_l2_bm; }; -#define KBASE_IOCTL_HWCNT_READER_SETUP \ +#define KBASE_IOCTL_HWCNT_READER_SETUP \ _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) /** @@ -182,8 +177,7 @@ struct kbase_ioctl_hwcnt_values { __u32 padding; }; -#define KBASE_IOCTL_HWCNT_SET \ - _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) +#define KBASE_IOCTL_HWCNT_SET _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) /** * struct kbase_ioctl_disjoint_query - Query the disjoint counter @@ -193,8 +187,7 @@ struct kbase_ioctl_disjoint_query { __u32 counter; }; -#define KBASE_IOCTL_DISJOINT_QUERY \ - _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) +#define KBASE_IOCTL_DISJOINT_QUERY _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) /** * struct kbase_ioctl_get_ddk_version - Query the kernel version @@ -215,8 +208,7 @@ struct kbase_ioctl_get_ddk_version { __u32 padding; }; -#define KBASE_IOCTL_GET_DDK_VERSION \ - _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) +#define KBASE_IOCTL_GET_DDK_VERSION _IOW(KBASE_IOCTL_TYPE, 13, struct 
kbase_ioctl_get_ddk_version) /** * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory @@ -241,8 +233,7 @@ struct kbase_ioctl_mem_jit_init { __u64 phys_pages; }; -#define KBASE_IOCTL_MEM_JIT_INIT \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) +#define KBASE_IOCTL_MEM_JIT_INIT _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) /** * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory @@ -262,8 +253,7 @@ struct kbase_ioctl_mem_sync { __u8 padding[7]; }; -#define KBASE_IOCTL_MEM_SYNC \ - _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) +#define KBASE_IOCTL_MEM_SYNC _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) /** * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer @@ -286,7 +276,7 @@ union kbase_ioctl_mem_find_cpu_offset { } out; }; -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) /** @@ -298,8 +288,7 @@ struct kbase_ioctl_get_context_id { __u32 id; }; -#define KBASE_IOCTL_GET_CONTEXT_ID \ - _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) +#define KBASE_IOCTL_GET_CONTEXT_ID _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) /** * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd @@ -312,11 +301,9 @@ struct kbase_ioctl_tlstream_acquire { __u32 flags; }; -#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ - _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) +#define KBASE_IOCTL_TLSTREAM_ACQUIRE _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) -#define KBASE_IOCTL_TLSTREAM_FLUSH \ - _IO(KBASE_IOCTL_TYPE, 19) +#define KBASE_IOCTL_TLSTREAM_FLUSH _IO(KBASE_IOCTL_TYPE, 19) /** * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region @@ -333,8 +320,7 @@ struct kbase_ioctl_mem_commit { __u64 pages; }; -#define KBASE_IOCTL_MEM_COMMIT \ - _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) +#define KBASE_IOCTL_MEM_COMMIT _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) /** * union kbase_ioctl_mem_alias - Create an alias of memory regions @@ -362,8 +348,7 @@ union kbase_ioctl_mem_alias { } out; }; -#define KBASE_IOCTL_MEM_ALIAS \ - _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) +#define KBASE_IOCTL_MEM_ALIAS _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) /** * union kbase_ioctl_mem_import - Import memory for use by the GPU @@ -391,8 +376,7 @@ union kbase_ioctl_mem_import { } out; }; -#define KBASE_IOCTL_MEM_IMPORT \ - _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) +#define KBASE_IOCTL_MEM_IMPORT _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) /** * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region @@ -406,8 +390,7 @@ struct kbase_ioctl_mem_flags_change { __u64 mask; }; -#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ - _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) +#define KBASE_IOCTL_MEM_FLAGS_CHANGE _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) /** * struct kbase_ioctl_stream_create - Create a synchronisation stream @@ -424,8 +407,7 @@ struct kbase_ioctl_stream_create { char name[32]; }; -#define KBASE_IOCTL_STREAM_CREATE \ - _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) +#define KBASE_IOCTL_STREAM_CREATE _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) /** * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence @@ -435,8 +417,7 @@ struct kbase_ioctl_fence_validate { int fd; }; -#define 
KBASE_IOCTL_FENCE_VALIDATE \
-	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
+#define KBASE_IOCTL_FENCE_VALIDATE _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
 
 /**
  * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
@@ -452,8 +433,7 @@ struct kbase_ioctl_mem_profile_add {
 	__u32 padding;
 };
 
-#define KBASE_IOCTL_MEM_PROFILE_ADD \
-	_IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
+#define KBASE_IOCTL_MEM_PROFILE_ADD _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
 
 /**
  * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
@@ -465,7 +445,7 @@ struct kbase_ioctl_sticky_resource_map {
 	__u64 address;
 };
 
-#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
+#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
 	_IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
 
 /**
@@ -479,7 +459,7 @@ struct kbase_ioctl_sticky_resource_unmap {
 	__u64 address;
 };
 
-#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
+#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
 	_IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
 
 /**
@@ -507,14 +487,12 @@ union kbase_ioctl_mem_find_gpu_start_and_offset {
 	} out;
 };
 
-#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
+#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
 	_IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
 
-#define KBASE_IOCTL_CINSTR_GWT_START \
-	_IO(KBASE_IOCTL_TYPE, 33)
+#define KBASE_IOCTL_CINSTR_GWT_START _IO(KBASE_IOCTL_TYPE, 33)
 
-#define KBASE_IOCTL_CINSTR_GWT_STOP \
-	_IO(KBASE_IOCTL_TYPE, 34)
+#define KBASE_IOCTL_CINSTR_GWT_STOP _IO(KBASE_IOCTL_TYPE, 34)
 
 /**
  * union kbase_ioctl_cinstr_gwt_dump - Used to collect all GPU write fault
@@ -547,8 +525,7 @@ union kbase_ioctl_cinstr_gwt_dump {
 	} out;
 };
 
-#define KBASE_IOCTL_CINSTR_GWT_DUMP \
-	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
+#define KBASE_IOCTL_CINSTR_GWT_DUMP _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
 
 /**
  * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
@@ -559,8 +536,7 @@ struct kbase_ioctl_mem_exec_init {
 	__u64 va_pages;
 };
 
-#define KBASE_IOCTL_MEM_EXEC_INIT \
-	_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+#define KBASE_IOCTL_MEM_EXEC_INIT _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
 
 /**
  * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
@@ -589,7 +565,7 @@ union kbase_ioctl_get_cpu_gpu_timeinfo {
 	} out;
 };
 
-#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
 	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
 
 /**
@@ -601,7 +577,7 @@ struct kbase_ioctl_context_priority_check {
 	__u8 priority;
 };
 
-#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
+#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
 	_IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
 
 /**
@@ -613,7 +589,7 @@ struct kbase_ioctl_set_limited_core_count {
 	__u8 max_core_count;
 };
 
-#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
+#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
 	_IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
 
 /**
@@ -634,7 +610,7 @@ struct kbase_ioctl_kinstr_prfcnt_enum_info {
 	__u64 info_list_ptr;
 };
 
-#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
+#define KBASE_IOCTL_KINSTR_PRFCNT_ENUM_INFO \
 	_IOWR(KBASE_IOCTL_TYPE, 56, struct kbase_ioctl_kinstr_prfcnt_enum_info)
 
 /**
@@ -663,7 +639,7 @@ union kbase_ioctl_kinstr_prfcnt_setup {
 	} out;
 };
 
-#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
+#define KBASE_IOCTL_KINSTR_PRFCNT_SETUP \
 	_IOWR(KBASE_IOCTL_TYPE, 57, union kbase_ioctl_kinstr_prfcnt_setup)
 
 /***************
@@ -687,8 +663,7 @@ struct kbase_ioctl_tlstream_stats {
 	__u32 bytes_generated;
 };
 
-#define KBASE_IOCTL_TLSTREAM_STATS \
-	_IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
+#define KBASE_IOCTL_TLSTREAM_STATS _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
 
 #endif /* MALI_UNIT_TEST */
 
@@ -706,108 +681,107 @@ struct kbase_ioctl_tlstream_stats
  * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
  */
-
 /**********************************
  * Definitions for GPU properties *
  **********************************/
-#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
-#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
-#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
-#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
 
-#define KBASE_GPUPROP_PRODUCT_ID 1
-#define KBASE_GPUPROP_VERSION_STATUS 2
-#define KBASE_GPUPROP_MINOR_REVISION 3
-#define KBASE_GPUPROP_MAJOR_REVISION 4
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_VERSION_STATUS 2
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
 /* 5 previously used for GPU speed */
-#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
+#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
 /* 7 previously used for minimum GPU speed */
-#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
-#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
-#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
-#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
-#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
+#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
+#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
+#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
+#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
+#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
 
-#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
-#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
-#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
+#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
+#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
 
-#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
-#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
+#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
+#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
 
-#define KBASE_GPUPROP_MAX_THREADS 18
-#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
-#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
-#define KBASE_GPUPROP_MAX_REGISTERS 21
-#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
-#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
-#define KBASE_GPUPROP_IMPL_TECH 24
+#define KBASE_GPUPROP_MAX_THREADS 18
+#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
+#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
+#define KBASE_GPUPROP_MAX_REGISTERS 21
+#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
+#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
+#define KBASE_GPUPROP_IMPL_TECH 24
 
-#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
-#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
-#define KBASE_GPUPROP_RAW_L2_PRESENT 27
-#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
-#define KBASE_GPUPROP_RAW_L2_FEATURES 29
-#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
-#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
-#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
-#define KBASE_GPUPROP_RAW_AS_PRESENT 33
-#define KBASE_GPUPROP_RAW_JS_PRESENT 34
-#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
-#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
-#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
-#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
-#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
-#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
-#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
-#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
-#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
-#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
-#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
-#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
-#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
-#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
-#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
-#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
-#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
-#define KBASE_GPUPROP_RAW_GPU_ID 55
-#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
-#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
-#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
-#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
-#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
+#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
+#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
+#define KBASE_GPUPROP_RAW_L2_PRESENT 27
+#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
+#define KBASE_GPUPROP_RAW_L2_FEATURES 29
+#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
+#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
+#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
+#define KBASE_GPUPROP_RAW_AS_PRESENT 33
+#define KBASE_GPUPROP_RAW_JS_PRESENT 34
+#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
+#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
+#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
+#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
+#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
+#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
+#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
+#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
+#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
+#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
+#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
+#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
+#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
+#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
+#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
+#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
+#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
+#define KBASE_GPUPROP_RAW_GPU_ID 55
+#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
+#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
+#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
+#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
+#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
 
-#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
-#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
-#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
-#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
-#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
-#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
-#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
-#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
-#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
-#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
-#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
-#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
-#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
-#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
-#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
-#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
-#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
-#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
-#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
 
-#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
-#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
+#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
 
-#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
+#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
 
-#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
-#define KBASE_GPUPROP_TLS_ALLOC 84
-#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
+#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
+#define KBASE_GPUPROP_TLS_ALLOC 84
+#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
 
 #ifdef __cplusplus
 }
 #endif
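
The KBASE_GPUPROP_* values above are property keys, and KBASE_GPUPROP_VALUE_SIZE_U8..U64 are the 2-bit size codes that accompany them in the property buffer kbase hands back to userspace. Below is a minimal decoder sketch in C, assuming the usual kbase packing of each entry as a little-endian u32 key of (property_id << 2) | size_code followed by a value of the indicated width; the buffer itself is assumed to have been obtained elsewhere (e.g. through the GET_GPUPROPS ioctl, which is not part of this hunk), and a little-endian host is assumed.

/*
 * Illustrative sketch, not part of the patch: walk a GPU-properties buffer
 * using the KBASE_GPUPROP_* keys and KBASE_GPUPROP_VALUE_SIZE_* codes above.
 * Assumed layout: u32 key = (property_id << 2) | size_code, then a value of
 * 1, 2, 4 or 8 bytes. Little-endian host assumed for the memcpy into u64.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void dump_gpuprops(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(uint32_t) <= len) {
		uint32_t key;
		uint64_t value = 0;
		size_t vsize;

		memcpy(&key, buf + off, sizeof(key));
		off += sizeof(key);

		switch (key & 0x3) {            /* KBASE_GPUPROP_VALUE_SIZE_* */
		case 0x0: vsize = 1; break;     /* ..._U8  */
		case 0x1: vsize = 2; break;     /* ..._U16 */
		case 0x2: vsize = 4; break;     /* ..._U32 */
		default:  vsize = 8; break;     /* ..._U64 */
		}
		if (off + vsize > len)
			break;                  /* truncated buffer */
		memcpy(&value, buf + off, vsize);
		off += vsize;

		/* key >> 2 is the property id, e.g. KBASE_GPUPROP_PRODUCT_ID (1) */
		printf("prop %u = 0x%llx\n", key >> 2, (unsigned long long)value);
	}
}

Only the loop's use of the size codes and property ids depends on the definitions in this hunk; where the buffer comes from and how it is sized are assumptions made to keep the sketch self-contained.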